NUMA(3)                    Linux Programmer's Manual                   NUMA(3)


NAME
       numa - NUMA policy library

SYNOPSIS
       #include <numa.h>

       cc ... -lnuma

       int numa_available(void);

       int numa_max_possible_node(void);
       int numa_num_possible_nodes();

       int numa_max_node(void);
       int numa_num_configured_nodes();
       struct bitmask *numa_get_mems_allowed(void);

       int numa_num_configured_cpus(void);
       struct bitmask *numa_all_nodes_ptr;
       struct bitmask *numa_no_nodes_ptr;
       struct bitmask *numa_all_cpus_ptr;

       int numa_num_task_cpus();
       int numa_num_task_nodes();

       int numa_parse_bitmap(char *line, struct bitmask *mask);
       struct bitmask *numa_parse_nodestring(const char *string);
       struct bitmask *numa_parse_nodestring_all(const char *string);
       struct bitmask *numa_parse_cpustring(const char *string);
       struct bitmask *numa_parse_cpustring_all(const char *string);

       long numa_node_size(int node, long *freep);
       long long numa_node_size64(int node, long long *freep);

       int numa_preferred(void);
       void numa_set_preferred(int node);
       int numa_get_interleave_node(void);
       struct bitmask *numa_get_interleave_mask(void);
       void numa_set_interleave_mask(struct bitmask *nodemask);
       void numa_interleave_memory(void *start, size_t size,
            struct bitmask *nodemask);
       void numa_bind(struct bitmask *nodemask);
       void numa_set_localalloc(void);
       void numa_set_membind(struct bitmask *nodemask);
       struct bitmask *numa_get_membind(void);

       void *numa_alloc_onnode(size_t size, int node);
       void *numa_alloc_local(size_t size);
       void *numa_alloc_interleaved(size_t size);
       void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask);
       void *numa_alloc(size_t size);
       void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
       void numa_free(void *start, size_t size);

       int numa_run_on_node(int node);
       int numa_run_on_node_mask(struct bitmask *nodemask);
       int numa_run_on_node_mask_all(struct bitmask *nodemask);
       struct bitmask *numa_get_run_node_mask(void);

       void numa_tonode_memory(void *start, size_t size, int node);
       void numa_tonodemask_memory(void *start, size_t size,
            struct bitmask *nodemask);
       void numa_setlocal_memory(void *start, size_t size);
       void numa_police_memory(void *start, size_t size);
       void numa_set_bind_policy(int strict);
       void numa_set_strict(int strict);

       int numa_distance(int node1, int node2);

       int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
       int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
       int numa_node_to_cpus(int node, struct bitmask *mask);
       int numa_node_of_cpu(int cpu);

       struct bitmask *numa_allocate_cpumask();

       void numa_free_cpumask();
       struct bitmask *numa_allocate_nodemask();

       void numa_free_nodemask();
       struct bitmask *numa_bitmask_alloc(unsigned int n);
       struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
       struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp, unsigned int n);
       int numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2);
       void numa_bitmask_free(struct bitmask *bmp);
       int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
       unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
       struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
       struct bitmask *numa_bitmask_setbit(struct bitmask *bmp, unsigned int n);
       void copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nodemask);
       void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmask *bmp);
       void copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto);
       unsigned int numa_bitmask_weight(const struct bitmask *bmp);

       int numa_move_pages(int pid, unsigned long count, void **pages,
            const int *nodes, int *status, int flags);
       int numa_migrate_pages(int pid, struct bitmask *fromnodes,
            struct bitmask *tonodes);

       void numa_error(char *where);

       extern int numa_exit_on_error;
       extern int numa_exit_on_warn;
       void numa_warn(int number, char *where, ...);

DESCRIPTION
       The libnuma library offers a simple programming interface to the NUMA
       (Non Uniform Memory Access) policy supported by the Linux kernel.  On
       a NUMA architecture some memory areas have different latency or
       bandwidth than others.

       Available policies are page interleaving (i.e., allocate in a
       round-robin fashion from all, or a subset, of the nodes on the
       system), preferred node allocation (i.e., preferably allocate on a
       particular node), local allocation (i.e., allocate on the node on
       which the task is currently executing), or allocation only on
       specific nodes (i.e., allocate on some subset of the available
       nodes).  It is also possible to bind tasks to specific nodes.

       Numa memory allocation policy may be specified as a per-task
       attribute, which is inherited by child tasks and processes, or as an
       attribute of a range of process virtual address space.  Numa memory
       policies specified for a range of virtual address space are shared by
       all tasks in the process.  Furthermore, memory policies specified for
       a range of shared memory attached using shmat(2) or mmap(2) from
       shmfs/hugetlbfs are shared by all processes that attach to that
       region.  Memory policies for shared disk backed file mappings are
       currently ignored.

       The default memory allocation policy for tasks and all memory ranges
       is local allocation.  This assumes that no ancestor has installed a
       non-default policy.

       For setting a specific policy globally for all memory allocations in
       a process and its children it is easiest to start it with the
       numactl(8) utility.  For more fine-grained policy inside an
       application this library can be used.

       All numa memory allocation policies take effect only when a page is
       actually faulted into the address space of a process by accessing it.
       The numa_alloc_* functions take care of this automatically.

       A node is defined as an area where all memory has the same speed as
       seen from a particular CPU.  A node can contain multiple CPUs.
       Caches are ignored for this definition.

       Most functions in this library are concerned only with numa nodes and
       their memory.  The exceptions to this are: numa_node_to_cpus(),
       numa_node_of_cpu(), numa_bind(), numa_run_on_node(),
       numa_run_on_node_mask(), numa_run_on_node_mask_all(), and
       numa_get_run_node_mask().  These functions deal with the CPUs
       associated with numa nodes.  See the descriptions below for more
       information.

       Some of these functions accept or return a pointer to struct bitmask.
       A struct bitmask controls a bit map of arbitrary length containing a
       bit representation of nodes.  The predefined variable
       numa_all_nodes_ptr points to a bit mask that has all available nodes
       set; numa_no_nodes_ptr points to the empty set.

       Before any other calls in this library can be used numa_available()
       must be called.  If it returns -1, all other functions in this
       library are undefined.
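
       For example, a minimal startup check might look like the following
       sketch; the error message and exit code are illustrative, not part of
       the library.  Build with cc ... -lnuma.

              #include <numa.h>
              #include <stdio.h>
              #include <stdlib.h>

              int main(void)
              {
                      /* numa_available() must be called first; -1 means
                         the rest of the API must not be used. */
                      if (numa_available() == -1) {
                              fprintf(stderr, "NUMA is not available\n");
                              exit(EXIT_FAILURE);
                      }
                      printf("highest node: %d\n", numa_max_node());
                      return 0;
              }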

       numa_max_possible_node() returns the number of the highest possible
       node in a system.  In other words, the size of a kernel type
       nodemask_t (in bits) minus 1.  This number can be gotten by calling
       numa_num_possible_nodes() and subtracting 1.

       numa_num_possible_nodes() returns the size of the kernel's node mask
       (kernel type nodemask_t).  In other words, large enough to represent
       the maximum number of nodes that the kernel can handle.  This will
       match the kernel's MAX_NUMNODES value.  This count is derived from
       /proc/self/status, field Mems_allowed.

       numa_max_node() returns the highest node number available on the
       current system.  (See the node numbers in /sys/devices/system/node/.)
       Also see numa_num_configured_nodes().

       numa_num_configured_nodes() returns the number of memory nodes in the
       system.  This count includes any nodes that are currently disabled.
       This count is derived from the node numbers in
       /sys/devices/system/node.  (Depends on the kernel being configured
       with /sys (CONFIG_SYSFS).)

       numa_get_mems_allowed() returns the mask of nodes from which the
       process is allowed to allocate memory in its current cpuset context.
       Any nodes that are not included in the returned bitmask will be
       ignored in any of the following libnuma memory policy calls.

       numa_num_configured_cpus() returns the number of cpus in the system.
       This count includes any cpus that are currently disabled.  This count
       is derived from the cpu numbers in /sys/devices/system/cpu.  If the
       kernel is configured without /sys (CONFIG_SYSFS=n) then it falls back
       to using the number of online cpus.
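
       As a sketch of how these query functions combine, a program might
       print a short topology summary; the helper name and output format are
       only examples.

              #include <numa.h>
              #include <stdio.h>

              void print_topology(void)
              {
                      /* Counts of configured nodes and cpus, and the
                         highest node number currently available. */
                      printf("configured nodes: %d (max node id %d)\n",
                             numa_num_configured_nodes(), numa_max_node());
                      printf("configured cpus:  %d\n",
                             numa_num_configured_cpus());
                      printf("possible nodes:   %d\n",
                             numa_num_possible_nodes());
              }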

       numa_all_nodes_ptr points to a bitmask that is allocated by the
       library with bits representing all nodes on which the calling task
       may allocate memory.  This set may be up to all nodes on the system,
       or up to the nodes in the current cpuset.  The bitmask is allocated
       by a call to numa_allocate_nodemask() using size
       numa_max_possible_node().  The set of nodes to record is derived from
       /proc/self/status, field "Mems_allowed".  The user should not alter
       this bitmask.

       numa_no_nodes_ptr points to a bitmask that is allocated by the
       library and left all zeroes.  The bitmask is allocated by a call to
       numa_allocate_nodemask() using size numa_max_possible_node().  The
       user should not alter this bitmask.

       numa_all_cpus_ptr points to a bitmask that is allocated by the
       library with bits representing all cpus on which the calling task may
       execute.  This set may be up to all cpus on the system, or up to the
       cpus in the current cpuset.  The bitmask is allocated by a call to
       numa_allocate_cpumask() using size numa_num_possible_cpus().  The set
       of cpus to record is derived from /proc/self/status, field
       "Cpus_allowed".  The user should not alter this bitmask.

       numa_num_task_cpus() returns the number of cpus that the calling task
       is allowed to use.  This count is derived from the map
       /proc/self/status, field "Cpus_allowed".  Also see the bitmask
       numa_all_cpus_ptr.

       numa_num_task_nodes() returns the number of nodes on which the
       calling task is allowed to allocate memory.  This count is derived
       from the map /proc/self/status, field "Mems_allowed".  Also see the
       bitmask numa_all_nodes_ptr.

       numa_parse_bitmap() parses line, which is a character string such as
       found in /sys/devices/system/node/nodeN/cpumap, into a bitmask
       structure.  The string contains the hexadecimal representation of a
       bit map.  The bitmask may be allocated with numa_allocate_cpumask().
       Returns 0 on success.  Returns -1 on failure.  This function is
       probably of little use to a user application, but it is used by
       libnuma internally.

       numa_parse_nodestring() parses a character string list of nodes into
       a bit mask.  The bit mask is allocated by numa_allocate_nodemask().
       The string is a comma-separated list of node numbers or node ranges.
       A leading ! can be used to indicate "not" this list (in other words,
       all nodes except this list), and a leading + can be used to indicate
       that the node numbers in the list are relative to the task's cpuset.
       The string can be "all" to specify all ( numa_num_task_nodes() )
       nodes.  Node numbers are limited by the number in the system.  See
       numa_max_node() and numa_num_configured_nodes().
       Examples:  1-5,7,10   !4-5   +0-3
       If the string is of 0 length, bitmask numa_no_nodes_ptr is returned.
       Returns 0 if the string is invalid.
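
       A small usage sketch; the helper name and the node string "0,2-3" are
       only examples and must name nodes that exist on the system.

              #include <numa.h>
              #include <stdio.h>

              void show_nodestring(const char *s)
              {
                      struct bitmask *nodes = numa_parse_nodestring(s);
                      if (nodes == 0) {          /* invalid string */
                              fprintf(stderr, "bad node string: %s\n", s);
                              return;
                      }
                      printf("%s selects %u node(s)\n",
                             s, numa_bitmask_weight(nodes));
                      numa_bitmask_free(nodes);
              }

              /* e.g. show_nodestring("0,2-3"); */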

       numa_parse_nodestring_all() is similar to numa_parse_nodestring(),
       but can parse all possible nodes, not only those in the current
       nodeset.

       numa_parse_cpustring() parses a character string list of cpus into a
       bit mask.  The bit mask is allocated by numa_allocate_cpumask().  The
       string is a comma-separated list of cpu numbers or cpu ranges.  A
       leading ! can be used to indicate "not" this list (in other words,
       all cpus except this list), and a leading + can be used to indicate
       that the cpu numbers in the list are relative to the task's cpuset.
       The string can be "all" to specify all ( numa_num_task_cpus() ) cpus.
       Cpu numbers are limited by the number in the system.  See
       numa_num_task_cpus() and numa_num_configured_cpus().
       Examples:  1-5,7,10   !4-5   +0-3
       Returns 0 if the string is invalid.

       numa_parse_cpustring_all() is similar to numa_parse_cpustring(), but
       can parse all possible cpus, not only those in the current cpuset.

       numa_node_size() returns the memory size of a node.  If the argument
       freep is not NULL, it is used to return the amount of free memory on
       the node.  On error it returns -1.

       numa_node_size64() works the same as numa_node_size() except that it
       returns values as long long instead of long.  This is useful on
       32-bit architectures with large nodes.
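
       For instance, a sketch that reports total and free memory per node;
       the helper name is illustrative and values are in bytes.

              #include <numa.h>
              #include <stdio.h>

              void print_node_sizes(void)
              {
                      int node;
                      for (node = 0; node <= numa_max_node(); node++) {
                              long long freemem = 0;
                              long long total = numa_node_size64(node, &freemem);
                              if (total < 0)
                                      continue;   /* error or no memory */
                              printf("node %d: %lld bytes total, %lld free\n",
                                     node, total, freemem);
                      }
              }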

       numa_preferred() returns the preferred node of the current task.
       This is the node on which the kernel preferably allocates memory,
       unless some other policy overrides this.

       numa_set_preferred() sets the preferred node for the current task to
       node.  The system will attempt to allocate memory from the preferred
       node, but will fall back to other nodes if no memory is available on
       the preferred node.  Passing a node argument of -1 specifies local
       allocation and is equivalent to calling numa_set_localalloc().

       numa_get_interleave_mask() returns the current interleave mask if the
       task's memory allocation policy is page interleaved.  Otherwise, this
       function returns an empty mask.

       numa_set_interleave_mask() sets the memory interleave mask for the
       current task to nodemask.  All new memory allocations are page
       interleaved over all nodes in the interleave mask.  Interleaving can
       be turned off again by passing an empty mask (numa_no_nodes).  The
       page interleaving only occurs on the actual page fault that puts a
       new page into the current address space.  It is also only a hint:
       the kernel will fall back to other nodes if no memory is available on
       the interleave target.

       numa_interleave_memory() interleaves size bytes of memory page by
       page from start on nodes specified in nodemask.  The size argument
       will be rounded up to a multiple of the system page size.  If
       nodemask contains nodes that are externally denied to this process,
       this call will fail.  This is a lower level function to interleave
       allocated but not yet faulted in memory.  Not yet faulted in means
       the memory is allocated using mmap(2) or shmat(2), but has not been
       accessed by the current process yet.  The memory is page interleaved
       to all nodes specified in nodemask.  Normally
       numa_alloc_interleaved() should be used for private memory instead,
       but this function is useful to handle shared memory areas.  To be
       useful the memory area should be several megabytes at least (or tens
       of megabytes of hugetlbfs mappings).  If the numa_set_strict() flag
       is true then the operation will cause a numa_error if there were
       already pages in the mapping that do not follow the policy.
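
       A sketch of interleaving a freshly mapped, not yet touched, anonymous
       region over all allowed nodes; the helper name and the mapping size
       are only examples.

              #include <numa.h>
              #include <sys/mman.h>

              void *map_interleaved(size_t len)
              {
                      void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                      if (p == MAP_FAILED)
                              return NULL;
                      /* Apply the interleave policy before the pages are
                         first touched (faulted in). */
                      numa_interleave_memory(p, len, numa_all_nodes_ptr);
                      return p;
              }

              /* e.g. void *buf = map_interleaved(64UL << 20); */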

       numa_bind() binds the current task and its children to the nodes
       specified in nodemask.  They will only run on the CPUs of the
       specified nodes and only be able to allocate memory from them.  This
       function is equivalent to calling numa_run_on_node_mask(nodemask)
       followed by numa_set_membind(nodemask).  If tasks should be bound to
       individual CPUs inside nodes consider using numa_node_to_cpus() and
       the sched_setaffinity(2) syscall.
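
       For example, binding the current task and its future children to a
       single node might look like the following sketch; the helper name and
       node 0 are illustrative.

              #include <numa.h>

              void bind_to_node(int node)
              {
                      struct bitmask *bm = numa_allocate_nodemask();
                      numa_bitmask_setbit(bm, node);
                      /* Restrict both CPU placement and memory allocation
                         to the nodes set in bm. */
                      numa_bind(bm);
                      numa_bitmask_free(bm);
              }

              /* e.g. bind_to_node(0); */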

       numa_set_localalloc() sets the memory allocation policy for the
       calling task to local allocation.  In this mode, the preferred node
       for memory allocation is effectively the node where the task is
       executing at the time of a page allocation.

       numa_set_membind() sets the memory allocation mask.  The task will
       only allocate memory from the nodes set in nodemask.  Passing an
       empty nodemask or a nodemask that contains nodes other than those in
       the mask returned by numa_get_mems_allowed() will result in an error.

       numa_get_membind() returns the mask of nodes from which memory can
       currently be allocated.  If the returned mask is equal to
       numa_all_nodes, then memory allocation is allowed from all nodes.
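
       A short sketch that restricts memory allocation to the nodes named in
       a string; the helper name and the string "0-1" are purely
       illustrative.

              #include <numa.h>

              int restrict_memory(const char *nodestr)
              {
                      struct bitmask *bm = numa_parse_nodestring(nodestr);
                      if (bm == 0)
                              return -1;          /* invalid string */
                      /* From now on this task only allocates memory from
                         the nodes set in bm. */
                      numa_set_membind(bm);
                      numa_bitmask_free(bm);
                      return 0;
              }

              /* e.g. restrict_memory("0-1"); */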

       numa_alloc_onnode() allocates memory on a specific node.  The size
       argument will be rounded up to a multiple of the system page size.
       If the specified node is externally denied to this process, this call
       will fail.  This function is relatively slow compared to the
       malloc(3) family of functions.  The memory must be freed with
       numa_free().  On errors NULL is returned.
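
       For example, a sketch that places a buffer on a given node and later
       releases it; the helper name, node number and size are illustrative.

              #include <numa.h>
              #include <string.h>

              void use_buffer_on_node(int node, size_t len)
              {
                      char *buf = numa_alloc_onnode(len, node);
                      if (buf == NULL)
                              return;
                      /* The pages are placed on the node when they are
                         first touched. */
                      memset(buf, 0, len);
                      /* ... use buf ... */
                      numa_free(buf, len);
              }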

       numa_alloc_local() allocates size bytes of memory on the local node.
       The size argument will be rounded up to a multiple of the system page
       size.  This function is relatively slow compared to the malloc(3)
       family of functions.  The memory must be freed with numa_free().  On
       errors NULL is returned.

       numa_alloc_interleaved() allocates size bytes of memory page
       interleaved on all nodes.  This function is relatively slow and
       should only be used for large areas consisting of multiple pages.
       The interleaving works at page level and will only show an effect
       when the area is large.  The allocated memory must be freed with
       numa_free().  On error, NULL is returned.

       numa_alloc_interleaved_subset() attempts to allocate size bytes of
       memory page interleaved on all nodes.  The size argument will be
       rounded up to a multiple of the system page size.  The nodes on which
       a process is allowed to allocate memory may be constrained
       externally.  If this is the case, this function may fail.  This
       function is relatively slow compared to the malloc(3) family of
       functions and should only be used for large areas consisting of
       multiple pages.  The interleaving works at page level and will only
       show an effect when the area is large.  The allocated memory must be
       freed with numa_free().  On error, NULL is returned.

       numa_alloc() allocates size bytes of memory with the current NUMA
       policy.  The size argument will be rounded up to a multiple of the
       system page size.  This function is relatively slow compared to the
       malloc(3) family of functions.  The memory must be freed with
       numa_free().  On errors NULL is returned.

       numa_realloc() changes the size of the memory area pointed to by
       old_addr from old_size to new_size.  The memory area pointed to by
       old_addr must have been allocated with one of the numa_alloc*
       functions.  The new_size will be rounded up to a multiple of the
       system page size.  The contents of the memory area will be unchanged
       to the minimum of the old and new sizes; newly allocated memory will
       be uninitialized.  The memory policy (and node bindings) associated
       with the original memory area will be preserved in the resized area.
       For example, if the initial area was allocated with a call to
       numa_alloc_onnode(), then the new pages (if the area is enlarged)
       will be allocated on the same node.  However, if no memory policy was
       set for the original area, then numa_realloc() cannot guarantee that
       the new pages will be allocated on the same node.  On success, the
       address of the resized area is returned (which might be different
       from that of the initial area), otherwise NULL is returned and errno
       is set to indicate the error.  The pointer returned by numa_realloc()
       is suitable for passing to numa_free().
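
       A sketch of enlarging an area that was allocated with
       numa_alloc_onnode(); the helper name and sizes are examples only.

              #include <numa.h>
              #include <stdio.h>

              void *grow_area(void *old, size_t old_len, size_t new_len)
              {
                      /* The resized area keeps the memory policy of the
                         original allocation; its address may change. */
                      void *p = numa_realloc(old, old_len, new_len);
                      if (p == NULL)
                              perror("numa_realloc");
                      return p;
              }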

       numa_free() frees size bytes of memory starting at start, allocated
       by the numa_alloc_* functions above.  The size argument will be
       rounded up to a multiple of the system page size.

       numa_run_on_node() runs the current task and its children on a
       specific node.  They will not migrate to CPUs of other nodes until
       the node affinity is reset with a new call to
       numa_run_on_node_mask().  Passing -1 permits the kernel to schedule
       on all nodes again.  On success, 0 is returned; on error -1 is
       returned, and errno is set to indicate the error.

       numa_run_on_node_mask() runs the current task and its children only
       on nodes specified in nodemask.  They will not migrate to CPUs of
       other nodes until the node affinity is reset with a new call to
       numa_run_on_node_mask() or numa_run_on_node().  Passing
       numa_all_nodes permits the kernel to schedule on all nodes again.  On
       success, 0 is returned; on error -1 is returned, and errno is set to
       indicate the error.
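
       For example, pinning execution to one node and then releasing the
       restriction again; the helper name and node number are illustrative.

              #include <numa.h>
              #include <stdio.h>

              void run_on_one_node(int node)
              {
                      if (numa_run_on_node(node) == -1)
                              perror("numa_run_on_node");
                      /* ... do node-local work ... */
                      /* -1 lets the kernel schedule on all nodes again. */
                      numa_run_on_node(-1);
              }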

       numa_run_on_node_mask_all() runs the current task and its children
       only on nodes specified in nodemask like numa_run_on_node_mask(), but
       without any cpuset awareness.

       numa_get_run_node_mask() returns a mask of CPUs on which the current
       task is allowed to run.

       numa_tonode_memory() puts memory on a specific node.  The constraints
       described for numa_interleave_memory() apply here too.

       numa_tonodemask_memory() puts memory on a specific set of nodes.  The
       constraints described for numa_interleave_memory() apply here too.

       numa_setlocal_memory() locates memory on the current node.  The
       constraints described for numa_interleave_memory() apply here too.

       numa_police_memory() locates memory with the current NUMA policy.
       The constraints described for numa_interleave_memory() apply here
       too.

       numa_distance() reports the distance in the machine topology between
       two nodes.  The factors are a multiple of 10.  It returns 0 when the
       distance cannot be determined.  A node has distance 10 to itself.
       Reporting the distance requires a Linux kernel version of 2.6.10 or
       newer.
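
       A sketch that prints the distance matrix for all configured nodes;
       the helper name and output layout are only examples.

              #include <numa.h>
              #include <stdio.h>

              void print_distances(void)
              {
                      int i, j, max = numa_max_node();
                      for (i = 0; i <= max; i++) {
                              for (j = 0; j <= max; j++)
                                      /* 10 = local, 0 = unknown */
                                      printf("%4d", numa_distance(i, j));
                              printf("\n");
                      }
              }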

       numa_set_bind_policy() specifies whether calls that bind memory to a
       specific node should use the preferred policy or a strict policy.
       The preferred policy allows the kernel to allocate memory on other
       nodes when there isn't enough free on the target node.  Strict will
       fail the allocation in that case.  Setting the argument to a non-zero
       value specifies strict, 0 preferred.  Note that specifying more than
       one node non strict may only use the first node in some kernel
       versions.

       numa_set_strict() sets a flag that says whether the functions
       allocating on specific nodes should use a strict policy.  Strict
       means the allocation will fail if the memory cannot be allocated on
       the target node.  The default operation is to fall back to other
       nodes.  This doesn't apply to interleave and default.

       numa_get_interleave_node() is used by libnuma internally.  It is
       probably not useful for user applications.  It uses the MPOL_F_NODE
       flag of the get_mempolicy system call, which is not intended for
       application use (its operation may change or be removed altogether in
       future kernel versions).  See get_mempolicy(2).

       numa_pagesize() returns the number of bytes in a page.  This function
       is simply a fast alternative to repeated calls to the getpagesize
       system call.  See getpagesize(2).

       numa_sched_getaffinity() retrieves a bitmask of the cpus on which a
       task may run.  The task is specified by pid.  Returns the return
       value of the sched_getaffinity system call.  See
       sched_getaffinity(2).  The bitmask must be at least the size of the
       kernel's cpu mask structure.  Use numa_allocate_cpumask() to allocate
       it.  Test the bits in the mask by calling numa_bitmask_isbitset().

       numa_sched_setaffinity() sets a task's allowed cpus to those cpus
       specified in mask.  The task is specified by pid.  Returns the return
       value of the sched_setaffinity system call.  See
       sched_setaffinity(2).  You may allocate the bitmask with
       numa_allocate_cpumask().  Or the bitmask may be smaller than the
       kernel's cpu mask structure.  For example, call numa_bitmask_alloc()
       using a maximum number of cpus from numa_num_configured_cpus().  Set
       the bits in the mask by calling numa_bitmask_setbit().
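
       A sketch that restricts the calling task to the cpus of a given node,
       combining numa_node_to_cpus() and numa_sched_setaffinity(); the
       helper name is illustrative, and pid 0 means the calling task as in
       sched_setaffinity(2).

              #include <numa.h>

              int run_on_cpus_of_node(int node)
              {
                      int rc;
                      struct bitmask *cpus = numa_allocate_cpumask();
                      if (numa_node_to_cpus(node, cpus) == -1) {
                              numa_bitmask_free(cpus);
                              return -1;
                      }
                      rc = numa_sched_setaffinity(0, cpus);
                      numa_bitmask_free(cpus);
                      return rc;
              }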

       numa_node_to_cpus() converts a node number to a bitmask of CPUs.  The
       user must pass a bitmask structure with a mask buffer long enough to
       represent all possible cpus.  Use numa_allocate_cpumask() to create
       it.  If the bitmask is not long enough errno will be set to ERANGE
       and -1 returned.  On success 0 is returned.

       numa_node_of_cpu() returns the node that a cpu belongs to.  If the
       user supplies an invalid cpu errno will be set to EINVAL and -1 will
       be returned.

       numa_allocate_cpumask() returns a bitmask of a size equal to the
       kernel's cpu mask (kernel type cpumask_t).  In other words, large
       enough to represent NR_CPUS cpus.  This number of cpus can be gotten
       by calling numa_num_possible_cpus().  The bitmask is zero-filled.

       numa_free_cpumask() frees a cpumask previously allocated by
       numa_allocate_cpumask().

       numa_allocate_nodemask() returns a bitmask of a size equal to the
       kernel's node mask (kernel type nodemask_t).  In other words, large
       enough to represent MAX_NUMNODES nodes.  This number of nodes can be
       gotten by calling numa_num_possible_nodes().  The bitmask is
       zero-filled.

       numa_free_nodemask() frees a nodemask previously allocated by
       numa_allocate_nodemask().

       numa_bitmask_alloc() allocates a bitmask structure and its associated
       bit mask.  The memory allocated for the bit mask contains enough
       words (type unsigned long) to contain n bits.  The bit mask is
       zero-filled.  The bitmask structure points to the bit mask and
       contains the n value.

       numa_bitmask_clearall() sets all bits in the bit mask to 0.  The
       bitmask structure points to the bit mask and contains its size
       (bmp->size).  The value of bmp is always returned.  Note that
       numa_bitmask_alloc() creates a zero-filled bit mask.

       numa_bitmask_clearbit() sets a specified bit in a bit mask to 0.
       Nothing is done if the n value is greater than the size of the
       bitmask (and no error is returned).  The value of bmp is always
       returned.

       numa_bitmask_equal() returns 1 if two bitmasks are equal.  It returns
       0 if they are not equal.  If the bitmask structures control bit masks
       of different sizes, the "missing" trailing bits of the smaller bit
       mask are considered to be 0.

       numa_bitmask_free() deallocates the memory of both the bitmask
       structure pointed to by bmp and the bit mask.  It is an error to
       attempt to free this bitmask twice.

       numa_bitmask_isbitset() returns the value of a specified bit in a bit
       mask.  If the n value is greater than the size of the bit map, 0 is
       returned.

       numa_bitmask_nbytes() returns the size (in bytes) of the bit mask
       controlled by bmp.  The bit masks are always full words (type
       unsigned long), and the returned size is the actual size of all those
       words.

       numa_bitmask_setall() sets all bits in the bit mask to 1.  The
       bitmask structure points to the bit mask and contains its size
       (bmp->size).  The value of bmp is always returned.

       numa_bitmask_setbit() sets a specified bit in a bit mask to 1.
       Nothing is done if n is greater than the size of the bitmask (and no
       error is returned).  The value of bmp is always returned.

       copy_bitmask_to_nodemask() copies the body (the bit map itself) of
       the bitmask structure pointed to by bmp to the nodemask_t structure
       pointed to by the nodemask pointer.  If the two areas differ in size,
       the copy is truncated to the size of the receiving field or
       zero-filled.

       copy_nodemask_to_bitmask() copies the nodemask_t structure pointed to
       by the nodemask pointer to the body (the bit map itself) of the
       bitmask structure pointed to by the bmp pointer.  If the two areas
       differ in size, the copy is truncated to the size of the receiving
       field or zero-filled.

       copy_bitmask_to_bitmask() copies the body (the bit map itself) of the
       bitmask structure pointed to by the bmpfrom pointer to the body of
       the bitmask structure pointed to by the bmpto pointer.  If the two
       areas differ in size, the copy is truncated to the size of the
       receiving field or zero-filled.

       numa_bitmask_weight() returns a count of the bits that are set in the
       body of the bitmask pointed to by the bmp argument.
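
       A short sketch of the typical bitmask life cycle (allocate, set a
       bit, test it, count the set bits, free); the helper name and the use
       of bit 0 are examples.

              #include <numa.h>
              #include <stdio.h>

              void bitmask_demo(void)
              {
                      struct bitmask *bm =
                              numa_bitmask_alloc(numa_num_possible_nodes());
                      numa_bitmask_setbit(bm, 0);   /* node 0 is an example */
                      if (numa_bitmask_isbitset(bm, 0))
                              printf("bit 0 set, %u bit(s) total\n",
                                     numa_bitmask_weight(bm));
                      numa_bitmask_free(bm);
              }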

       numa_move_pages() moves a list of pages in the address space of the
       current or a specified process.  It simply uses the move_pages system
       call.
       pid - ID of task.  If not valid, use the current task.
       count - Number of pages.
       pages - List of pages to move.
       nodes - List of nodes to which pages can be moved.
       status - Field to which status is to be returned.
       flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
       See move_pages(2).
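
       A sketch that asks for one page to be moved to node 1 and prints
       where it ended up; the helper name and the target node are examples,
       and pid 0 means the calling process as in move_pages(2).  On success
       each status entry holds the node the page is now on.

              #include <numa.h>
              #include <numaif.h>        /* MPOL_MF_MOVE */
              #include <stdio.h>

              void move_one_page(void *addr)
              {
                      void *pages[1] = { addr };
                      int nodes[1] = { 1 };      /* target node, example */
                      int status[1] = { -1 };
                      if (numa_move_pages(0, 1, pages, nodes, status,
                                          MPOL_MF_MOVE) < 0)
                              perror("numa_move_pages");
                      else
                              printf("page now on node %d\n", status[0]);
              }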

       numa_migrate_pages() simply uses the migrate_pages system call to
       cause the pages of the calling task, or a specified task, to be
       migrated from one set of nodes to another.  See migrate_pages(2).
       The bit masks representing the nodes should be allocated with
       numa_allocate_nodemask(), or with numa_bitmask_alloc() using an n
       value returned from numa_num_possible_nodes().  A task's current node
       set can be gotten by calling numa_get_membind().  Bits in the tonodes
       mask can be set by calls to numa_bitmask_setbit().
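
       For instance, a sketch that migrates the calling task's pages from
       node 0 to node 1; the helper name and node numbers are examples, and
       pid 0 means the calling task as in migrate_pages(2).

              #include <numa.h>
              #include <stdio.h>

              void migrate_0_to_1(void)
              {
                      struct bitmask *from = numa_allocate_nodemask();
                      struct bitmask *to   = numa_allocate_nodemask();
                      numa_bitmask_setbit(from, 0);
                      numa_bitmask_setbit(to, 1);
                      if (numa_migrate_pages(0, from, to) < 0)
                              perror("numa_migrate_pages");
                      numa_bitmask_free(from);
                      numa_bitmask_free(to);
              }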

       numa_error() is a libnuma internal function that can be overridden by
       the user program.  This function is called with a char * argument
       when a libnuma function fails.  Overriding the library internal
       definition makes it possible to specify a different error handling
       strategy when a libnuma function fails.  It does not affect
       numa_available().  The numa_error() function defined in libnuma
       prints an error on stderr and terminates the program if
       numa_exit_on_error is set to a non-zero value.  The default value of
       numa_exit_on_error is zero.

       numa_warn() is a libnuma internal function that can also be
       overridden by the user program.  It is called to warn the user when a
       libnuma function encounters a non-fatal error.  The default
       implementation prints a warning to stderr.  The first argument is a
       unique number identifying each warning.  After that there is a
       printf(3)-style format string and a variable number of arguments.
       numa_warn() exits the program when numa_exit_on_warn is set to a
       non-zero value.  The default value of numa_exit_on_warn is zero.
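
       As a sketch, a program can replace the default handler by defining
       its own numa_error() with the same signature; the logging shown is
       only an example.

              #include <numa.h>
              #include <stdio.h>
              #include <stdlib.h>

              /* User-supplied replacement; libnuma calls this instead of
                 its default handler when a library function fails. */
              void numa_error(char *where)
              {
                      fprintf(stderr, "libnuma failure in %s\n", where);
                      if (numa_exit_on_error)
                              exit(EXIT_FAILURE);
              }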

COMPATIBILITY WITH LIBNUMA VERSION 1
       Binaries that were compiled for libnuma version 1 need not be
       re-compiled to run with libnuma version 2.
       Source code written for libnuma version 1 may be re-compiled without
       change with version 2 installed.  To do so, in the code's Makefile
       add this option to CFLAGS:  -DNUMA_VERSION1_COMPATIBILITY

THREAD SAFETY
       numa_set_bind_policy and numa_exit_on_error are process global.  The
       other calls are thread safe.

COPYRIGHT
       Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs.  libnuma is
       under the GNU Lesser General Public License, v2.1.

SEE ALSO
       get_mempolicy(2), set_mempolicy(2), getpagesize(2), mbind(2),
       mmap(2), shmat(2), numactl(8), sched_getaffinity(2),
       sched_setaffinity(2), move_pages(2), migrate_pages(2)



SuSE Labs                        December 2007                         NUMA(3)