1NUMA(3) Linux Programmer's Manual NUMA(3)
2
3
4
6 numa - NUMA policy library
7
9 #include <numa.h>
10
11 cc ... -lnuma
12
13 int numa_available(void);
14
15 int numa_max_possible_node(void);
16 int numa_num_possible_nodes();
17
18 int numa_max_node(void);
19 int numa_num_configured_nodes();
20 struct bitmask *numa_get_mems_allowed(void);
21
22 int numa_num_configured_cpus(void);
23 struct bitmask *numa_all_nodes_ptr;
24 struct bitmask *numa_no_nodes_ptr;
25 struct bitmask *numa_all_cpus_ptr;
26
27 int numa_num_task_cpus();
28 int numa_num_task_nodes();
29
30 int numa_parse_bitmap(char *line , struct bitmask *mask);
31 struct bitmask *numa_parse_nodestring(const char *string);
32 struct bitmask *numa_parse_nodestring_all(const char *string);
33 struct bitmask *numa_parse_cpustring(const char *string);
34 struct bitmask *numa_parse_cpustring_all(const char *string);
35
36 long numa_node_size(int node, long *freep);
37 long long numa_node_size64(int node, long long *freep);
38
39 int numa_preferred(void);
40 void numa_set_preferred(int node);
41 int numa_get_interleave_node(void);
42 struct bitmask *numa_get_interleave_mask(void);
43 void numa_set_interleave_mask(struct bitmask *nodemask);
44 void numa_interleave_memory(void *start, size_t size, struct bitmask
45 *nodemask);
46 void numa_bind(struct bitmask *nodemask);
47 void numa_set_localalloc(void);
48 void numa_set_membind(struct bitmask *nodemask);
49 struct bitmask *numa_get_membind(void);
50
51 void *numa_alloc_onnode(size_t size, int node);
52 void *numa_alloc_local(size_t size);
53 void *numa_alloc_interleaved(size_t size);
54 void *numa_alloc_interleaved_subset(size_t size, struct bitmask *node‐
55 mask); void *numa_alloc(size_t size);
56 void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
57 void numa_free(void *start, size_t size);
58
59 int numa_run_on_node(int node);
60 int numa_run_on_node_mask(struct bitmask *nodemask);
61 int numa_run_on_node_mask_all(struct bitmask *nodemask);
62 struct bitmask *numa_get_run_node_mask(void);
63
64 void numa_tonode_memory(void *start, size_t size, int node);
65 void numa_tonodemask_memory(void *start, size_t size, struct bitmask
66 *nodemask);
67 void numa_setlocal_memory(void *start, size_t size);
68 void numa_police_memory(void *start, size_t size);
69 void numa_set_bind_policy(int strict);
70 void numa_set_strict(int strict);
71
72 int numa_distance(int node1, int node2);
73
74 int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
75 int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
76 int numa_node_to_cpus(int node, struct bitmask *mask);
77 void numa_node_to_cpu_update();
78 int numa_node_of_cpu(int cpu);
79
80 struct bitmask *numa_allocate_cpumask();
81
82 void numa_free_cpumask();
83 struct bitmask *numa_allocate_nodemask();
84
85 void numa_free_nodemask();
86 struct bitmask *numa_bitmask_alloc(unsigned int n);
87 struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
88 struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp, unsigned int
89 n);
90 int numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask
91 *bmp2);
92 void numa_bitmask_free(struct bitmask *bmp);
93 int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
94 unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
95 struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
96 struct bitmask *numa_bitmask_setbit(struct bitmask *bmp, unsigned int
97 n);
98 void copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *node‐
99 mask)
100 void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmask
101 *bmp)
102 void copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask
103 *bmpto)
104 unsigned int numa_bitmask_weight(const struct bitmask *bmp )
105
106 int numa_move_pages(int pid, unsigned long count, void **pages, const
107 int *nodes, int *status, int flags);
108 int numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bit‐
109 mask *tonodes);
110
111 void numa_error(char *where);
112
113 extern int numa_exit_on_error;
114 extern int numa_exit_on_warn;
115 void numa_warn(int number, char *where, ...);
116
117
119 The libnuma library offers a simple programming interface to the NUMA
120 (Non Uniform Memory Access) policy supported by the Linux kernel. On a
121 NUMA architecture some memory areas have different latency or bandwidth
122 than others.
123
124 Available policies are page interleaving (i.e., allocate in a round-
125 robin fashion from all, or a subset, of the nodes on the system), pre‐
126 ferred node allocation (i.e., preferably allocate on a particular
127 node), local allocation (i.e., allocate on the node on which the task
128 is currently executing), or allocation only on specific nodes (i.e.,
129 allocate on some subset of the available nodes). It is also possible
130 to bind tasks to specific nodes.
131
132 Numa memory allocation policy may be specified as a per-task attribute,
133 that is inherited by children tasks and processes, or as an attribute
134 of a range of process virtual address space. Numa memory policies
135 specified for a range of virtual address space are shared by all tasks
136 in the process. Furthermore, memory policies specified for a range of
137 a shared memory attached using shmat(2) or mmap(2) from shmfs/hugetlbfs
138 are shared by all processes that attach to that region. Memory poli‐
139 cies for shared disk backed file mappings are currently ignored.
140
141 The default memory allocation policy for tasks and all memory ranges is
142 local allocation. This assumes that no ancestor has installed a non-
143 default policy.
144
145 For setting a specific policy globally for all memory allocations in a
146 process and its children it is easiest to start it with the numactl(8)
147 utility. For more fine-grained policy inside an application this library
148 can be used.
149
150 All numa memory allocation policy only takes effect when a page is
151 actually faulted into the address space of a process by accessing it.
152 The numa_alloc_* functions take care of this automatically.
153
154 A node is defined as an area where all memory has the same speed as
155 seen from a particular CPU. A node can contain multiple CPUs. Caches
156 are ignored for this definition.
157
158 Most functions in this library are only concerned about numa nodes and
159 their memory. The exceptions to this are: numa_node_to_cpus(),
160 numa_node_to_cpu_update(), numa_node_of_cpu(), numa_bind(),
161 numa_run_on_node(), numa_run_on_node_mask(),
162 numa_run_on_node_mask_all(), and numa_get_run_node_mask(). These func‐
163 tions deal with the CPUs associated with numa nodes. See the descrip‐
164 tions below for more information.
165
166 Some of these functions accept or return a pointer to struct bitmask.
167 A struct bitmask controls a bit map of arbitrary length containing a
168 bit representation of nodes. The predefined variable
169 numa_all_nodes_ptr points to a bit mask that has all available nodes
170 set; numa_no_nodes_ptr points to the empty set.
171
172 Before any other calls in this library can be used numa_available()
173 must be called. If it returns -1, all other functions in this library
174 are undefined.
175
176 numa_max_possible_node() returns the number of the highest possible
177 node in a system. In other words, the size of a kernel type nodemask_t
178 (in bits) minus 1. This number can be gotten by calling numa_num_pos‐
179 sible_nodes() and subtracting 1.
180
181 numa_num_possible_nodes() returns the size of kernel's node mask (ker‐
182 nel type nodemask_t). In other words, large enough to represent the
183 maximum number of nodes that the kernel can handle. This will match the
184 kernel's MAX_NUMNODES value. This count is derived from
185 /proc/self/status, field Mems_allowed.
186
187 numa_max_node() returns the highest node number available on the cur‐
188 rent system. (See the node numbers in /sys/devices/system/node/ ).
189 Also see numa_num_configured_nodes().
190
191 numa_num_configured_nodes() returns the number of memory nodes in the
192 system. This count includes any nodes that are currently disabled. This
193 count is derived from the node numbers in /sys/devices/system/node.
194 (Depends on the kernel being configured with /sys (CONFIG_SYSFS)).
195
196 numa_get_mems_allowed() returns the mask of nodes from which the
197 process is allowed to allocate memory in its current cpuset context.
198 Any nodes that are not included in the returned bitmask will be ignored
199 in any of the following libnuma memory policy calls.
200
201 numa_num_configured_cpus() returns the number of cpus in the system.
202 This count includes any cpus that are currently disabled. This count is
203 derived from the cpu numbers in /sys/devices/system/cpu. If the kernel
204 is configured without /sys (CONFIG_SYSFS=n) then it falls back to using
205 the number of online cpus.
206
207 numa_all_nodes_ptr points to a bitmask that is allocated by the library
208 with bits representing all nodes on which the calling task may allocate
209 memory. This set may be up to all nodes on the system, or up to the
210 nodes in the current cpuset. The bitmask is allocated by a call to
211 numa_allocate_nodemask() using size numa_max_possible_node(). The set
212 of nodes to record is derived from /proc/self/status, field
213 "Mems_allowed". The user should not alter this bitmask.
214
215 numa_no_nodes_ptr points to a bitmask that is allocated by the library
216 and left all zeroes. The bitmask is allocated by a call to numa_allo‐
217 cate_nodemask() using size numa_max_possible_node(). The user should
218 not alter this bitmask.
219
220 numa_all_cpus_ptr points to a bitmask that is allocated by the library
221 with bits representing all cpus on which the calling task may execute.
222 This set may be up to all cpus on the system, or up to the cpus in the
223 current cpuset. The bitmask is allocated by a call to numa_allo‐
224 cate_cpumask() using size numa_num_possible_cpus(). The set of cpus to
225 record is derived from /proc/self/status, field "Cpus_allowed". The
226 user should not alter this bitmask.
227
228 numa_num_task_cpus() returns the number of cpus that the calling task
229 is allowed to use. This count is derived from the map /proc/self/sta‐
230 tus, field "Cpus_allowed". Also see the bitmask numa_all_cpus_ptr.
231
232 numa_num_task_nodes() returns the number of nodes on which the calling
233 task is allowed to allocate memory. This count is derived from the map
234 /proc/self/status, field "Mems_allowed". Also see the bitmask
235 numa_all_nodes_ptr.
236
237 numa_parse_bitmap() parses line , which is a character string such as
238 found in /sys/devices/system/node/nodeN/cpumap into a bitmask struc‐
239 ture. The string contains the hexadecimal representation of a bit map.
240 The bitmask may be allocated with numa_allocate_cpumask(). Returns 0
241 on success. Returns -1 on failure. This function is probably of lit‐
242 tle use to a user application, but it is used by libnuma internally.
243
244 numa_parse_nodestring() parses a character string list of nodes into a
245 bit mask. The bit mask is allocated by numa_allocate_nodemask(). The
246 string is a comma-separated list of node numbers or node ranges. A
247 leading ! can be used to indicate "not" this list (in other words, all
248 nodes except this list), and a leading + can be used to indicate that
249 the node numbers in the list are relative to the task's cpuset. The
250 string can be "all" to specify all ( numa_num_task_nodes() ) nodes.
251 Node numbers are limited by the number in the system. See
252 numa_max_node() and numa_num_configured_nodes().
253 Examples: 1-5,7,10 !4-5 +0-3
254 If the string is of 0 length, bitmask numa_no_nodes_ptr is returned.
255 Returns 0 if the string is invalid.
256
257 numa_parse_nodestring_all() is similar to numa_parse_nodestring , but
258 can parse all possible nodes, not only current nodeset.
259
260 numa_parse_cpustring() parses a character string list of cpus into a
261 bit mask. The bit mask is allocated by numa_allocate_cpumask(). The
262 string is a comma-separated list of cpu numbers or cpu ranges. A lead‐
263 ing ! can be used to indicate "not" this list (in other words, all cpus
264 except this list), and a leading + can be used to indicate that the cpu
265 numbers in the list are relative to the task's cpuset. The string can
266 be "all" to specify all ( numa_num_task_cpus() ) cpus. Cpu numbers are
267 limited by the number in the system. See numa_num_task_cpus() and
268 numa_num_configured_cpus().
269 Examples: 1-5,7,10 !4-5 +0-3
270 Returns 0 if the string is invalid.
271
272 numa_parse_cpustring_all() is similar to numa_parse_cpustring , but can
273 parse all possible cpus, not only current cpuset.
274
275 numa_node_size() returns the memory size of a node. If the argument
276 freep is not NULL, it is used to return the amount of free memory on the
277 node. On error it returns -1.
278
279 numa_node_size64() works the same as numa_node_size() except that it
280 returns values as long long instead of long. This is useful on 32-bit
281 architectures with large nodes.
282
283 numa_preferred() returns the preferred node of the current task. This
284 is the node on which the kernel preferably allocates memory, unless
285 some other policy overrides this.
286
287 numa_set_preferred() sets the preferred node for the current task to
288 node. The system will attempt to allocate memory from the preferred
289 node, but will fall back to other nodes if no memory is available on
290 the preferred node. Passing a node argument of -1 specifies local
291 allocation and is equivalent to calling numa_set_localalloc().
292
293 numa_get_interleave_mask() returns the current interleave mask if the
294 task's memory allocation policy is page interleaved. Otherwise, this
295 function returns an empty mask.
296
297 numa_set_interleave_mask() sets the memory interleave mask for the cur‐
298 rent task to nodemask. All new memory allocations are page interleaved
299 over all nodes in the interleave mask. Interleaving can be turned off
300 again by passing an empty mask (numa_no_nodes). The page interleaving
301 only occurs on the actual page fault that puts a new page into the cur‐
302 rent address space. It is also only a hint: the kernel will fall back
303 to other nodes if no memory is available on the interleave target.
304
305 numa_interleave_memory() interleaves size bytes of memory page by page
306 from start on nodes specified in nodemask. The size argument will be
307 rounded up to a multiple of the system page size. If nodemask contains
308 nodes that are externally denied to this process, this call will fail.
309 This is a lower level function to interleave allocated but not yet
310 faulted in memory. Not yet faulted in means the memory is allocated
311 using mmap(2) or shmat(2), but has not been accessed by the current
312 process yet. The memory is page interleaved to all nodes specified in
313 nodemask. Normally numa_alloc_interleaved() should be used for private
314 memory instead, but this function is useful to handle shared memory
315 areas. To be useful the memory area should be several megabytes at
316 least (or tens of megabytes of hugetlbfs mappings). If the
317 numa_set_strict() flag is true then the operation will cause a
318 numa_error if there were already pages in the mapping that do not fol‐
319 low the policy.
320
321 numa_bind() binds the current task and its children to the nodes speci‐
322 fied in nodemask. They will only run on the CPUs of the specified
323 nodes and only be able to allocate memory from them. This function is
324 equivalent to calling numa_run_on_node_mask(nodemask) followed by
325 numa_set_membind(nodemask). If tasks should be bound to individual
326 CPUs inside nodes consider using numa_node_to_cpus and the
327 sched_setaffinity(2) syscall.
328
329 numa_set_localalloc() sets the memory allocation policy for the calling
330 task to local allocation. In this mode, the preferred node for memory
331 allocation is effectively the node where the task is executing at the
332 time of a page allocation.
333
334 numa_set_membind() sets the memory allocation mask. The task will only
335 allocate memory from the nodes set in nodemask. Passing an empty node‐
336 mask or a nodemask that contains nodes other than those in the mask
337 returned by numa_get_mems_allowed() will result in an error.
338
339 numa_get_membind() returns the mask of nodes from which memory can cur‐
340 rently be allocated. If the returned mask is equal to numa_all_nodes,
341 then memory allocation is allowed from all nodes.
342
343 numa_alloc_onnode() allocates memory on a specific node. The size
344 argument will be rounded up to a multiple of the system page size. If
345 the specified node is externally denied to this process, this call will
346 fail. This function is relatively slow compared to the malloc(3) fam‐
347 ily of functions. The memory must be freed with numa_free(). On
348 errors NULL is returned.
349
350 numa_alloc_local() allocates size bytes of memory on the local node.
351 The size argument will be rounded up to a multiple of the system page
352 size. This function is relatively slow compared to the malloc(3) fam‐
353 ily of functions. The memory must be freed with numa_free(). On
354 errors NULL is returned.
355
356 numa_alloc_interleaved() allocates size bytes of memory page inter‐
357 leaved on all nodes. This function is relatively slow and should only
358 be used for large areas consisting of multiple pages. The interleaving
359 works at page level and will only show an effect when the area is
360 large. The allocated memory must be freed with numa_free(). On error,
361 NULL is returned.
362
363 numa_alloc_interleaved_subset() attempts to allocate size bytes of mem‐
364 ory page interleaved on the nodes specified in nodemask. The size
365 up to a multiple of the system page size. The nodes on which a process
366 is allowed to allocate memory may be constrained externally. If this
367 is the case, this function may fail. This function is relatively slow
368 compared to the malloc(3) family of functions and should only be used for
369 large areas consisting of multiple pages. The interleaving works at
370 page level and will only show an effect when the area is large. The
371 allocated memory must be freed with numa_free(). On error, NULL is
372 returned.
373
374 numa_alloc() allocates size bytes of memory with the current NUMA pol‐
375 icy. The size argument will be rounded up to a multiple of the system
376 page size. This function is relatively slow compared to the malloc(3)
377 family of functions. The memory must be freed with numa_free(). On
378 errors NULL is returned.
379
380 numa_realloc() changes the size of the memory area pointed to by
381 old_addr from old_size to new_size. The memory area pointed to by
382 old_addr must have been allocated with one of the numa_alloc* func‐
383 tions. The new_size will be rounded up to a multiple of the system
384 page size. The contents of the memory area will be unchanged to the
385 minimum of the old and new sizes; newly allocated memory will be unini‐
386 tialized. The memory policy (and node bindings) associated with the
387 original memory area will be preserved in the resized area. For exam‐
388 ple, if the initial area was allocated with a call to numa_alloc_onn‐
389 ode(), then the new pages (if the area is enlarged) will be allocated
390 on the same node. However, if no memory policy was set for the origi‐
391 nal area, then numa_realloc() cannot guarantee that the new pages will
392 be allocated on the same node. On success, the address of the resized
393 area is returned (which might be different from that of the initial
394 area), otherwise NULL is returned and errno is set to indicate the
395 error. The pointer returned by numa_realloc() is suitable for passing
396 to numa_free().
397
398
399 numa_free() frees size bytes of memory starting at start, allocated by
400 the numa_alloc_* functions above. The size argument will be rounded up
401 to a multiple of the system page size.
402
403 numa_run_on_node() runs the current task and its children on a specific
404 node. They will not migrate to CPUs of other nodes until the node
405 affinity is reset with a new call to numa_run_on_node_mask(). Passing
406 -1 permits the kernel to schedule on all nodes again. On success, 0 is
407 returned; on error -1 is returned, and errno is set to indicate the
408 error.
409
410 numa_run_on_node_mask() runs the current task and its children only on
411 nodes specified in nodemask. They will not migrate to CPUs of other
412 nodes until the node affinity is reset with a new call to
413 numa_run_on_node_mask() or numa_run_on_node(). Passing numa_all_nodes
414 permits the kernel to schedule on all nodes again. On success, 0 is
415 returned; on error -1 is returned, and errno is set to indicate the
416 error.
417
418 numa_run_on_node_mask_all() runs the current task and its children only
419 on nodes specified in nodemask like numa_run_on_node_mask but without
420 any cpuset awareness.
421
422 numa_get_run_node_mask() returns a mask of CPUs on which the current
423 task is allowed to run.
424
425 numa_tonode_memory() puts memory on a specific node. The constraints
426 described for numa_interleave_memory() apply here too.
427
428 numa_tonodemask_memory() puts memory on a specific set of nodes. The
429 constraints described for numa_interleave_memory() apply here too.
430
431 numa_setlocal_memory() locates memory on the current node. The con‐
432 straints described for numa_interleave_memory() apply here too.
433
434 numa_police_memory() locates memory with the current NUMA policy. The
435 constraints described for numa_interleave_memory() apply here too.
436
437 numa_distance() reports the distance in the machine topology between
438 two nodes. The factors are a multiple of 10. It returns 0 when the
439 distance cannot be determined. A node has distance 10 to itself.
440 Reporting the distance requires a Linux kernel version of 2.6.10 or
441 newer.
442
443 numa_set_bind_policy() specifies whether calls that bind memory to a
444 specific node should use the preferred policy or a strict policy. The
445 preferred policy allows the kernel to allocate memory on other nodes
446 when there isn't enough free on the target node. strict will fail the
447 allocation in that case. Setting the argument to 1 specifies strict, 0
448 preferred. Note that specifying more than one node non strict may only
449 use the first node in some kernel versions.
450
451 numa_set_strict() sets a flag that says whether the functions allocat‐
452 ing on specific nodes should use a strict policy. Strict means the
453 allocation will fail if the memory cannot be allocated on the target
454 node. Default operation is to fall back to other nodes. This doesn't
455 apply to interleave and default.
456
457 numa_get_interleave_node() is used by libnuma internally. It is proba‐
458 bly not useful for user applications. It uses the MPOL_F_NODE flag of
459 the get_mempolicy system call, which is not intended for application
460 use (its operation may change or be removed altogether in future kernel
461 versions). See get_mempolicy(2).
462
463 numa_pagesize() returns the number of bytes in a page. This function is
464 simply a fast alternative to repeated calls to the getpagesize system
465 call. See getpagesize(2).
466
467 numa_sched_getaffinity() retrieves a bitmask of the cpus on which a
468 task may run. The task is specified by pid. Returns the return value
469 of the sched_getaffinity system call. See sched_getaffinity(2). The
470 bitmask must be at least the size of the kernel's cpu mask structure.
471 Use numa_allocate_cpumask() to allocate it. Test the bits in the mask
472 by calling numa_bitmask_isbitset().
473
474 numa_sched_setaffinity() sets a task's allowed cpu's to those cpu's
475 specified in mask. The task is specified by pid. Returns the return
476 value of the sched_setaffinity system call. See sched_setaffinity(2).
477 You may allocate the bitmask with numa_allocate_cpumask(). Or the bit‐
478 mask may be smaller than the kernel's cpu mask structure. For example,
479 call numa_bitmask_alloc() using a maximum number of cpus from
480 numa_num_configured_cpus(). Set the bits in the mask by calling
481 numa_bitmask_setbit().
482
483 numa_node_to_cpus() converts a node number to a bitmask of CPUs. The
484 user must pass a bitmask structure with a mask buffer long enough to
485 represent all possible cpu's. Use numa_allocate_cpumask() to create
486 it. If the bitmask is not long enough errno will be set to ERANGE and
487 -1 returned. On success 0 is returned.
488
489 numa_node_to_cpu_update() Mark cpus bitmask of all nodes stale, then
490 get the latest bitmask by calling numa_node_to_cpus(). This allows one to
491 update the libnuma state after a CPU hotplug event. The application is
492 in charge of detecting CPU hotplug events.
493
494 numa_node_of_cpu() returns the node that a cpu belongs to. If the user
495 supplies an invalid cpu errno will be set to EINVAL and -1 will be
496 returned.
497
498 numa_allocate_cpumask () returns a bitmask of a size equal to the ker‐
499 nel's cpu mask (kernel type cpumask_t). In other words, large enough
500 to represent NR_CPUS cpus. This number of cpus can be gotten by call‐
501 ing numa_num_possible_cpus(). The bitmask is zero-filled.
502
503 numa_free_cpumask frees a cpumask previously allocated by numa_allo‐
504 cate_cpumask.
505
506 numa_allocate_nodemask() returns a bitmask of a size equal to the ker‐
507 nel's node mask (kernel type nodemask_t). In other words, large enough
508 to represent MAX_NUMNODES nodes. This number of nodes can be gotten by
509 calling numa_num_possible_nodes(). The bitmask is zero-filled.
510
511 numa_free_nodemask() frees a nodemask previously allocated by numa_allo‐
512 cate_nodemask().
513
514 numa_bitmask_alloc() allocates a bitmask structure and its associated
515 bit mask. The memory allocated for the bit mask contains enough words
516 (type unsigned long) to contain n bits. The bit mask is zero-filled.
517 The bitmask structure points to the bit mask and contains the n value.
518
519 numa_bitmask_clearall() sets all bits in the bit mask to 0. The bit‐
520 mask structure points to the bit mask and contains its size ( bmp
521 ->size). The value of bmp is always returned. Note that numa_bit‐
522 mask_alloc() creates a zero-filled bit mask.
523
524 numa_bitmask_clearbit() sets a specified bit in a bit mask to 0. Noth‐
525 ing is done if the n value is greater than the size of the bitmask (and
526 no error is returned). The value of bmp is always returned.
527
528 numa_bitmask_equal() returns 1 if two bitmasks are equal. It returns 0
529 if they are not equal. If the bitmask structures control bit masks of
530 different sizes, the "missing" trailing bits of the smaller bit mask
531 are considered to be 0.
532
533 numa_bitmask_free() deallocates the memory of both the bitmask struc‐
534 ture pointed to by bmp and the bit mask. It is an error to attempt to
535 free this bitmask twice.
536
537 numa_bitmask_isbitset() returns the value of a specified bit in a bit
538 mask. If the n value is greater than the size of the bit map, 0 is
539 returned.
540
541 numa_bitmask_nbytes() returns the size (in bytes) of the bit mask con‐
542 trolled by bmp. The bit masks are always full words (type unsigned
543 long), and the returned size is the actual size of all those words.
544
545 numa_bitmask_setall() sets all bits in the bit mask to 1. The bitmask
546 structure points to the bit mask and contains its size ( bmp ->size).
547 The value of bmp is always returned.
548
549 numa_bitmask_setbit() sets a specified bit in a bit mask to 1. Nothing
550 is done if n is greater than the size of the bitmask (and no error is
551 returned). The value of bmp is always returned.
552
553 copy_bitmask_to_nodemask() copies the body (the bit map itself) of the
554 bitmask structure pointed to by bmp to the nodemask_t structure pointed
555 to by the nodemask pointer. If the two areas differ in size, the copy
556 is truncated to the size of the receiving field or zero-filled.
557
558 copy_nodemask_to_bitmask() copies the nodemask_t structure pointed to
559 by the nodemask pointer to the body (the bit map itself) of the bitmask
560 structure pointed to by the bmp pointer. If the two areas differ in
561 size, the copy is truncated to the size of the receiving field or zero-
562 filled.
563
564 copy_bitmask_to_bitmask() copies the body (the bit map itself) of the
565 bitmask structure pointed to by the bmpfrom pointer to the body of the
566 bitmask structure pointed to by the bmpto pointer. If the two areas
567 differ in size, the copy is truncated to the size of the receiving
568 field or zero-filled.
569
570 numa_bitmask_weight() returns a count of the bits that are set in the
571 body of the bitmask pointed to by the bmp argument.
572
573 numa_move_pages() moves a list of pages in the address space of the
574 currently executing or a specified process. It simply uses the move_pages
575 system call.
576 pid - ID of task. If not valid, use the current task.
577 count - Number of pages.
578 pages - List of pages to move.
579 nodes - List of nodes to which pages can be moved.
580 status - Field to which status is to be returned.
581 flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
582 See move_pages(2).
583
584 numa_migrate_pages() simply uses the migrate_pages system call to cause
585 the pages of the calling task, or a specified task, to be migrated from
586 one set of nodes to another. See migrate_pages(2). The bit masks rep‐
587 resenting the nodes should be allocated with numa_allocate_nodemask() ,
588 or with numa_bitmask_alloc() using an n value returned from
589 numa_num_possible_nodes(). A task's current node set can be gotten by
590 calling numa_get_membind(). Bits in the tonodes mask can be set by
591 calls to numa_bitmask_setbit().
592
593 numa_error() is a libnuma internal function that can be overridden by
594 the user program. This function is called with a char * argument when
595 a libnuma function fails. Overriding the library internal definition
596 makes it possible to specify a different error handling strategy when a
597 libnuma function fails. It does not affect numa_available(). The
598 numa_error() function defined in libnuma prints an error on stderr and
599 terminates the program if numa_exit_on_error is set to a non-zero
600 value. The default value of numa_exit_on_error is zero.
601
602 numa_warn() is a libnuma internal function that can be also overridden
603 by the user program. It is called to warn the user when a libnuma
604 function encounters a non-fatal error. The default implementation
605 prints a warning to stderr. The first argument is a unique number
606 identifying each warning. After that there is a printf(3)-style format
607 string and a variable number of arguments. numa_warn exits the program
608 when numa_exit_on_warn is set to a non-zero value. The default value
609 of numa_exit_on_warn is zero.
610
611
613 Binaries that were compiled for libnuma version 1 need not be re-com‐
614 piled to run with libnuma version 2.
615 Source codes written for libnuma version 1 may be re-compiled without
616 change with version 2 installed. To do so, in the code's Makefile add
617 this option to CFLAGS: -DNUMA_VERSION1_COMPATIBILITY
618
619
621 numa_set_bind_policy and numa_exit_on_error are process global. The
622 other calls are thread safe.
623
624
626 Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs. libnuma is
627 under the GNU Lesser General Public License, v2.1.
628
629
631 get_mempolicy(2), set_mempolicy(2), getpagesize(2), mbind(2), mmap(2),
632 shmat(2), numactl(8), sched_getaffinity(2), sched_setaffinity(2),
633 move_pages(2), migrate_pages(2)
634
635
636
637SuSE Labs December 2007 NUMA(3)