1NUMA(3) Linux Programmer's Manual NUMA(3)
2
3
4
6 numa - NUMA policy library
7
9 #include <numa.h>
10
11 cc ... -lnuma
12
13 int numa_available(void);
14
15 int numa_max_possible_node(void);
16 int numa_num_possible_nodes();
17
18 int numa_max_node(void);
19 int numa_num_configured_nodes();
20 struct bitmask *numa_get_mems_allowed(void);
21
22 int numa_num_configured_cpus(void);
23 struct bitmask *numa_all_nodes_ptr;
24 struct bitmask *numa_no_nodes_ptr;
25 struct bitmask *numa_all_cpus_ptr;
26
27 int numa_num_task_cpus();
28 int numa_num_task_nodes();
29
30 int numa_parse_bitmap(char *line , struct bitmask *mask);
31 struct bitmask *numa_parse_nodestring(const char *string);
32 struct bitmask *numa_parse_nodestring_all(const char *string);
33 struct bitmask *numa_parse_cpustring(const char *string);
34 struct bitmask *numa_parse_cpustring_all(const char *string);
35
36 long numa_node_size(int node, long *freep);
37 long long numa_node_size64(int node, long long *freep);
38
39 int numa_preferred(void);
40 int numa_has_preferred_many(void);
41 struct bitmask *numa_preferred_many(void);
42 void numa_set_preferred(int node);
43 void numa_set_preferred_many(struct bitmask *nodemask);
44 int numa_get_interleave_node(void);
45 struct bitmask *numa_get_interleave_mask(void);
46 void numa_set_interleave_mask(struct bitmask *nodemask);
47 void numa_interleave_memory(void *start, size_t size, struct bitmask
48 *nodemask);
49 void numa_bind(struct bitmask *nodemask);
50 void numa_set_localalloc(void);
51 void numa_set_membind(struct bitmask *nodemask);
52 void numa_set_membind_balancing(struct bitmask *nodemask);
53 struct bitmask *numa_get_membind(void);
54
55 void *numa_alloc_onnode(size_t size, int node);
56 void *numa_alloc_local(size_t size);
57 void *numa_alloc_interleaved(size_t size);
58 void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask);
59 void *numa_alloc(size_t size);
60 void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
61 void numa_free(void *start, size_t size);
62
63 int numa_run_on_node(int node);
64 int numa_run_on_node_mask(struct bitmask *nodemask);
65 int numa_run_on_node_mask_all(struct bitmask *nodemask);
66 struct bitmask *numa_get_run_node_mask(void);
67
68 void numa_tonode_memory(void *start, size_t size, int node);
69 void numa_tonodemask_memory(void *start, size_t size, struct bitmask
70 *nodemask);
71 void numa_setlocal_memory(void *start, size_t size);
72 void numa_police_memory(void *start, size_t size);
73 void numa_set_bind_policy(int strict);
74 void numa_set_strict(int strict);
75
76 int numa_distance(int node1, int node2);
77
78 int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
79 int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
80 int numa_node_to_cpus(int node, struct bitmask *mask);
81 void numa_node_to_cpu_update();
82 int numa_node_of_cpu(int cpu);
83
84 struct bitmask *numa_allocate_cpumask();
85
86 void numa_free_cpumask(struct bitmask *b);
87 struct bitmask *numa_allocate_nodemask();
88
89 void numa_free_nodemask(struct bitmask *b);
90 struct bitmask *numa_bitmask_alloc(unsigned int n);
91 struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
92 struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp, unsigned int
93 n);
94 int numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask
95 *bmp2);
96 void numa_bitmask_free(struct bitmask *bmp);
97 int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
98 unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
99 struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
100 struct bitmask *numa_bitmask_setbit(struct bitmask *bmp, unsigned int
101 n);
102 void copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *node‐
103 mask);
104 void copy_nodemask_to_bitmask(nodemask_t *nodemask, struct bitmask
105 *bmp);
106 void copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask
107 *bmpto);
108 unsigned int numa_bitmask_weight(const struct bitmask *bmp);
109
110 int numa_move_pages(int pid, unsigned long count, void **pages, const
111 int *nodes, int *status, int flags);
112 int numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bit‐
113 mask *tonodes);
114
115 void numa_error(char *where);
116
117 extern int numa_exit_on_error;
118 extern int numa_exit_on_warn;
119 void numa_warn(int number, char *where, ...);
120
121
123 The libnuma library offers a simple programming interface to the NUMA
124 (Non Uniform Memory Access) policy supported by the Linux kernel. On a
125 NUMA architecture some memory areas have different latency or bandwidth
126 than others.
127
128 Available policies are page interleaving (i.e., allocate in a round-
129 robin fashion from all, or a subset, of the nodes on the system), pre‐
130 ferred node allocation (i.e., preferably allocate on a particular
131 node), local allocation (i.e., allocate on the node on which the task
132 is currently executing), or allocation only on specific nodes (i.e.,
133 allocate on some subset of the available nodes). It is also possible
134 to bind tasks to specific nodes.
135
136 Numa memory allocation policy may be specified as a per-task attribute,
137 that is inherited by children tasks and processes, or as an attribute
138 of a range of process virtual address space. Numa memory policies
139 specified for a range of virtual address space are shared by all tasks
140 in the process. Furthermore, memory policies specified for a range of
141 a shared memory attached using shmat(2) or mmap(2) from shmfs/hugetlbfs
142 are shared by all processes that attach to that region. Memory poli‐
143 cies for shared disk backed file mappings are currently ignored.
144
145 The default memory allocation policy for tasks and all memory ranges is
146 local allocation. This assumes that no ancestor has installed a non-
147 default policy.
148
149 For setting a specific policy globally for all memory allocations in a
150 process and its children it is easiest to start it with the numactl(8)
151 utility. For more fine-grained policy inside an application this library
152 can be used.
153
154 All NUMA memory allocation policies only take effect when a page is ac‐
155 tually faulted into the address space of a process by accessing it. The
156 numa_alloc_* functions take care of this automatically.
157
158 A node is defined as an area where all memory has the same speed as
159 seen from a particular CPU. A node can contain multiple CPUs. Caches
160 are ignored for this definition.
161
162 Most functions in this library are only concerned about numa nodes and
163 their memory. The exceptions to this are: numa_node_to_cpus(),
164 numa_node_to_cpu_update(), numa_node_of_cpu(), numa_bind(),
165 numa_run_on_node(), numa_run_on_node_mask(),
166 numa_run_on_node_mask_all(), and numa_get_run_node_mask(). These func‐
167 tions deal with the CPUs associated with numa nodes. See the descrip‐
168 tions below for more information.
169
170 Some of these functions accept or return a pointer to struct bitmask.
171 A struct bitmask controls a bit map of arbitrary length containing a
172 bit representation of nodes. The predefined variable
173 numa_all_nodes_ptr points to a bit mask that has all available nodes
174 set; numa_no_nodes_ptr points to the empty set.
175
176 Before any other calls in this library can be used numa_available()
177 must be called. If it returns -1, all other functions in this library
178 are undefined.
179
180 numa_max_possible_node() returns the number of the highest possible
181 node in a system. In other words, the size of a kernel type nodemask_t
182 (in bits) minus 1. This number can be gotten by calling numa_num_pos‐
183 sible_nodes() and subtracting 1.
184
185 numa_num_possible_nodes() returns the size of kernel's node mask (ker‐
186 nel type nodemask_t). In other words, large enough to represent the
187 maximum number of nodes that the kernel can handle. This will match the
188 kernel's MAX_NUMNODES value. This count is derived from
189 /proc/self/status, field Mems_allowed.
190
191 numa_max_node() returns the highest node number available on the cur‐
192 rent system. (See the node numbers in /sys/devices/system/node/ ).
193 Also see numa_num_configured_nodes().
194
195 numa_num_configured_nodes() returns the number of memory nodes in the
196 system. This count includes any nodes that are currently disabled. This
197 count is derived from the node numbers in /sys/devices/system/node.
198 (Depends on the kernel being configured with /sys (CONFIG_SYSFS)).
199
200 numa_get_mems_allowed() returns the mask of nodes from which the
201 process is allowed to allocate memory in its current cpuset context.
202 Any nodes that are not included in the returned bitmask will be ignored
203 in any of the following libnuma memory policy calls.
204
205 numa_num_configured_cpus() returns the number of cpus in the system.
206 This count includes any cpus that are currently disabled. This count is
207 derived from the cpu numbers in /sys/devices/system/cpu. If the kernel
208 is configured without /sys (CONFIG_SYSFS=n) then it falls back to using
209 the number of online cpus.
210
211 numa_all_nodes_ptr points to a bitmask that is allocated by the library
212 with bits representing all nodes on which the calling task may allocate
213 memory. This set may be up to all nodes on the system, or up to the
214 nodes in the current cpuset. The bitmask is allocated by a call to
215 numa_allocate_nodemask() using size numa_max_possible_node(). The set
216 of nodes to record is derived from /proc/self/status, field "Mems_al‐
217 lowed". The user should not alter this bitmask.
218
219 numa_no_nodes_ptr points to a bitmask that is allocated by the library
220 and left all zeroes. The bitmask is allocated by a call to numa_allo‐
221 cate_nodemask() using size numa_max_possible_node(). The user should
222 not alter this bitmask.
223
224 numa_all_cpus_ptr points to a bitmask that is allocated by the library
225 with bits representing all cpus on which the calling task may execute.
226 This set may be up to all cpus on the system, or up to the cpus in the
227 current cpuset. The bitmask is allocated by a call to numa_allo‐
228 cate_cpumask() using size numa_num_possible_cpus(). The set of cpus to
229 record is derived from /proc/self/status, field "Cpus_allowed". The
230 user should not alter this bitmask.
231
232 numa_num_task_cpus() returns the number of cpus that the calling task
233 is allowed to use. This count is derived from the map /proc/self/sta‐
234 tus, field "Cpus_allowed". Also see the bitmask numa_all_cpus_ptr.
235
236 numa_num_task_nodes() returns the number of nodes on which the calling
237 task is allowed to allocate memory. This count is derived from the map
238 /proc/self/status, field "Mems_allowed". Also see the bitmask
239 numa_all_nodes_ptr.
240
241 numa_parse_bitmap() parses line, which is a character string such as
242 found in /sys/devices/system/node/nodeN/cpumap into a bitmask struc‐
243 ture. The string contains the hexadecimal representation of a bit map.
244 The bitmask may be allocated with numa_allocate_cpumask(). Returns 0
245 on success. Returns -1 on failure. This function is probably of lit‐
246 tle use to a user application, but it is used by libnuma internally.
247
248 numa_parse_nodestring() parses a character string list of nodes into a
249 bit mask. The bit mask is allocated by numa_allocate_nodemask(). The
250 string is a comma-separated list of node numbers or node ranges. A
251 leading ! can be used to indicate "not" this list (in other words, all
252 nodes except this list), and a leading + can be used to indicate that
253 the node numbers in the list are relative to the task's cpuset. The
254 string can be "all" to specify all ( numa_num_task_nodes() ) nodes.
255 Node numbers are limited by the number in the system. See
256 numa_max_node() and numa_num_configured_nodes().
257 Examples: 1-5,7,10 !4-5 +0-3
258 If the string is of 0 length, bitmask numa_no_nodes_ptr is returned.
259 Returns 0 if the string is invalid.
260
261 numa_parse_nodestring_all() is similar to numa_parse_nodestring(), but
262 can parse all possible nodes, not only current nodeset.
263
264 numa_parse_cpustring() parses a character string list of cpus into a
265 bit mask. The bit mask is allocated by numa_allocate_cpumask(). The
266 string is a comma-separated list of cpu numbers or cpu ranges. A lead‐
267 ing ! can be used to indicate "not" this list (in other words, all cpus
268 except this list), and a leading + can be used to indicate that the cpu
269 numbers in the list are relative to the task's cpuset. The string can
270 be "all" to specify all ( numa_num_task_cpus() ) cpus. Cpu numbers are
271 limited by the number in the system. See numa_num_task_cpus() and
272 numa_num_configured_cpus().
273 Examples: 1-5,7,10 !4-5 +0-3
274 Returns 0 if the string is invalid.
275
276 numa_parse_cpustring_all() is similar to numa_parse_cpustring(), but can
277 parse all possible cpus, not only current cpuset.
278
279 numa_node_size() returns the memory size of a node. If the argument
280 freep is not NULL, it is used to return the amount of free memory on the
281 node. On error it returns -1.
282
283 numa_node_size64() works the same as numa_node_size(). This is useful
284 on 32-bit architectures with large nodes.
285
286 numa_preferred() returns the preferred node of the current task. This
287 is the node on which the kernel preferably allocates memory, unless
288 some other policy overrides this.
289
290 numa_has_preferred_many() returns > 0 if the system supports multiple
291 preferred nodes.
292
293 numa_preferred_many() returns the current set of preferred nodes. This
294 implies the empty set when the policy isn't one used for preference
295 (PREFERRED, PREFERRED_MANY, BIND). The caller is responsible for free‐
296 ing the mask with numa_bitmask_free().
297
298 numa_set_preferred() sets the preferred node for the current task to
299 node. The system will attempt to allocate memory from the preferred
300 node, but will fall back to other nodes if no memory is available on
301 the preferred node. Passing a node argument of -1 specifies local
302 allocation and is equivalent to calling numa_set_localalloc().
303
304 numa_set_preferred_many() sets the preferred set of nodes for the cur‐
305 rent task to nodemask. This is similar to numa_set_preferred() with
306 the exception that it utilizes a different kernel interface to specify
307 multiple preferred nodes. The caller is responsible for freeing the
308 mask with numa_bitmask_free().
309
310 numa_get_interleave_mask() returns the current interleave mask if the
311 task's memory allocation policy is page interleaved. Otherwise, this
312 function returns an empty mask.
313
314 numa_set_interleave_mask() sets the memory interleave mask for the cur‐
315 rent task to nodemask. All new memory allocations are page interleaved
316 over all nodes in the interleave mask. Interleaving can be turned off
317 again by passing an empty mask (numa_no_nodes_ptr). The page interleaving
318 only occurs on the actual page fault that puts a new page into the cur‐
319 rent address space. It is also only a hint: the kernel will fall back
320 to other nodes if no memory is available on the interleave target.
321
322 numa_interleave_memory() interleaves size bytes of memory page by page
323 from start on nodes specified in nodemask. The size argument will be
324 rounded up to a multiple of the system page size. If nodemask contains
325 nodes that are externally denied to this process, this call will fail.
326 This is a lower level function to interleave allocated but not yet
327 faulted in memory. Not yet faulted in means the memory is allocated us‐
328 ing mmap(2) or shmat(2), but has not been accessed by the current
329 process yet. The memory is page interleaved to all nodes specified in
330 nodemask. Normally numa_alloc_interleaved() should be used for private
331 memory instead, but this function is useful to handle shared memory ar‐
332 eas. To be useful the memory area should be several megabytes at least
333 (or tens of megabytes of hugetlbfs mappings). If the numa_set_strict()
334 flag is true then the operation will cause a numa_error if there were
335 already pages in the mapping that do not follow the policy.
336
337 numa_bind() binds the current task and its children to the nodes speci‐
338 fied in nodemask. They will only run on the CPUs of the specified
339 nodes and only be able to allocate memory from them. This function is
340 equivalent to calling numa_run_on_node_mask(nodemask) followed by
341 numa_set_membind(nodemask). If tasks should be bound to individual
342 CPUs inside nodes consider using numa_node_to_cpus and the
343 sched_setaffinity(2) syscall.
344
345 numa_set_localalloc() sets the memory allocation policy for the calling
346 task to local allocation. In this mode, the preferred node for memory
347 allocation is effectively the node where the task is executing at the
348 time of a page allocation.
349
350 numa_set_membind() sets the memory allocation mask. The task will only
351 allocate memory from the nodes set in nodemask. Passing an empty node‐
352 mask or a nodemask that contains nodes other than those in the mask re‐
353 turned by numa_get_mems_allowed() will result in an error.
354
355 numa_set_membind_balancing() sets the memory allocation mask and enables
356 the Linux kernel NUMA balancing for the task if the feature is sup‐
357 ported by the kernel. The task will only allocate memory from the
358 nodes set in nodemask. Passing an empty nodemask or a nodemask that
359 contains nodes other than those in the mask returned by
360 numa_get_mems_allowed() will result in an error.
361
362 numa_get_membind() returns the mask of nodes from which memory can cur‐
363 rently be allocated. If the returned mask is equal to the mask pointed
364 to by numa_all_nodes_ptr, then memory allocation is allowed from all nodes.
365
366 numa_alloc_onnode() allocates memory on a specific node. The size ar‐
367 gument will be rounded up to a multiple of the system page size. If
368 the specified node is externally denied to this process, this call will
369 fail. This function is relatively slow compared to the malloc(3) fam‐
370 ily of functions. The memory must be freed with numa_free(). On er‐
371 rors NULL is returned.
372
373 numa_alloc_local() allocates size bytes of memory on the local node.
374 The size argument will be rounded up to a multiple of the system page
375 size. This function is relatively slow compared to the malloc(3) fam‐
376 ily of functions. The memory must be freed with numa_free(). On er‐
377 rors NULL is returned.
378
379 numa_alloc_interleaved() allocates size bytes of memory page inter‐
380 leaved on all nodes. This function is relatively slow and should only
381 be used for large areas consisting of multiple pages. The interleaving
382 works at page level and will only show an effect when the area is
383 large. The allocated memory must be freed with numa_free(). On error,
384 NULL is returned.
385
386 numa_alloc_interleaved_subset() attempts to allocate size bytes of mem‐
387 ory page interleaved on the nodes in nodemask. The size argument will be rounded
388 up to a multiple of the system page size. The nodes on which a process
389 is allowed to allocate memory may be constrained externally. If this
390 is the case, this function may fail. This function is relatively slow
391 compared to the malloc(3) family of functions and should only be used
392 for large areas consisting of multiple pages. The interleaving works
393 at page level and will only show an effect when the area is large. The
394 allocated memory must be freed with numa_free(). On error, NULL is re‐
395 turned.
396
397 numa_alloc() allocates size bytes of memory with the current NUMA pol‐
398 icy. The size argument will be rounded up to a multiple of the system
399 page size. This function is relatively slow compared to the malloc(3)
400 family of functions. The memory must be freed with numa_free(). On
401 errors NULL is returned.
402
403 numa_realloc() changes the size of the memory area pointed to by
404 old_addr from old_size to new_size. The memory area pointed to by
405 old_addr must have been allocated with one of the numa_alloc* func‐
406 tions. The new_size will be rounded up to a multiple of the system
407 page size. The contents of the memory area will be unchanged up to the
408 minimum of the old and new sizes; newly allocated memory will be unini‐
409 tialized. The memory policy (and node bindings) associated with the
410 original memory area will be preserved in the resized area. For exam‐
411 ple, if the initial area was allocated with a call to numa_alloc_onn‐
412 ode(), then the new pages (if the area is enlarged) will be allocated
413 on the same node. However, if no memory policy was set for the origi‐
414 nal area, then numa_realloc() cannot guarantee that the new pages will
415 be allocated on the same node. On success, the address of the resized
416 area is returned (which might be different from that of the initial
417 area), otherwise NULL is returned and errno is set to indicate the er‐
418 ror. The pointer returned by numa_realloc() is suitable for passing to
419 numa_free().
420
421
422 numa_free() frees size bytes of memory starting at start, allocated by
423 the numa_alloc_* functions above. The size argument will be rounded up
424 to a multiple of the system page size.
425
426 numa_run_on_node() runs the current task and its children on a specific
427 node. They will not migrate to CPUs of other nodes until the node
428 affinity is reset with a new call to numa_run_on_node_mask(). Passing
429 -1 permits the kernel to schedule on all nodes again. On success, 0 is
430 returned; on error -1 is returned, and errno is set to indicate the er‐
431 ror.
432
433 numa_run_on_node_mask() runs the current task and its children only on
434 nodes specified in nodemask. They will not migrate to CPUs of other
435 nodes until the node affinity is reset with a new call to
436 numa_run_on_node_mask() or numa_run_on_node(). Passing numa_all_nodes_ptr
437 permits the kernel to schedule on all nodes again. On success, 0 is
438 returned; on error -1 is returned, and errno is set to indicate the er‐
439 ror.
440
441 numa_run_on_node_mask_all() runs the current task and its children only
442 on nodes specified in nodemask like numa_run_on_node_mask but without
443 any cpuset awareness.
444
445 numa_get_run_node_mask() returns a mask of the nodes on which the current
446 task is allowed to run.
447
448 numa_tonode_memory() puts memory on a specific node. The constraints de‐
449 scribed for numa_interleave_memory() apply here too.
450
451 numa_tonodemask_memory() puts memory on a specific set of nodes. The
452 constraints described for numa_interleave_memory() apply here too.
453
454 numa_setlocal_memory() locates memory on the current node. The con‐
455 straints described for numa_interleave_memory() apply here too.
456
457 numa_police_memory() locates memory with the current NUMA policy. The
458 constraints described for numa_interleave_memory() apply here too.
459
460 numa_distance() reports the distance in the machine topology between
461 two nodes. The factors are a multiple of 10. It returns 0 when the
462 distance cannot be determined. A node has distance 10 to itself. Re‐
463 porting the distance requires a Linux kernel version of 2.6.10 or
464 newer.
465
466 numa_set_bind_policy() specifies whether calls that bind memory to a
467 specific node should use the preferred policy or a strict policy. The
468 preferred policy allows the kernel to allocate memory on other nodes
469 when there isn't enough free on the target node. strict will fail the
470 allocation in that case. Setting the argument to 1 specifies strict, 0
471 preferred. Note that specifying more than one node non strict may only
472 use the first node in some kernel versions.
473
474 numa_set_strict() sets a flag that says whether the functions allocat‐
475 ing on specific nodes should use a strict policy. Strict means the
476 allocation will fail if the memory cannot be allocated on the target
477 node. Default operation is to fall back to other nodes. This doesn't
478 apply to interleave and default.
479
480 numa_get_interleave_node() is used by libnuma internally. It is proba‐
481 bly not useful for user applications. It uses the MPOL_F_NODE flag of
482 the get_mempolicy system call, which is not intended for application
483 use (its operation may change or be removed altogether in future kernel
484 versions). See get_mempolicy(2).
485
486 numa_pagesize() returns the number of bytes in a page. This function is
487 simply a fast alternative to repeated calls to the getpagesize system
488 call. See getpagesize(2).
489
490 numa_sched_getaffinity() retrieves a bitmask of the cpus on which a
491 task may run. The task is specified by pid. Returns the return value
492 of the sched_getaffinity system call. See sched_getaffinity(2). The
493 bitmask must be at least the size of the kernel's cpu mask structure.
494 Use numa_allocate_cpumask() to allocate it. Test the bits in the mask
495 by calling numa_bitmask_isbitset().
496
497 numa_sched_setaffinity() sets a task's allowed cpus to those cpus
498 specified in mask. The task is specified by pid. Returns the return
499 value of the sched_setaffinity system call. See sched_setaffinity(2).
500 You may allocate the bitmask with numa_allocate_cpumask(). Or the bit‐
501 mask may be smaller than the kernel's cpu mask structure. For example,
502 call numa_bitmask_alloc() using a maximum number of cpus from
503 numa_num_configured_cpus(). Set the bits in the mask by calling
504 numa_bitmask_setbit().
505
506 numa_node_to_cpus() converts a node number to a bitmask of CPUs. The
507 user must pass a bitmask structure with a mask buffer long enough to
508 represent all possible cpus. Use numa_allocate_cpumask() to create
509 it. If the bitmask is not long enough errno will be set to ERANGE and
510 -1 returned. On success 0 is returned.
511
512 numa_node_to_cpu_update() marks the cpus bitmask of all nodes stale; the
513 latest bitmask can then be obtained by calling numa_node_to_cpus(). This
514 allows updating the libnuma state after a CPU hotplug event. The appli‐
515 cation is in charge of detecting CPU hotplug events.
516
517 numa_node_of_cpu() returns the node that a cpu belongs to. If the user
518 supplies an invalid cpu errno will be set to EINVAL and -1 will be re‐
519 turned.
520
521 numa_allocate_cpumask() returns a bitmask of a size equal to the ker‐
522 nel's cpu mask (kernel type cpumask_t). In other words, large enough
523 to represent NR_CPUS cpus. This number of cpus can be gotten by call‐
524 ing numa_num_possible_cpus(). The bitmask is zero-filled.
525
526 numa_free_cpumask() frees a cpumask previously allocated by numa_allo‐
527 cate_cpumask().
528
529 numa_allocate_nodemask() returns a bitmask of a size equal to the ker‐
530 nel's node mask (kernel type nodemask_t). In other words, large enough
531 to represent MAX_NUMNODES nodes. This number of nodes can be gotten by
532 calling numa_num_possible_nodes(). The bitmask is zero-filled.
533
534 numa_free_nodemask() frees a nodemask previously allocated by numa_allo‐
535 cate_nodemask().
536
537 numa_bitmask_alloc() allocates a bitmask structure and its associated
538 bit mask. The memory allocated for the bit mask contains enough words
539 (type unsigned long) to contain n bits. The bit mask is zero-filled.
540 The bitmask structure points to the bit mask and contains the n value.
541
542 numa_bitmask_clearall() sets all bits in the bit mask to 0. The bit‐
543 mask structure points to the bit mask and contains its size
544 (bmp->size). The value of bmp is always returned. Note that numa_bit‐
545 mask_alloc() creates a zero-filled bit mask.
546
547 numa_bitmask_clearbit() sets a specified bit in a bit mask to 0. Noth‐
548 ing is done if the n value is greater than the size of the bitmask (and
549 no error is returned). The value of bmp is always returned.
550
551 numa_bitmask_equal() returns 1 if two bitmasks are equal. It returns 0
552 if they are not equal. If the bitmask structures control bit masks of
553 different sizes, the "missing" trailing bits of the smaller bit mask
554 are considered to be 0.
555
556 numa_bitmask_free() deallocates the memory of both the bitmask struc‐
557 ture pointed to by bmp and the bit mask. It is an error to attempt to
558 free this bitmask twice.
559
560 numa_bitmask_isbitset() returns the value of a specified bit in a bit
561 mask. If the n value is greater than the size of the bit map, 0 is re‐
562 turned.
563
564 numa_bitmask_nbytes() returns the size (in bytes) of the bit mask con‐
565 trolled by bmp. The bit masks are always full words (type unsigned
566 long), and the returned size is the actual size of all those words.
567
568 numa_bitmask_setall() sets all bits in the bit mask to 1. The bitmask
569 structure points to the bit mask and contains its size (bmp->size).
570 The value of bmp is always returned.
571
572 numa_bitmask_setbit() sets a specified bit in a bit mask to 1. Nothing
573 is done if n is greater than the size of the bitmask (and no error is
574 returned). The value of bmp is always returned.
575
576 copy_bitmask_to_nodemask() copies the body (the bit map itself) of the
577 bitmask structure pointed to by bmp to the nodemask_t structure pointed
578 to by the nodemask pointer. If the two areas differ in size, the copy
579 is truncated to the size of the receiving field or zero-filled.
580
581 copy_nodemask_to_bitmask() copies the nodemask_t structure pointed to
582 by the nodemask pointer to the body (the bit map itself) of the bitmask
583 structure pointed to by the bmp pointer. If the two areas differ in
584 size, the copy is truncated to the size of the receiving field or zero-
585 filled.
586
587 copy_bitmask_to_bitmask() copies the body (the bit map itself) of the
588 bitmask structure pointed to by the bmpfrom pointer to the body of the
589 bitmask structure pointed to by the bmpto pointer. If the two areas
590 differ in size, the copy is truncated to the size of the receiving
591 field or zero-filled.
592
593 numa_bitmask_weight() returns a count of the bits that are set in the
594 body of the bitmask pointed to by the bmp argument.
595
596 numa_move_pages() moves a list of pages in the address space of the
597 calling task or of the task given by pid. It simply uses the move_pages
598 system call.
599 pid - ID of task. If not valid, use the current task.
600 count - Number of pages.
601 pages - List of pages to move.
602 nodes - List of nodes to which pages can be moved.
603 status - Field to which status is to be returned.
604 flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
605 See move_pages(2).
606
607 numa_migrate_pages() simply uses the migrate_pages system call to cause
608 the pages of the calling task, or a specified task, to be migrated from
609 one set of nodes to another. See migrate_pages(2). The bit masks rep‐
610 resenting the nodes should be allocated with numa_allocate_nodemask() ,
611 or with numa_bitmask_alloc() using an n value returned from
612 numa_num_possible_nodes(). A task's current node set can be gotten by
613 calling numa_get_membind(). Bits in the tonodes mask can be set by
614 calls to numa_bitmask_setbit().
615
616 numa_error() is a libnuma internal function that can be overridden by
617 the user program. This function is called with a char * argument when
618 a libnuma function fails. Overriding the library internal definition
619 makes it possible to specify a different error handling strategy when a
620 libnuma function fails. It does not affect numa_available(). The
621 numa_error() function defined in libnuma prints an error on stderr and
622 terminates the program if numa_exit_on_error is set to a non-zero
623 value. The default value of numa_exit_on_error is zero.
624
625 numa_warn() is a libnuma internal function that can be also overridden
626 by the user program. It is called to warn the user when a libnuma
627 function encounters a non-fatal error. The default implementation
628 prints a warning to stderr. The first argument is a unique number
629 identifying each warning. After that there is a printf(3)-style format
630 string and a variable number of arguments. numa_warn exits the program
631 when numa_exit_on_warn is set to a non-zero value. The default value
632 of numa_exit_on_warn is zero.
633
634
636 Binaries that were compiled for libnuma version 1 need not be re-com‐
637 piled to run with libnuma version 2.
638 Source code written for libnuma version 1 may be re-compiled without
639 change with version 2 installed. To do so, in the code's Makefile add
640 this option to CFLAGS: -DNUMA_VERSION1_COMPATIBILITY
641
642
644 numa_set_bind_policy and numa_exit_on_error are process global. The
645 other calls are thread safe.
646
647
649 Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs. libnuma is un‐
650 der the GNU Lesser General Public License, v2.1.
651
652
654 get_mempolicy(2), set_mempolicy(2), getpagesize(2), mbind(2), mmap(2),
655 shmat(2), numactl(8), sched_getaffinity(2), sched_setaffinity(2),
656 move_pages(2), migrate_pages(2)
657
658
659
660SuSE Labs December 2007 NUMA(3)