NUMA(3)                    Linux Programmer's Manual                   NUMA(3)

NAME

       numa - NUMA policy library


SYNOPSIS

       #include <numa.h>

       cc ... -lnuma

       int numa_available(void);

       int numa_max_possible_node(void);
       int numa_num_possible_nodes();

       int numa_max_node(void);
       int numa_num_configured_nodes();
       struct bitmask *numa_get_mems_allowed(void);

       int numa_num_configured_cpus(void);
       struct bitmask *numa_all_nodes_ptr;
       struct bitmask *numa_no_nodes_ptr;
       struct bitmask *numa_all_cpus_ptr;

       int numa_num_task_cpus();
       int numa_num_task_nodes();

       int numa_parse_bitmap(char *line, struct bitmask *mask);
       struct bitmask *numa_parse_nodestring(const char *string);
       struct bitmask *numa_parse_nodestring_all(const char *string);
       struct bitmask *numa_parse_cpustring(const char *string);
       struct bitmask *numa_parse_cpustring_all(const char *string);

       long numa_node_size(int node, long *freep);
       long long numa_node_size64(int node, long long *freep);
       int numa_pagesize(void);

       int numa_preferred(void);
       void numa_set_preferred(int node);
       int numa_get_interleave_node(void);
       struct bitmask *numa_get_interleave_mask(void);
       void numa_set_interleave_mask(struct bitmask *nodemask);
       void numa_interleave_memory(void *start, size_t size,
            struct bitmask *nodemask);
       void numa_bind(struct bitmask *nodemask);
       void numa_set_localalloc(void);
       void numa_set_membind(struct bitmask *nodemask);
       struct bitmask *numa_get_membind(void);

       void *numa_alloc_onnode(size_t size, int node);
       void *numa_alloc_local(size_t size);
       void *numa_alloc_interleaved(size_t size);
       void *numa_alloc_interleaved_subset(size_t size,
            struct bitmask *nodemask);
       void *numa_alloc(size_t size);
       void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
       void numa_free(void *start, size_t size);

       int numa_run_on_node(int node);
       int numa_run_on_node_mask(struct bitmask *nodemask);
       int numa_run_on_node_mask_all(struct bitmask *nodemask);
       struct bitmask *numa_get_run_node_mask(void);

       void numa_tonode_memory(void *start, size_t size, int node);
       void numa_tonodemask_memory(void *start, size_t size,
            struct bitmask *nodemask);
       void numa_setlocal_memory(void *start, size_t size);
       void numa_police_memory(void *start, size_t size);
       void numa_set_bind_policy(int strict);
       void numa_set_strict(int strict);

       int numa_distance(int node1, int node2);

       int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
       int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
       int numa_node_to_cpus(int node, struct bitmask *mask);
       void numa_node_to_cpu_update();
       int numa_node_of_cpu(int cpu);

       struct bitmask *numa_allocate_cpumask();
       void numa_free_cpumask();
       struct bitmask *numa_allocate_nodemask();
       void numa_free_nodemask();

       struct bitmask *numa_bitmask_alloc(unsigned int n);
       struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
       struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp,
            unsigned int n);
       int numa_bitmask_equal(const struct bitmask *bmp1,
            const struct bitmask *bmp2);
       void numa_bitmask_free(struct bitmask *bmp);
       int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
       unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
       struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
       struct bitmask *numa_bitmask_setbit(struct bitmask *bmp,
            unsigned int n);
       void copy_bitmask_to_nodemask(struct bitmask *bmp,
            nodemask_t *nodemask);
       void copy_nodemask_to_bitmask(nodemask_t *nodemask,
            struct bitmask *bmp);
       void copy_bitmask_to_bitmask(struct bitmask *bmpfrom,
            struct bitmask *bmpto);
       unsigned int numa_bitmask_weight(const struct bitmask *bmp);

       int numa_move_pages(int pid, unsigned long count, void **pages,
            const int *nodes, int *status, int flags);
       int numa_migrate_pages(int pid, struct bitmask *fromnodes,
            struct bitmask *tonodes);

       void numa_error(char *where);

       extern int numa_exit_on_error;
       extern int numa_exit_on_warn;
       void numa_warn(int number, char *where, ...);


DESCRIPTION

       The libnuma library offers a simple programming interface to the NUMA
       (Non-Uniform Memory Access) policy supported by the Linux kernel. On
       a NUMA architecture some memory areas have different latency or
       bandwidth than others.

       Available policies are page interleaving (i.e., allocate in a
       round-robin fashion from all, or a subset, of the nodes on the
       system), preferred node allocation (i.e., preferably allocate on a
       particular node), local allocation (i.e., allocate on the node on
       which the task is currently executing), or allocation only on
       specific nodes (i.e., allocate on some subset of the available
       nodes).  It is also possible to bind tasks to specific nodes.

       Numa memory allocation policy may be specified as a per-task
       attribute that is inherited by child tasks and processes, or as an
       attribute of a range of process virtual address space.  Numa memory
       policies specified for a range of virtual address space are shared by
       all tasks in the process.  Furthermore, memory policies specified for
       a range of a shared memory area attached using shmat(2) or mmap(2)
       from shmfs/hugetlbfs are shared by all processes that attach to that
       region.  Memory policies for shared disk-backed file mappings are
       currently ignored.

       The default memory allocation policy for tasks and all memory ranges
       is local allocation.  This assumes that no ancestor has installed a
       non-default policy.

       For setting a specific policy globally for all memory allocations in
       a process and its children it is easiest to start it with the
       numactl(8) utility. For more fine-grained policy inside an
       application this library can be used.

       All numa memory allocation policy only takes effect when a page is
       actually faulted into the address space of a process by accessing it.
       The numa_alloc_* functions take care of this automatically.

       A node is defined as an area where all memory has the same speed as
       seen from a particular CPU.  A node can contain multiple CPUs.
       Caches are ignored for this definition.

       Most functions in this library are only concerned with numa nodes and
       their memory.  The exceptions to this are: numa_node_to_cpus(),
       numa_node_to_cpu_update(), numa_node_of_cpu(), numa_bind(),
       numa_run_on_node(), numa_run_on_node_mask(),
       numa_run_on_node_mask_all(), and numa_get_run_node_mask().  These
       functions deal with the CPUs associated with numa nodes.  See the
       descriptions below for more information.

       Some of these functions accept or return a pointer to struct
       bitmask.  A struct bitmask controls a bit map of arbitrary length
       containing a bit representation of nodes.  The predefined variable
       numa_all_nodes_ptr points to a bit mask that has all available nodes
       set; numa_no_nodes_ptr points to the empty set.

       Before any other calls in this library can be used numa_available()
       must be called. If it returns -1, all other functions in this library
       are undefined.

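       As an illustration, a minimal sketch of the initialization check
       every libnuma program needs (link with -lnuma; the main() wrapper is
       only an example):

           #include <numa.h>
           #include <stdio.h>
           #include <stdlib.h>

           int main(void)
           {
               if (numa_available() == -1) {
                   /* No NUMA support; no other libnuma call may be used. */
                   fprintf(stderr, "NUMA is not available\n");
                   exit(EXIT_FAILURE);
               }
               printf("highest node number: %d\n", numa_max_node());
               return 0;
           }
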
       numa_max_possible_node() returns the number of the highest possible
       node in a system.  In other words, the size of a kernel type
       nodemask_t (in bits) minus 1.  This number can be gotten by calling
       numa_num_possible_nodes() and subtracting 1.

       numa_num_possible_nodes() returns the size of kernel's node mask
       (kernel type nodemask_t).  In other words, large enough to represent
       the maximum number of nodes that the kernel can handle. This will
       match the kernel's MAX_NUMNODES value.  This count is derived from
       /proc/self/status, field Mems_allowed.

       numa_max_node() returns the highest node number available on the
       current system.  (See the node numbers in /sys/devices/system/node/.)
       Also see numa_num_configured_nodes().

       numa_num_configured_nodes() returns the number of memory nodes in the
       system. This count includes any nodes that are currently disabled.
       This count is derived from the node numbers in
       /sys/devices/system/node.  (Depends on the kernel being configured
       with /sys (CONFIG_SYSFS).)

       numa_get_mems_allowed() returns the mask of nodes from which the
       process is allowed to allocate memory in its current cpuset context.
       Any nodes that are not included in the returned bitmask will be
       ignored in any of the following libnuma memory policy calls.

       numa_num_configured_cpus() returns the number of cpus in the system.
       This count includes any cpus that are currently disabled. This count
       is derived from the cpu numbers in /sys/devices/system/cpu. If the
       kernel is configured without /sys (CONFIG_SYSFS=n) then it falls back
       to using the number of online cpus.

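       As an illustration, a short sketch (the helper name show_topology is
       not part of the library) that reports the configured topology and the
       nodes the task may allocate from:

           #include <numa.h>
           #include <stdio.h>

           static void show_topology(void)
           {
               struct bitmask *allowed = numa_get_mems_allowed();
               int node;

               printf("configured nodes: %d, configured cpus: %d\n",
                      numa_num_configured_nodes(),
                      numa_num_configured_cpus());
               for (node = 0; node <= numa_max_node(); node++)
                   if (numa_bitmask_isbitset(allowed, node))
                       printf("may allocate on node %d\n", node);
               numa_bitmask_free(allowed);
           }
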
       numa_all_nodes_ptr points to a bitmask that is allocated by the
       library with bits representing all nodes on which the calling task
       may allocate memory.  This set may be up to all nodes on the system,
       or up to the nodes in the current cpuset.  The bitmask is allocated
       by a call to numa_allocate_nodemask() using size
       numa_max_possible_node().  The set of nodes to record is derived from
       /proc/self/status, field "Mems_allowed".  The user should not alter
       this bitmask.

       numa_no_nodes_ptr points to a bitmask that is allocated by the
       library and left all zeroes.  The bitmask is allocated by a call to
       numa_allocate_nodemask() using size numa_max_possible_node().  The
       user should not alter this bitmask.

       numa_all_cpus_ptr points to a bitmask that is allocated by the
       library with bits representing all cpus on which the calling task may
       execute.  This set may be up to all cpus on the system, or up to the
       cpus in the current cpuset.  The bitmask is allocated by a call to
       numa_allocate_cpumask() using size numa_num_possible_cpus().  The set
       of cpus to record is derived from /proc/self/status, field
       "Cpus_allowed".  The user should not alter this bitmask.

       numa_num_task_cpus() returns the number of cpus that the calling task
       is allowed to use.  This count is derived from the map
       /proc/self/status, field "Cpus_allowed". Also see the bitmask
       numa_all_cpus_ptr.

       numa_num_task_nodes() returns the number of nodes on which the
       calling task is allowed to allocate memory.  This count is derived
       from the map /proc/self/status, field "Mems_allowed".  Also see the
       bitmask numa_all_nodes_ptr.

       numa_parse_bitmap() parses line, which is a character string such as
       found in /sys/devices/system/node/nodeN/cpumap, into a bitmask
       structure.  The string contains the hexadecimal representation of a
       bit map.  The bitmask may be allocated with numa_allocate_cpumask().
       Returns 0 on success.  Returns -1 on failure.  This function is
       probably of little use to a user application, but it is used by
       libnuma internally.

       numa_parse_nodestring() parses a character string list of nodes into
       a bit mask.  The bit mask is allocated by numa_allocate_nodemask().
       The string is a comma-separated list of node numbers or node ranges.
       A leading ! can be used to indicate "not" this list (in other words,
       all nodes except this list), and a leading + can be used to indicate
       that the node numbers in the list are relative to the task's cpuset.
       The string can be "all" to specify all ( numa_num_task_nodes() )
       nodes.  Node numbers are limited by the number in the system.  See
       numa_max_node() and numa_num_configured_nodes().
       Examples:  1-5,7,10   !4-5   +0-3
       If the string is of 0 length, bitmask numa_no_nodes_ptr is returned.
       Returns 0 if the string is invalid.

       numa_parse_nodestring_all() is similar to numa_parse_nodestring(),
       but can parse all possible nodes, not only those in the current
       nodeset.

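       A short usage sketch (the helper and the example string are
       illustrative); note that the mask returned for an empty string is
       numa_no_nodes_ptr and must not be freed:

           #include <numa.h>
           #include <stdio.h>

           static void parse_demo(const char *spec)   /* e.g. "0-1,3" */
           {
               struct bitmask *nodes = numa_parse_nodestring(spec);
               if (nodes == 0) {
                   fprintf(stderr, "invalid node string: %s\n", spec);
                   return;
               }
               printf("%u node(s) selected\n", numa_bitmask_weight(nodes));
               if (nodes != numa_no_nodes_ptr)
                   numa_bitmask_free(nodes);
           }
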
       numa_parse_cpustring() parses a character string list of cpus into a
       bit mask.  The bit mask is allocated by numa_allocate_cpumask().  The
       string is a comma-separated list of cpu numbers or cpu ranges.  A
       leading ! can be used to indicate "not" this list (in other words,
       all cpus except this list), and a leading + can be used to indicate
       that the cpu numbers in the list are relative to the task's cpuset.
       The string can be "all" to specify all ( numa_num_task_cpus() ) cpus.
       Cpu numbers are limited by the number in the system.  See
       numa_num_task_cpus() and numa_num_configured_cpus().
       Examples:  1-5,7,10   !4-5   +0-3
       Returns 0 if the string is invalid.

       numa_parse_cpustring_all() is similar to numa_parse_cpustring(), but
       can parse all possible cpus, not only those in the current cpuset.

       numa_node_size() returns the memory size of a node.  If the argument
       freep is not NULL, it is used to return the amount of free memory on
       the node.  On error it returns -1.

       numa_node_size64() works the same as numa_node_size() except that it
       returns values as long long instead of long.  This is useful on
       32-bit architectures with large nodes.

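       For example, a sketch that prints the total and free memory of every
       node in the system (the helper name is illustrative):

           #include <numa.h>
           #include <stdio.h>

           static void show_node_sizes(void)
           {
               int node;
               for (node = 0; node <= numa_max_node(); node++) {
                   long long free_bytes;
                   long long total = numa_node_size64(node, &free_bytes);
                   if (total == -1)
                       continue;    /* node has no memory, or error */
                   printf("node %d: %lld MB total, %lld MB free\n",
                          node, total >> 20, free_bytes >> 20);
               }
           }
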
       numa_preferred() returns the preferred node of the current task.
       This is the node on which the kernel preferably allocates memory,
       unless some other policy overrides this.

       numa_set_preferred() sets the preferred node for the current task to
       node.  The system will attempt to allocate memory from the preferred
       node, but will fall back to other nodes if no memory is available on
       the preferred node.  Passing a node of -1 specifies local allocation
       and is equivalent to calling numa_set_localalloc().

       numa_get_interleave_mask() returns the current interleave mask if the
       task's memory allocation policy is page interleaved.  Otherwise, this
       function returns an empty mask.

       numa_set_interleave_mask() sets the memory interleave mask for the
       current task to nodemask.  All new memory allocations are page
       interleaved over all nodes in the interleave mask. Interleaving can
       be turned off again by passing an empty mask (numa_no_nodes).  The
       page interleaving only occurs on the actual page fault that puts a
       new page into the current address space. It is also only a hint: the
       kernel will fall back to other nodes if no memory is available on the
       interleave target.

       numa_interleave_memory() interleaves size bytes of memory page by
       page from start on the nodes specified in nodemask.  The size
       argument will be rounded up to a multiple of the system page size.
       If nodemask contains nodes that are externally denied to this
       process, this call will fail.  This is a lower level function to
       interleave memory that is allocated but not yet faulted in.  "Not yet
       faulted in" means the memory is allocated using mmap(2) or shmat(2),
       but has not been accessed by the current process yet.  The memory is
       page interleaved to all nodes specified in nodemask.  Normally
       numa_alloc_interleaved() should be used for private memory instead,
       but this function is useful to handle shared memory areas.  To be
       useful the memory area should be at least several megabytes (or tens
       of megabytes of hugetlbfs mappings).  If the numa_set_strict() flag
       is true then the operation will cause a numa_error if there were
       already pages in the mapping that do not follow the policy.

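       A sketch, under the constraints just described, of interleaving an
       anonymous mmap(2) region before it is first touched (the helper name
       is illustrative):

           #include <numa.h>
           #include <sys/mman.h>
           #include <stdlib.h>

           static void *interleaved_map(size_t size)
           {
               void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
               if (p == MAP_FAILED)
                   return NULL;
               /* Interleave the not-yet-faulted pages over all nodes the
                  task may allocate on; placement happens on first touch. */
               numa_interleave_memory(p, size, numa_all_nodes_ptr);
               return p;
           }
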
       numa_bind() binds the current task and its children to the nodes
       specified in nodemask.  They will only run on the CPUs of the
       specified nodes and only be able to allocate memory from them.  This
       function is equivalent to calling numa_run_on_node_mask(nodemask)
       followed by numa_set_membind(nodemask).  If tasks should be bound to
       individual CPUs inside nodes consider using numa_node_to_cpus() and
       the sched_setaffinity(2) syscall.

       numa_set_localalloc() sets the memory allocation policy for the
       calling task to local allocation.  In this mode, the preferred node
       for memory allocation is effectively the node where the task is
       executing at the time of a page allocation.

       numa_set_membind() sets the memory allocation mask.  The task will
       only allocate memory from the nodes set in nodemask.  Passing an
       empty nodemask or a nodemask that contains nodes other than those in
       the mask returned by numa_get_mems_allowed() will result in an error.

       numa_get_membind() returns the mask of nodes from which memory can
       currently be allocated.  If the returned mask is equal to
       numa_all_nodes, then memory allocation is allowed from all nodes.

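       For example, a sketch that restricts all further allocations of the
       task to node 0 (the node number is only an example; the mask may be
       freed after the call, since the policy is recorded by the kernel):

           #include <numa.h>

           static void bind_memory_to_node0(void)
           {
               struct bitmask *nodes = numa_allocate_nodemask();
               numa_bitmask_setbit(nodes, 0);   /* only node 0 */
               numa_set_membind(nodes);
               numa_bitmask_free(nodes);
           }
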
       numa_alloc_onnode() allocates memory on a specific node.  The size
       argument will be rounded up to a multiple of the system page size.
       If the specified node is externally denied to this process, this call
       will fail.  This function is relatively slow compared to the
       malloc(3) family of functions.  The memory must be freed with
       numa_free().  On errors NULL is returned.

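       A minimal usage sketch (node 0 and the size are illustrative):

           #include <numa.h>
           #include <string.h>

           static void onnode_demo(void)
           {
               size_t len = 1 << 20;              /* rounded up to pages */
               char *buf = numa_alloc_onnode(len, 0);
               if (buf == NULL)
                   return;                        /* allocation failed */
               memset(buf, 0, len);  /* touching the pages places them */
               numa_free(buf, len);  /* the size must be passed back */
           }
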
       numa_alloc_local() allocates size bytes of memory on the local node.
       The size argument will be rounded up to a multiple of the system page
       size.  This function is relatively slow compared to the malloc(3)
       family of functions.  The memory must be freed with numa_free().  On
       errors NULL is returned.

       numa_alloc_interleaved() allocates size bytes of memory page
       interleaved on all nodes. This function is relatively slow and should
       only be used for large areas consisting of multiple pages. The
       interleaving works at page level and will only show an effect when
       the area is large.  The allocated memory must be freed with
       numa_free().  On error, NULL is returned.

       numa_alloc_interleaved_subset() attempts to allocate size bytes of
       memory page interleaved on the nodes specified in nodemask.  The size
       argument will be rounded up to a multiple of the system page size.
       The nodes on which a process is allowed to allocate memory may be
       constrained externally.  If this is the case, this function may fail.
       This function is relatively slow compared to the malloc(3) family of
       functions and should only be used for large areas consisting of
       multiple pages.  The interleaving works at page level and will only
       show an effect when the area is large.  The allocated memory must be
       freed with numa_free().  On error, NULL is returned.

       numa_alloc() allocates size bytes of memory with the current NUMA
       policy.  The size argument will be rounded up to a multiple of the
       system page size.  This function is relatively slow compared to the
       malloc(3) family of functions.  The memory must be freed with
       numa_free().  On errors NULL is returned.

       numa_realloc() changes the size of the memory area pointed to by
       old_addr from old_size to new_size.  The memory area pointed to by
       old_addr must have been allocated with one of the numa_alloc*
       functions.  The new_size will be rounded up to a multiple of the
       system page size.  The contents of the memory area will be unchanged
       up to the minimum of the old and new sizes; newly allocated memory
       will be uninitialized.  The memory policy (and node bindings)
       associated with the original memory area will be preserved in the
       resized area.  For example, if the initial area was allocated with a
       call to numa_alloc_onnode(), then the new pages (if the area is
       enlarged) will be allocated on the same node.  However, if no memory
       policy was set for the original area, then numa_realloc() cannot
       guarantee that the new pages will be allocated on the same node. On
       success, the address of the resized area is returned (which might be
       different from that of the initial area), otherwise NULL is returned
       and errno is set to indicate the error. The pointer returned by
       numa_realloc() is suitable for passing to numa_free().

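       A sketch of growing an area allocated with one of the numa_alloc*
       functions (the helper and sizes are illustrative; on failure the
       original area is assumed to remain valid, as with realloc(3)):

           #include <numa.h>

           static char *grow_buffer(char *buf, size_t old_len,
                                    size_t new_len)
           {
               /* buf must come from a numa_alloc* function. */
               char *p = numa_realloc(buf, old_len, new_len);
               if (p == NULL)
                   return buf;  /* resize failed; errno describes why */
               return p;        /* may differ from buf; numa_free later */
           }
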
       numa_free() frees size bytes of memory starting at start, allocated
       by the numa_alloc_* functions above.  The size argument will be
       rounded up to a multiple of the system page size.

       numa_run_on_node() runs the current task and its children on a
       specific node. They will not migrate to CPUs of other nodes until the
       node affinity is reset with a new call to numa_run_on_node_mask().
       Passing -1 permits the kernel to schedule on all nodes again.  On
       success, 0 is returned; on error -1 is returned, and errno is set to
       indicate the error.

       numa_run_on_node_mask() runs the current task and its children only
       on nodes specified in nodemask.  They will not migrate to CPUs of
       other nodes until the node affinity is reset with a new call to
       numa_run_on_node_mask() or numa_run_on_node().  Passing
       numa_all_nodes permits the kernel to schedule on all nodes again.  On
       success, 0 is returned; on error -1 is returned, and errno is set to
       indicate the error.

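       For example, a sketch that confines execution to node 1 and later
       lifts the restriction (the node number and helper are illustrative):

           #include <numa.h>

           static int run_on_node1_then_reset(void)
           {
               if (numa_run_on_node(1) == -1)
                   return -1;          /* errno describes the failure */
               /* ... work that should execute on node 1's CPUs ... */
               return numa_run_on_node(-1);  /* allow all nodes again */
           }
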
       numa_run_on_node_mask_all() runs the current task and its children
       only on nodes specified in nodemask like numa_run_on_node_mask(), but
       without any cpuset awareness.

       numa_get_run_node_mask() returns a mask of the nodes on whose CPUs
       the current task is allowed to run.

       numa_tonode_memory() puts memory on a specific node. The constraints
       described for numa_interleave_memory() apply here too.

       numa_tonodemask_memory() puts memory on a specific set of nodes. The
       constraints described for numa_interleave_memory() apply here too.

       numa_setlocal_memory() locates memory on the current node. The
       constraints described for numa_interleave_memory() apply here too.

       numa_police_memory() locates memory with the current NUMA policy. The
       constraints described for numa_interleave_memory() apply here too.

       numa_distance() reports the distance in the machine topology between
       two nodes.  The factors are a multiple of 10. It returns 0 when the
       distance cannot be determined.  A node has distance 10 to itself.
       Reporting the distance requires a Linux kernel version of 2.6.10 or
       newer.

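       A sketch that prints the node distance matrix, similar to the table
       printed by numactl(8) with --hardware (the helper name is
       illustrative):

           #include <numa.h>
           #include <stdio.h>

           static void show_distances(void)
           {
               int maxnode = numa_max_node();
               for (int a = 0; a <= maxnode; a++) {
                   for (int b = 0; b <= maxnode; b++)
                       printf("%4d", numa_distance(a, b));
                   printf("\n");
               }
           }
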
       numa_set_bind_policy() specifies whether calls that bind memory to a
       specific node should use the preferred policy or a strict policy.
       The preferred policy allows the kernel to allocate memory on other
       nodes when there isn't enough free on the target node; strict will
       fail the allocation in that case.  Setting the argument to 1
       specifies strict, 0 preferred.  Note that specifying more than one
       node without strict binding may only use the first node in some
       kernel versions.

       numa_set_strict() sets a flag that says whether the functions
       allocating on specific nodes should use a strict policy. Strict means
       the allocation will fail if the memory cannot be allocated on the
       target node.  The default operation is to fall back to other nodes.
       This doesn't apply to interleave and default.

       numa_get_interleave_node() is used by libnuma internally. It is
       probably not useful for user applications.  It uses the MPOL_F_NODE
       flag of the get_mempolicy system call, which is not intended for
       application use (its operation may change or be removed altogether in
       future kernel versions). See get_mempolicy(2).

       numa_pagesize() returns the number of bytes in a page. This function
       is simply a fast alternative to repeated calls to the getpagesize
       system call.  See getpagesize(2).

       numa_sched_getaffinity() retrieves a bitmask of the cpus on which a
       task may run.  The task is specified by pid.  Returns the return
       value of the sched_getaffinity system call.  See
       sched_getaffinity(2).  The bitmask must be at least the size of the
       kernel's cpu mask structure.  Use numa_allocate_cpumask() to allocate
       it.  Test the bits in the mask by calling numa_bitmask_isbitset().

       numa_sched_setaffinity() sets a task's allowed cpus to those
       specified in mask.  The task is specified by pid.  Returns the return
       value of the sched_setaffinity system call.  See
       sched_setaffinity(2).  You may allocate the bitmask with
       numa_allocate_cpumask().  Or the bitmask may be smaller than the
       kernel's cpu mask structure. For example, call numa_bitmask_alloc()
       using a maximum number of cpus from numa_num_configured_cpus().  Set
       the bits in the mask by calling numa_bitmask_setbit().

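       A sketch pinning the calling task to cpu 0 (the cpu number is
       illustrative; a pid of 0 denotes the calling thread, see
       sched_setaffinity(2)):

           #include <numa.h>

           static int pin_to_cpu0(void)
           {
               struct bitmask *cpus = numa_allocate_cpumask();
               int ret;

               numa_bitmask_setbit(cpus, 0);
               ret = numa_sched_setaffinity(0, cpus);
               numa_bitmask_free(cpus);
               return ret;   /* return value of sched_setaffinity(2) */
           }
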
       numa_node_to_cpus() converts a node number to a bitmask of CPUs. The
       user must pass a bitmask structure with a mask buffer long enough to
       represent all possible cpus.  Use numa_allocate_cpumask() to create
       it.  If the bitmask is not long enough errno will be set to ERANGE
       and -1 returned. On success 0 is returned.

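       For example, a sketch listing the cpus of a given node (the helper
       name is illustrative; bmp->size is the mask size in bits):

           #include <numa.h>
           #include <stdio.h>

           static void show_cpus_of_node(int node)
           {
               struct bitmask *cpus = numa_allocate_cpumask();
               if (numa_node_to_cpus(node, cpus) == 0) {
                   for (unsigned long cpu = 0; cpu < cpus->size; cpu++)
                       if (numa_bitmask_isbitset(cpus, cpu))
                           printf("node %d has cpu %lu\n", node, cpu);
               }
               numa_bitmask_free(cpus);
           }
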
       numa_node_to_cpu_update() marks the cpu bitmasks of all nodes stale;
       the next call to numa_node_to_cpus() then fetches the latest bitmask.
       This allows the libnuma state to be updated after a CPU hotplug
       event.  The application is in charge of detecting CPU hotplug events.

       numa_node_of_cpu() returns the node that a cpu belongs to. If the
       user supplies an invalid cpu errno will be set to EINVAL and -1 will
       be returned.

       numa_allocate_cpumask() returns a bitmask of a size equal to the
       kernel's cpu mask (kernel type cpumask_t).  In other words, large
       enough to represent NR_CPUS cpus.  This number of cpus can be gotten
       by calling numa_num_possible_cpus().  The bitmask is zero-filled.

       numa_free_cpumask() frees a cpumask previously allocated by
       numa_allocate_cpumask().

       numa_allocate_nodemask() returns a bitmask of a size equal to the
       kernel's node mask (kernel type nodemask_t).  In other words, large
       enough to represent MAX_NUMNODES nodes.  This number of nodes can be
       gotten by calling numa_num_possible_nodes().  The bitmask is
       zero-filled.

       numa_free_nodemask() frees a nodemask previously allocated by
       numa_allocate_nodemask().

       numa_bitmask_alloc() allocates a bitmask structure and its associated
       bit mask.  The memory allocated for the bit mask contains enough
       words (type unsigned long) to contain n bits.  The bit mask is
       zero-filled.  The bitmask structure points to the bit mask and
       contains the n value.

       numa_bitmask_clearall() sets all bits in the bit mask to 0.  The
       bitmask structure points to the bit mask and contains its size
       (bmp->size).  The value of bmp is always returned.  Note that
       numa_bitmask_alloc() creates a zero-filled bit mask.

       numa_bitmask_clearbit() sets a specified bit in a bit mask to 0.
       Nothing is done if the n value is greater than the size of the
       bitmask (and no error is returned). The value of bmp is always
       returned.

       numa_bitmask_equal() returns 1 if two bitmasks are equal.  It returns
       0 if they are not equal.  If the bitmask structures control bit masks
       of different sizes, the "missing" trailing bits of the smaller bit
       mask are considered to be 0.

       numa_bitmask_free() deallocates the memory of both the bitmask
       structure pointed to by bmp and the bit mask.  It is an error to
       attempt to free this bitmask twice.

       numa_bitmask_isbitset() returns the value of a specified bit in a bit
       mask.  If the n value is greater than the size of the bit map, 0 is
       returned.

       numa_bitmask_nbytes() returns the size (in bytes) of the bit mask
       controlled by bmp.  The bit masks are always full words (type
       unsigned long), and the returned size is the actual size of all those
       words.

       numa_bitmask_setall() sets all bits in the bit mask to 1.  The
       bitmask structure points to the bit mask and contains its size
       (bmp->size).  The value of bmp is always returned.

       numa_bitmask_setbit() sets a specified bit in a bit mask to 1.
       Nothing is done if n is greater than the size of the bitmask (and no
       error is returned). The value of bmp is always returned.

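       Because the set and clear calls return bmp, mask construction can be
       chained.  A brief sketch (the helper name and node numbers are
       illustrative):

           #include <numa.h>

           static struct bitmask *make_mask_of_nodes_0_and_2(void)
           {
               /* Large enough for every possible node. */
               struct bitmask *bmp =
                   numa_bitmask_alloc(numa_num_possible_nodes());
               numa_bitmask_setbit(numa_bitmask_setbit(bmp, 0), 2);
               return bmp;   /* caller frees with numa_bitmask_free() */
           }
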
       copy_bitmask_to_nodemask() copies the body (the bit map itself) of
       the bitmask structure pointed to by bmp to the nodemask_t structure
       pointed to by the nodemask pointer. If the two areas differ in size,
       the copy is truncated to the size of the receiving field or
       zero-filled.

       copy_nodemask_to_bitmask() copies the nodemask_t structure pointed to
       by the nodemask pointer to the body (the bit map itself) of the
       bitmask structure pointed to by the bmp pointer. If the two areas
       differ in size, the copy is truncated to the size of the receiving
       field or zero-filled.

       copy_bitmask_to_bitmask() copies the body (the bit map itself) of the
       bitmask structure pointed to by the bmpfrom pointer to the body of
       the bitmask structure pointed to by the bmpto pointer.  If the two
       areas differ in size, the copy is truncated to the size of the
       receiving field or zero-filled.

       numa_bitmask_weight() returns a count of the bits that are set in the
       body of the bitmask pointed to by the bmp argument.

       numa_move_pages() moves a list of pages in the address space of the
       current or a specified process.  It simply uses the move_pages system
       call.
       pid - ID of task.  If not valid, use the current task.
       count - Number of pages.
       pages - List of pages to move.
       nodes - List of nodes to which pages can be moved.
       status - Field to which status is to be returned.
       flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
       See move_pages(2).

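       A sketch moving a single page of the calling process to node 1 (the
       node number is illustrative; the MPOL_MF_MOVE flag comes from
       <numaif.h>, and a pid of 0 denotes the calling process):

           #include <numa.h>
           #include <numaif.h>
           #include <stdio.h>

           static void move_one_page(void *addr)  /* page-aligned address */
           {
               void *pages[1] = { addr };
               int nodes[1] = { 1 };    /* destination node */
               int status[1];

               if (numa_move_pages(0, 1, pages, nodes, status,
                                   MPOL_MF_MOVE) == 0)
                   printf("page now on node %d\n", status[0]);
           }
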
       numa_migrate_pages() simply uses the migrate_pages system call to
       cause the pages of the calling task, or a specified task, to be
       migrated from one set of nodes to another.  See migrate_pages(2).
       The bit masks representing the nodes should be allocated with
       numa_allocate_nodemask(), or with numa_bitmask_alloc() using an n
       value returned from numa_num_possible_nodes().  A task's current node
       set can be gotten by calling numa_get_membind().  Bits in the tonodes
       mask can be set by calls to numa_bitmask_setbit().

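       A sketch migrating the calling task's pages from node 0 to node 1
       (node numbers are illustrative; a pid of 0 denotes the calling task,
       see migrate_pages(2)):

           #include <numa.h>

           static int migrate_0_to_1(void)
           {
               struct bitmask *from = numa_allocate_nodemask();
               struct bitmask *to = numa_allocate_nodemask();
               int ret;

               numa_bitmask_setbit(from, 0);
               numa_bitmask_setbit(to, 1);
               ret = numa_migrate_pages(0, from, to);
               numa_bitmask_free(from);
               numa_bitmask_free(to);
               return ret;
           }
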
       numa_error() is a libnuma internal function that can be overridden by
       the user program.  This function is called with a char * argument
       when a libnuma function fails.  Overriding the library internal
       definition makes it possible to specify a different error handling
       strategy when a libnuma function fails.  It does not affect
       numa_available().  The numa_error() function defined in libnuma
       prints an error on stderr and terminates the program if
       numa_exit_on_error is set to a non-zero value.  The default value of
       numa_exit_on_error is zero.

       numa_warn() is a libnuma internal function that can also be
       overridden by the user program.  It is called to warn the user when a
       libnuma function encounters a non-fatal error.  The default
       implementation prints a warning to stderr.  The first argument is a
       unique number identifying each warning. After that there is a
       printf(3)-style format string and a variable number of arguments.
       numa_warn exits the program when numa_exit_on_warn is set to a
       non-zero value.  The default value of numa_exit_on_warn is zero.

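       For example, a sketch of overriding the handler with one that merely
       logs and continues (callers still see the failing return values):

           #include <numa.h>
           #include <stdio.h>

           /* Overrides the library-internal definition, as described
              above. */
           void numa_error(char *where)
           {
               fprintf(stderr, "libnuma failure in %s\n", where);
               /* do not exit */
           }
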

Compatibility with libnuma version 1

       Binaries that were compiled for libnuma version 1 need not be
       recompiled to run with libnuma version 2.
       Source code written for libnuma version 1 may be recompiled without
       change with version 2 installed. To do so, in the code's Makefile add
       this option to CFLAGS:  -DNUMA_VERSION1_COMPATIBILITY


THREAD SAFETY

       numa_set_bind_policy and numa_exit_on_error are process global. The
       other calls are thread safe.


COPYRIGHT

       Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs.  libnuma is
       under the GNU Lesser General Public License, v2.1.


SEE ALSO

       get_mempolicy(2), set_mempolicy(2), getpagesize(2), mbind(2),
       mmap(2), shmat(2), numactl(8), sched_getaffinity(2),
       sched_setaffinity(2), move_pages(2), migrate_pages(2)



SuSE Labs                        December 2007                         NUMA(3)