1NUMA(3)                    Linux Programmer's Manual                   NUMA(3)
2
3
4

NAME

6       numa - NUMA policy library
7

SYNOPSIS

9       #include <numa.h>
10
11       cc ... -lnuma
12
13       int numa_available(void);
14
15       int numa_max_possible_node(void);
16       int numa_num_possible_nodes();
17
18       int numa_max_node(void);
19       int numa_num_configured_nodes();
20       struct bitmask *numa_get_mems_allowed(void);
21
22       int numa_num_configured_cpus(void);
23       struct bitmask *numa_all_nodes_ptr;
24       struct bitmask *numa_no_nodes_ptr;
25       struct bitmask *numa_all_cpus_ptr;
26
27       int numa_num_task_cpus();
28       int numa_num_task_nodes();
29
30       int numa_parse_bitmap(char *line , struct bitmask *mask);
31       struct bitmask *numa_parse_nodestring(const char *string);
32       struct bitmask *numa_parse_nodestring_all(const char *string);
33       struct bitmask *numa_parse_cpustring(const char *string);
34       struct bitmask *numa_parse_cpustring_all(const char *string);
35
36       long long numa_node_size(int node, long long *freep);
37       long long numa_node_size64(int node, long long *freep);
38
39       int numa_preferred(void);
40       int numa_has_preferred_many(void);
41       struct bitmask *numa_preferred_many(void);
42       void numa_set_preferred(int node);
43       void numa_set_preferred_many(struct bitmask *nodemask);
44       int numa_get_interleave_node(void);
45       struct bitmask *numa_get_interleave_mask(void);
46       void numa_set_interleave_mask(struct bitmask *nodemask);
47       void  numa_interleave_memory(void  *start,  size_t size, struct bitmask
48       *nodemask);
49       void numa_bind(struct bitmask *nodemask);
50       void numa_set_localalloc(void);
51       void numa_set_membind(struct bitmask *nodemask);
52       void numa_set_membind_balancing(struct bitmask *nodemask);
53       struct bitmask *numa_get_membind(void);
54
55       void *numa_alloc_onnode(size_t size, int node);
56       void *numa_alloc_local(size_t size);
57       void *numa_alloc_interleaved(size_t size);
58       void *numa_alloc_interleaved_subset(size_t size, struct bitmask *nodemask);
59       void *numa_alloc(size_t size);
60       void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
61       void numa_free(void *start, size_t size);
62
63       int numa_run_on_node(int node);
64       int numa_run_on_node_mask(struct bitmask *nodemask);
65       int numa_run_on_node_mask_all(struct bitmask *nodemask);
66       struct bitmask *numa_get_run_node_mask(void);
67
68       void numa_tonode_memory(void *start, size_t size, int node);
69       void  numa_tonodemask_memory(void  *start,  size_t size, struct bitmask
70       *nodemask);
71       void numa_setlocal_memory(void *start, size_t size);
72       void numa_police_memory(void *start, size_t size);
73       void numa_set_bind_policy(int strict);
74       void numa_set_strict(int strict);
75
76       int numa_distance(int node1, int node2);
77
78       int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
79       int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
80       int numa_node_to_cpus(int node, struct bitmask *mask);
81       void numa_node_to_cpu_update();
82       int numa_node_of_cpu(int cpu);
83
84       struct bitmask *numa_allocate_cpumask();
85
86       void numa_free_cpumask();
87       struct bitmask *numa_allocate_nodemask();
88
89       void numa_free_nodemask();
90       struct bitmask *numa_bitmask_alloc(unsigned int n);
91       struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
92       struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp, unsigned int
93       n);
94       int numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask
95       *bmp2);
96       void numa_bitmask_free(struct bitmask *bmp);
97       int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
98       unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
99       struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
100       struct bitmask *numa_bitmask_setbit(struct bitmask *bmp,  unsigned  int
101       n);
102       void  copy_bitmask_to_nodemask(struct  bitmask  *bmp, nodemask_t *node‐
103       mask);
104       void  copy_nodemask_to_bitmask(nodemask_t  *nodemask,  struct   bitmask
105       *bmp);
106       void  copy_bitmask_to_bitmask(struct  bitmask  *bmpfrom, struct bitmask
107       *bmpto);
108       unsigned int numa_bitmask_weight(const struct bitmask *bmp);
109
110       int numa_move_pages(int pid, unsigned long count, void  **pages,  const
111       int *nodes, int *status, int flags);
112       int  numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bit‐
113       mask *tonodes);
114
115       void numa_error(char *where);
116
117       extern int numa_exit_on_error;
118       extern int numa_exit_on_warn;
119       void numa_warn(int number, char *where, ...);
120
121

DESCRIPTION

123       The libnuma library offers a simple programming interface to  the  NUMA
124       (Non  Uniform Memory Access) policy supported by the Linux kernel. On a
125       NUMA architecture some memory areas have different latency or bandwidth
126       than others.
127
128       Available  policies  are  page interleaving (i.e., allocate in a round-
129       robin fashion from all, or a subset, of the nodes on the system),  pre‐
130       ferred  node  allocation  (i.e.,  preferably  allocate  on a particular
131       node), local allocation (i.e., allocate on the node on which  the  task
132       is  currently  executing),  or allocation only on specific nodes (i.e.,
133       allocate on some subset of the available nodes).  It is  also  possible
134       to bind tasks to specific nodes.
135
136       Numa memory allocation policy may be specified as a per-task attribute,
137       that is inherited by children tasks and processes, or as  an  attribute
138       of  a  range  of  process  virtual address space.  Numa memory policies
139       specified for a range of virtual address space are shared by all  tasks
140       in  the process.  Furthermore, memory policies specified for a range of
141       a shared memory attached using shmat(2) or mmap(2) from shmfs/hugetlbfs
142       are  shared  by all processes that attach to that region.  Memory poli‐
143       cies for shared disk backed file mappings are currently ignored.
144
145       The default memory allocation policy for tasks and all memory ranges is
146       local  allocation.   This assumes that no ancestor has installed a non-
147       default policy.
148
149       For setting a specific policy globally for all memory allocations in  a
150       process  and its children it is easiest to start it with the numactl(8)
151       utility. For more fine-grained policy inside an application this library
152       can be used.
153
154       Any  numa memory allocation policy only takes effect when a page is ac‐
155       tually faulted into the address space of a process by accessing it. The
156       numa_alloc_* functions take care of this automatically.
157
158       A  node  is  defined  as an area where all memory has the same speed as
159       seen from a particular CPU.  A node can contain multiple CPUs.   Caches
160       are ignored for this definition.
161
162       Most  functions in this library are only concerned about numa nodes and
163       their  memory.   The  exceptions  to  this  are:   numa_node_to_cpus(),
164       numa_node_to_cpu_update(),       numa_node_of_cpu(),       numa_bind(),
165       numa_run_on_node(),                            numa_run_on_node_mask(),
166       numa_run_on_node_mask_all(), and numa_get_run_node_mask().  These func‐
167       tions deal with the CPUs associated with numa nodes.  See the  descrip‐
168       tions below for more information.
169
170       Some  of  these functions accept or return a pointer to struct bitmask.
171       A struct bitmask controls a bit map of arbitrary  length  containing  a
172       bit    representation    of    nodes.     The    predefined    variable
173       numa_all_nodes_ptr points to a bit mask that has  all  available  nodes
174       set; numa_no_nodes_ptr points to the empty set.
175
176       Before  any  other  calls  in this library can be used numa_available()
177       must be called. If it returns -1, all other functions in  this  library
178       are undefined.
179
180       numa_max_possible_node()  returns  the  number  of the highest possible
181       node in a system.  In other words, the size of a kernel type nodemask_t
182       (in  bits) minus 1.  This number can be gotten by calling numa_num_pos‐
183       sible_nodes() and subtracting 1.
184
185       numa_num_possible_nodes() returns the size of kernel's node mask  (ker‐
186       nel  type  nodemask_t).   In other words, large enough to represent the
187       maximum number of nodes that the kernel can handle. This will match the
188       kernel's    MAX_NUMNODES   value.    This   count   is   derived   from
189       /proc/self/status, field Mems_allowed.
190
191       numa_max_node() returns the highest node number available on  the  cur‐
192       rent  system.   (See  the  node numbers in /sys/devices/system/node/ ).
193       Also see numa_num_configured_nodes().
194
195       numa_num_configured_nodes() returns the number of memory nodes  in  the
196       system. This count includes any nodes that are currently disabled. This
197       count is derived from the  node  numbers  in  /sys/devices/system/node.
198       (Depends on the kernel being configured with /sys (CONFIG_SYSFS)).
199
200       numa_get_mems_allowed()  returns  the  mask  of  nodes  from  which the
201       process is allowed to allocate memory in its  current  cpuset  context.
202       Any nodes that are not included in the returned bitmask will be ignored
203       in any of the following libnuma memory policy calls.
204
205       numa_num_configured_cpus() returns the number of cpus  in  the  system.
206       This count includes any cpus that are currently disabled. This count is
207       derived from the cpu numbers in /sys/devices/system/cpu. If the  kernel
208       is configured without /sys (CONFIG_SYSFS=n) then it falls back to using
209       the number of online cpus.
210
211       numa_all_nodes_ptr points to a bitmask that is allocated by the library
212       with bits representing all nodes on which the calling task may allocate
213       memory.  This set may be up to all nodes on the system, or  up  to  the
214       nodes  in  the  current  cpuset.  The bitmask is allocated by a call to
215       numa_allocate_nodemask() using size numa_max_possible_node().  The  set
216       of  nodes  to record is derived from /proc/self/status, field "Mems_al‐
217       lowed".  The user should not alter this bitmask.
218
219       numa_no_nodes_ptr points to a bitmask that is allocated by the  library
220       and  left all zeroes.  The bitmask is allocated by a call to numa_allo‐
221       cate_nodemask() using size numa_max_possible_node().  The  user  should
222       not alter this bitmask.
223
224       numa_all_cpus_ptr  points to a bitmask that is allocated by the library
225       with bits representing all cpus on which the calling task may  execute.
226       This  set may be up to all cpus on the system, or up to the cpus in the
227       current cpuset.  The bitmask is  allocated  by  a  call  to  numa_allo‐
228       cate_cpumask() using size numa_num_possible_cpus().  The set of cpus to
229       record is derived from /proc/self/status,  field  "Cpus_allowed".   The
230       user should not alter this bitmask.
231
232       numa_num_task_cpus()  returns  the number of cpus that the calling task
233       is allowed to use.  This count is derived from the map  /proc/self/sta‐
234       tus, field "Cpus_allowed". Also see the bitmask numa_all_cpus_ptr.
235
236       numa_num_task_nodes()  returns the number of nodes on which the calling
237       task is allowed to allocate memory.  This count is derived from the map
238       /proc/self/status,   field   "Mems_allowed".    Also  see  the  bitmask
239       numa_all_nodes_ptr.
240
241       numa_parse_bitmap() parses line, which is a character string  such   as
242       found  in  /sys/devices/system/node/nodeN/cpumap  into a bitmask struc‐
243       ture.  The string contains the hexadecimal representation of a bit map.
244       The  bitmask may be allocated with numa_allocate_cpumask().  Returns  0
245       on success.  Returns -1 on failure.  This function is probably of  lit‐
246       tle use to a user application, but it is used by libnuma internally.
247
248       numa_parse_nodestring()  parses a character string list of nodes into a
249       bit mask.  The bit mask is allocated by numa_allocate_nodemask().   The
250       string  is  a  comma-separated  list of node numbers or node ranges.  A
251       leading ! can be used to indicate "not" this list (in other words,  all
252       nodes  except  this list), and a leading + can be used to indicate that
253       the node numbers in the list are relative to the  task's  cpuset.   The
254       string  can  be  "all"  to specify all ( numa_num_task_nodes() ) nodes.
255       Node  numbers  are  limited  by  the  number  in   the   system.    See
256       numa_max_node() and numa_num_configured_nodes().
257       Examples:  1-5,7,10   !4-5   +0-3
258       If  the  string  is of 0 length, bitmask numa_no_nodes_ptr is returned.
259       Returns 0 if the string is invalid.
260
261       numa_parse_nodestring_all() is similar to numa_parse_nodestring(),  but
262       can parse all possible nodes, not only current nodeset.
263
264       numa_parse_cpustring()  parses  a  character string list of cpus into a
265       bit mask.  The bit mask is allocated by  numa_allocate_cpumask().   The
266       string is a comma-separated list of cpu numbers or cpu ranges.  A lead‐
267       ing ! can be used to indicate "not" this list (in other words, all cpus
268       except this list), and a leading + can be used to indicate that the cpu
269       numbers in the list are relative to the task's cpuset.  The string  can
270       be "all" to specify all ( numa_num_task_cpus() ) cpus.  Cpu numbers are
271       limited by the number in  the  system.   See  numa_num_task_cpus()  and
272       numa_num_configured_cpus().
273       Examples:  1-5,7,10   !4-5   +0-3
274       Returns 0 if the string is invalid.
275
276       numa_parse_cpustring_all() is similar to numa_parse_cpustring(), but can
277       parse all possible cpus, not only current cpuset.
278
279       numa_node_size() returns the memory size of a  node.  If  the  argument
280       freep is not NULL, it is used to return the amount of free memory on the
281       node.  On error it returns -1.
282
283       numa_node_size64() works the same as numa_node_size().  This is  useful
284       on 32-bit architectures with large nodes.
285
286       numa_preferred()  returns the preferred node of the current task.  This
287       is the node on which the kernel  preferably  allocates  memory,  unless
288       some other policy overrides this.
289
290       numa_has_preferred_many()  returns  > 0 if the system supports multiple
291       preferred nodes.
292
293       numa_preferred_many() returns the current set of preferred nodes.  This
294       implies  the  empty  set  when the policy isn't one used for preference
295       (PREFERRED, PREFERRED_MANY, BIND).  The caller is responsible for free‐
296       ing the mask with numa_bitmask_free().
297
298       numa_set_preferred()  sets  the  preferred node for the current task to
299       node.  The system will attempt to allocate memory  from  the  preferred
300       node,  but  will  fall back to other nodes if no memory is available on
301       the preferred node.  Passing -1 as the node argument  specifies   local
302       allocation and is equivalent to calling numa_set_localalloc().
303
304       numa_set_preferred_many()  sets the preferred set of nodes for the cur‐
305       rent task to nodemask.  This is similar  to  numa_set_preferred()  with
306       the  exception that it utilizes a different kernel interface to specify
307       multiple preferred nodes.  The caller is responsible  for  freeing  the
308       mask with numa_bitmask_free().
309
310       numa_get_interleave_mask()  returns  the current interleave mask if the
311       task's memory allocation policy is page interleaved.   Otherwise,  this
312       function returns an empty mask.
313
314       numa_set_interleave_mask() sets the memory interleave mask for the cur‐
315       rent task to nodemask.  All new memory allocations are page interleaved
316       over  all  nodes in the interleave mask. Interleaving can be turned off
317       again by passing an empty mask (numa_no_nodes).  The page  interleaving
318       only occurs on the actual page fault that puts a new page into the cur‐
319       rent address space. It is also only a hint: the kernel will  fall  back
320       to other nodes if no memory is available on the interleave target.
321
322       numa_interleave_memory()  interleaves size bytes of memory page by page
323       from start on nodes specified in nodemask.  The size argument  will  be
324       rounded up to a multiple of the system page size.  If nodemask contains
325       nodes that are externally denied to this process, this call will  fail.
326       This  is  a  lower  level  function to interleave allocated but not yet
327       faulted in memory. Not yet faulted in means the memory is allocated us‐
328       ing  mmap(2)  or  shmat(2),  but  has  not been accessed by the current
329       process yet. The memory is page interleaved to all nodes  specified  in
330       nodemask.  Normally numa_alloc_interleaved() should be used for private
331       memory instead, but this function is useful to handle shared memory ar‐
332       eas.  To be useful the memory area should be several megabytes at least
333       (or tens of megabytes of hugetlbfs mappings).  If the numa_set_strict()
334       flag  is  true then the operation will cause a numa_error if there were
335       already pages in the mapping that do not follow the policy.
336
337       numa_bind() binds the current task and its children to the nodes speci‐
338       fied  in  nodemask.   They  will  only run on the CPUs of the specified
339       nodes and only be able to allocate memory from them.  This function  is
340       equivalent   to  calling  numa_run_on_node_mask(nodemask)  followed  by
341       numa_set_membind(nodemask).  If tasks should  be  bound  to  individual
342       CPUs   inside   nodes   consider   using   numa_node_to_cpus   and  the
343       sched_setaffinity(2) syscall.
344
345       numa_set_localalloc() sets the memory allocation policy for the calling
346       task  to local allocation.  In this mode, the preferred node for memory
347       allocation is effectively the node where the task is executing  at  the
348       time of a page allocation.
349
350       numa_set_membind() sets the memory allocation mask.  The task will only
351       allocate memory from the nodes set in nodemask.  Passing an empty node‐
352       mask or a nodemask that contains nodes other than those in the mask re‐
353       turned by numa_get_mems_allowed() will result in an error.
354
355       numa_set_membind_balancing() sets the memory allocation mask and enables
356       the  Linux  kernel  NUMA  balancing for the task if the feature is sup‐
357       ported by the kernel.  The task will  only  allocate  memory  from  the
358       nodes  set  in  nodemask.  Passing an empty nodemask or a nodemask that
359       contains  nodes  other   than   those   in   the   mask   returned   by
360       numa_get_mems_allowed() will result in an error.
361
362       numa_get_membind() returns the mask of nodes from which memory can cur‐
363       rently be allocated.  If the returned mask is equal to  numa_all_nodes,
364       then memory allocation is allowed from all nodes.
365
366       numa_alloc_onnode()  allocates memory on a specific node.  The size ar‐
367       gument will be rounded up to a multiple of the system  page  size.   If
368       the specified node is externally denied to this process, this call will
369       fail.  This function is relatively slow compared to the malloc(3)  fam‐
370       ily  of  functions.  The memory must be freed with numa_free().  On er‐
371       rors NULL is returned.
372
373       numa_alloc_local() allocates size bytes of memory on  the  local  node.
374       The  size  argument will be rounded up to a multiple of the system page
375       size.  This function is relatively slow compared to the malloc(3)  fam‐
376       ily  of  functions.  The memory must be freed with numa_free().  On er‐
377       rors NULL is returned.
378
379       numa_alloc_interleaved() allocates size bytes  of  memory  page  inter‐
380       leaved  on  all nodes. This function is relatively slow and should only
381       be used for large areas consisting of multiple pages. The  interleaving
382       works  at  page  level  and  will  only show an effect when the area is
383       large.  The allocated memory must be freed with numa_free().  On error,
384       NULL is returned.
385
386       numa_alloc_interleaved_subset() attempts to allocate size bytes of mem‐
387       ory page interleaved on all nodes.  The size argument will  be  rounded
388       up to a multiple of the system page size.  The nodes on which a process
389       is allowed to allocate memory may be constrained externally.   If  this
390       is  the case, this function may fail.  This function is relatively slow
391       compared to the malloc(3) family of functions and should only  be  used
392       for  large  areas consisting of multiple pages.  The interleaving works
393       at page level and will only show an effect when the area is large.  The
394       allocated memory must be freed with numa_free().  On error, NULL is re‐
395       turned.
396
397       numa_alloc() allocates size bytes of memory with the current NUMA  pol‐
398       icy.   The size argument will be rounded up to a multiple of the system
399       page size.  This function is relatively slow compared to the  malloc(3)
400       family  of  functions.   The memory must be freed with numa_free().  On
401       errors NULL is returned.
402
403       numa_realloc() changes the size  of  the  memory  area  pointed  to  by
404       old_addr  from  old_size  to  new_size.   The memory area pointed to by
405       old_addr must have been allocated with one  of  the  numa_alloc*  func‐
406       tions.   The  new_size  will  be rounded up to a multiple of the system
407       page size. The contents of the memory area will be unchanged up to  the
408       minimum of the old and new sizes; newly allocated memory will be unini‐
409       tialized. The memory policy (and node  bindings)  associated  with  the
410       original  memory  area will be preserved in the resized area. For exam‐
411       ple, if the initial area was allocated with a call  to  numa_alloc_onn‐
412       ode(),  then  the new pages (if the area is enlarged) will be allocated
413       on the same node.  However, if no memory policy was set for the  origi‐
414       nal  area, then numa_realloc() cannot guarantee that the new pages will
415       be allocated on the same node. On success, the address of  the  resized
416       area  is  returned  (which  might be different from that of the initial
417       area), otherwise NULL is returned and errno is set to indicate the  er‐
418       ror.  The pointer returned by numa_realloc() is suitable for passing to
419       numa_free().
420
421
422       numa_free() frees size bytes of memory starting at start, allocated  by
423       the numa_alloc_* functions above.  The size argument will be rounded up
424       to a multiple of the system page size.
425
426       numa_run_on_node() runs the current task and its children on a specific
427       node.  They  will  not  migrate  to  CPUs of other nodes until the node
428       affinity is reset with a new call to numa_run_on_node_mask().   Passing
429       -1 permits the kernel to schedule on all nodes again.  On success, 0 is
430       returned; on error -1 is returned, and errno is set to indicate the er‐
431       ror.
432
433       numa_run_on_node_mask()  runs the current task and its children only on
434       nodes specified in nodemask.  They will not migrate to  CPUs  of  other
435       nodes   until   the   node  affinity  is  reset  with  a  new  call  to
436       numa_run_on_node_mask() or numa_run_on_node().  Passing  numa_all_nodes
437       permits  the  kernel  to schedule on all nodes again.  On success, 0 is
438       returned; on error -1 is returned, and errno is set to indicate the er‐
439       ror.
440
441       numa_run_on_node_mask_all() runs the current task and its children only
442       on nodes specified in nodemask like numa_run_on_node_mask  but  without
443       any cpuset awareness.
444
445       numa_get_run_node_mask()  returns  a  mask of CPUs on which the current
446       task is allowed to run.
447
448       numa_tonode_memory() puts memory on a specific node. The constraints de‐
449       scribed for numa_interleave_memory() apply here too.
450
451       numa_tonodemask_memory()  puts  memory on a specific set of nodes.  The
452       constraints described for numa_interleave_memory() apply here too.
453
454       numa_setlocal_memory() locates memory on the  current  node.  The  con‐
455       straints described for numa_interleave_memory() apply here too.
456
457       numa_police_memory()  locates  memory with the current NUMA policy. The
458       constraints described for numa_interleave_memory() apply here too.
459
460       numa_distance() reports the distance in the  machine  topology  between
461       two  nodes.   The  factors  are a multiple of 10. It returns 0 when the
462       distance cannot be determined. A node has distance 10 to  itself.   Re‐
463       porting  the  distance  requires  a  Linux  kernel version of 2.6.10 or
464       newer.
465
466       numa_set_bind_policy() specifies whether calls that bind  memory  to  a
467       specific  node should use the preferred policy or a strict policy.  The
468       preferred policy allows the kernel to allocate memory  on  other  nodes
469       when  there  isn't enough free on the target node. strict will fail the
470       allocation in that case.  Setting the argument to 1 specifies strict, 0
471       preferred.  Note that specifying more than one node non strict may only
472       use the first node in some kernel versions.
473
474       numa_set_strict() sets a flag that says whether the functions  allocat‐
475       ing  on  specific  nodes should use a strict policy. Strict  means  the
476       allocation will fail if the memory cannot be allocated  on  the  target
477       node.   Default operation is to fall back to other nodes.  This doesn't
478       apply to interleave and default.
479
480       numa_get_interleave_node() is used by libnuma internally. It is  proba‐
481       bly  not useful for user applications.  It uses the MPOL_F_NODE flag of
482       the get_mempolicy system call, which is not  intended  for  application
483       use (its operation may change or be removed altogether in future kernel
484       versions). See get_mempolicy(2).
485
486       numa_pagesize() returns the number of bytes in a page. This function is
487       simply  a  fast alternative to repeated calls to the getpagesize system
488       call.  See getpagesize(2).
489
490       numa_sched_getaffinity() retrieves a bitmask of the  cpus  on  which  a
491       task  may run.  The task is specified by pid.  Returns the return value
492       of the sched_getaffinity system call.  See  sched_getaffinity(2).   The
493       bitmask  must  be at least the size of the kernel's cpu mask structure.
494       Use numa_allocate_cpumask() to allocate it.  Test the bits in the  mask
495       by calling numa_bitmask_isbitset().
496
497       numa_sched_setaffinity()  sets  a  task's  allowed  cpus to those  cpus
498       specified in mask.  The task is specified by pid.  Returns  the  return
499       value  of the sched_setaffinity system call.  See sched_setaffinity(2).
500       You may allocate the bitmask with numa_allocate_cpumask().  Or the bit‐
501       mask  may be smaller than the kernel's cpu mask structure. For example,
502       call  numa_bitmask_alloc()  using  a  maximum  number  of   cpus   from
503       numa_num_configured_cpus().   Set  the  bits  in  the  mask  by calling
504       numa_bitmask_setbit().
505
506       numa_node_to_cpus() converts a node number to a bitmask  of  CPUs.  The
507       user  must  pass  a bitmask structure with a mask buffer long enough to
508       represent all possible cpus.   Use  numa_allocate_cpumask()  to  create
509       it.   If the bitmask is not long enough errno will be set to ERANGE and
510       -1 returned. On success 0 is returned.
511
512       numa_node_to_cpu_update() marks the cpus bitmask of all nodes stale; the
513       latest bitmask is then obtained by  calling  numa_node_to_cpus().   This
514       allows the libnuma state to be updated after a CPU hotplug  event.   The
515       application is in charge of detecting CPU hotplug events.
516
517       numa_node_of_cpu()  returns the node that a cpu belongs to. If the user
518       supplies an invalid cpu errno will be set to EINVAL and -1 will be  re‐
519       turned.
520
521       numa_allocate_cpumask() returns a bitmask of a size equal to  the  ker‐
522       nel's cpu mask (kernel type cpumask_t).  In other words,  large  enough
523       to  represent NR_CPUS cpus.  This number of cpus can be gotten by call‐
524       ing numa_num_possible_cpus().  The bitmask is zero-filled.
525
526       numa_free_cpumask() frees a cpumask previously allocated by  numa_allo‐
527       cate_cpumask().
528
529       numa_allocate_nodemask()  returns a bitmask of a size equal to the ker‐
530       nel's node mask (kernel type nodemask_t).  In other words, large enough
531       to represent MAX_NUMNODES nodes.  This number of nodes can be gotten by
532       calling numa_num_possible_nodes().  The bitmask is zero-filled.
533
534       numa_free_nodemask() frees a nodemask previously allocated by numa_allo‐
535       cate_nodemask().
536
537       numa_bitmask_alloc()  allocates  a bitmask structure and its associated
538       bit mask.  The memory allocated for the bit mask contains enough  words
539       (type  unsigned  long) to contain n bits.  The bit mask is zero-filled.
540       The bitmask structure points to the bit mask and contains the n value.
541
542       numa_bitmask_clearall() sets all bits in the bit mask to 0.   The  bit‐
543       mask  structure  points  to  the  bit  mask and contains its size ( bmp
544       ->size).  The value of bmp is always  returned.   Note  that  numa_bit‐
545       mask_alloc() creates a zero-filled bit mask.
546
547       numa_bitmask_clearbit() sets a specified bit in a bit mask to 0.  Noth‐
548       ing is done if the n value is greater than the size of the bitmask (and
549       no error is returned). The value of bmp is always returned.
550
551       numa_bitmask_equal() returns 1 if two bitmasks are equal.  It returns 0
552       if they are not equal.  If the bitmask structures control bit masks  of
553       different  sizes,  the  "missing" trailing bits of the smaller bit mask
554       are considered to be 0.
555
556       numa_bitmask_free() deallocates the memory of both the  bitmask  struc‐
557       ture  pointed to by bmp and the bit mask.  It is an error to attempt to
558       free this bitmask twice.
559
560       numa_bitmask_isbitset() returns the value of a specified bit in  a  bit
561       mask.  If the n value is greater than the size of the bit map, 0 is re‐
562       turned.
563
564       numa_bitmask_nbytes() returns the size (in bytes) of the bit mask  con‐
565       trolled  by  bmp.   The  bit masks are always full words (type unsigned
566       long), and the returned size is the actual size of all those words.
567
568       numa_bitmask_setall() sets all bits in the bit mask to 1.  The  bitmask
569       structure  points  to  the bit mask and contains its size (bmp->size).
570       The value of bmp is always returned.
571
572       numa_bitmask_setbit() sets a specified bit in a bit mask to 1.  Nothing
573       is  done  if n is greater than the size of the bitmask (and no error is
574       returned). The value of bmp is always returned.
575
576       copy_bitmask_to_nodemask() copies the body (the bit map itself) of  the
577       bitmask structure pointed to by bmp to the nodemask_t structure pointed
578       to by the nodemask pointer. If the two areas differ in size,  the  copy
579       is truncated to the size of the receiving field or zero-filled.
580
581       copy_nodemask_to_bitmask()  copies  the nodemask_t structure pointed to
582       by the nodemask pointer to the body (the bit map itself) of the bitmask
583       structure  pointed  to  by  the bmp pointer. If the two areas differ in
584       size, the copy is truncated to the size of the receiving field or zero-
585       filled.
586
587       copy_bitmask_to_bitmask()  copies  the body (the bit map itself) of the
588       bitmask structure pointed to by the bmpfrom pointer to the body of  the
589       bitmask  structure  pointed  to  by the bmpto pointer. If the two areas
590       differ in size, the copy is truncated to  the  size  of  the  receiving
591       field or zero-filled.
592
593       numa_bitmask_weight()  returns  a count of the bits that are set in the
594       body of the bitmask pointed to by the bmp argument.
595
596       numa_move_pages() moves a list of pages in the  address  space  of  the
597       task  specified  by  pid,  or of the calling task if pid is not valid.
598       It simply uses the move_pages system call.
599       pid - ID of task.  If not valid, use the current task.
600       count - Number of pages.
601       pages - List of pages to move.
602       nodes - List of nodes to which pages can be moved.
603       status - Field to which status is to be returned.
604       flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
605       See move_pages(2).
606
607       numa_migrate_pages() simply uses the migrate_pages system call to cause
608       the pages of the calling task, or a specified task, to be migrated from
609       one set of nodes to another.  See migrate_pages(2).  The bit masks rep‐
610       resenting the nodes should be allocated with numa_allocate_nodemask(),
611       or  with  numa_bitmask_alloc()  using  an   n   value   returned   from
612       numa_num_possible_nodes().   A task's current node set can be gotten by
613       calling numa_get_membind().  Bits in the tonodes mask  can  be  set  by
614       calls to numa_bitmask_setbit().
615
616       numa_error()  is  a libnuma internal function that can be overridden by
617       the user program.  This function is called with a char * argument  when
618       a  libnuma  function fails.  Overriding the library internal definition
619       makes it possible to specify a different error handling strategy when a
620       libnuma  function  fails.  It  does  not  affect numa_available().  The
621       numa_error() function defined in libnuma prints an error on stderr  and
622       terminates  the  program  if  numa_exit_on_error  is  set to a non-zero
623       value.  The default value of numa_exit_on_error is zero.
624
625       numa_warn() is a libnuma internal function that can also be  overridden
626       by  the  user  program.   It  is called to warn the user when a libnuma
627       function encounters a  non-fatal  error.   The  default  implementation
628       prints  a  warning  to  stderr.   The first argument is a unique number
629       identifying each warning. After that there is a printf(3)-style  format
630       string and a variable number of arguments.  numa_warn exits the program
631       when numa_exit_on_warn is set to a non-zero value.  The  default  value
632       of numa_exit_on_warn is zero.
633
634

Compatibility with libnuma version 1

636       Binaries  that  were compiled for libnuma version 1 need not be re-com‐
637       piled to run with libnuma version 2.
638       Source  code  written for libnuma version 1 may be re-compiled without
639       change  with  version 2 installed. To do so, in the code's Makefile add
640       this option to CFLAGS:  -DNUMA_VERSION1_COMPATIBILITY
641
642

THREAD SAFETY

644       numa_set_bind_policy and numa_exit_on_error  are  process  global.  The
645       other calls are thread safe.
646
647

COPYRIGHT

649       Copyright 2002, 2004, 2007, 2008 Andi Kleen, SuSE Labs.  libnuma is un‐
650       der the GNU Lesser General Public License, v2.1.
651
652

SEE ALSO

654       get_mempolicy(2), set_mempolicy(2), getpagesize(2), mbind(2),  mmap(2),
655       shmat(2),   numactl(8),   sched_getaffinity(2),  sched_setaffinity(2),
656       move_pages(2), migrate_pages(2)
657
658
659
660SuSE Labs                        December 2007                         NUMA(3)
Impressum