1NUMA(3)                    Linux Programmer's Manual                   NUMA(3)
2
3
4

NAME

6       numa - NUMA policy library
7

SYNOPSIS

9       #include <numa.h>
10
11       cc ... -lnuma
12
13       int numa_available(void);
14
15       int numa_max_possible_node(void);
16       int numa_num_possible_nodes();
17
18       int numa_max_node(void);
19       int numa_num_configured_nodes();
20       struct bitmask *numa_get_mems_allowed(void);
21
22       int numa_num_configured_cpus(void);
23       struct bitmask *numa_all_nodes_ptr;
24       struct bitmask *numa_no_nodes_ptr;
25       struct bitmask *numa_all_cpus_ptr;
26
27       int numa_num_task_cpus();
28       int numa_num_task_nodes();
29
30       int numa_parse_bitmap(char *line , struct bitmask *mask);
31       struct bitmask *numa_parse_nodestring(const char *string);
32       struct bitmask *numa_parse_nodestring_all(const char *string);
33       struct bitmask *numa_parse_cpustring(const char *string);
34       struct bitmask *numa_parse_cpustring_all(const char *string);
35
36       long numa_node_size(int node, long *freep);
37       long long numa_node_size64(int node, long long *freep);
38
39       int numa_preferred(void);
40       void numa_set_preferred(int node);
41       int numa_get_interleave_node(void);
42       struct bitmask *numa_get_interleave_mask(void);
43       void numa_set_interleave_mask(struct bitmask *nodemask);
44       void  numa_interleave_memory(void  *start,  size_t size, struct bitmask
45       *nodemask);
46       void numa_bind(struct bitmask *nodemask);
47       void numa_set_localalloc(void);
48       void numa_set_membind(struct bitmask *nodemask);
49       struct bitmask *numa_get_membind(void);
50
51       void *numa_alloc_onnode(size_t size, int node);
52       void *numa_alloc_local(size_t size);
53       void *numa_alloc_interleaved(size_t size);
54       void *numa_alloc_interleaved_subset(size_t size,  struct bitmask *node‐
55       mask); void *numa_alloc(size_t size);
56       void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
57       void numa_free(void *start, size_t size);
58
59       int numa_run_on_node(int node);
60       int numa_run_on_node_mask(struct bitmask *nodemask);
61       int numa_run_on_node_mask_all(struct bitmask *nodemask);
62       struct bitmask *numa_get_run_node_mask(void);
63
64       void numa_tonode_memory(void *start, size_t size, int node);
65       void  numa_tonodemask_memory(void  *start,  size_t size, struct bitmask
66       *nodemask);
67       void numa_setlocal_memory(void *start, size_t size);
68       void numa_police_memory(void *start, size_t size);
69       void numa_set_bind_policy(int strict);
70       void numa_set_strict(int strict);
71
72       int numa_distance(int node1, int node2);
73
74       int numa_sched_getaffinity(pid_t pid, struct bitmask *mask);
75       int numa_sched_setaffinity(pid_t pid, struct bitmask *mask);
76       int numa_node_to_cpus(int node, struct bitmask *mask);
77       int numa_node_of_cpu(int cpu);
78
79       struct bitmask *numa_allocate_cpumask();
80
81       void numa_free_cpumask();
82       struct bitmask *numa_allocate_nodemask();
83
84       void numa_free_nodemask();
85       struct bitmask *numa_bitmask_alloc(unsigned int n);
86       struct bitmask *numa_bitmask_clearall(struct bitmask *bmp);
87       struct bitmask *numa_bitmask_clearbit(struct bitmask *bmp, unsigned int
88       n);
89       int numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask
90       *bmp2);
91       void numa_bitmask_free(struct bitmask *bmp);
92       int numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int n);
93       unsigned int numa_bitmask_nbytes(struct bitmask *bmp);
94       struct bitmask *numa_bitmask_setall(struct bitmask *bmp);
95       struct bitmask *numa_bitmask_setbit(struct bitmask *bmp,  unsigned  int
96       n);
97       void  copy_bitmask_to_nodemask(struct  bitmask  *bmp, nodemask_t *node‐
98       mask)
99       void  copy_nodemask_to_bitmask(nodemask_t  *nodemask,  struct   bitmask
100       *bmp)
101       void  copy_bitmask_to_bitmask(struct  bitmask  *bmpfrom, struct bitmask
102       *bmpto)
103       unsigned int numa_bitmask_weight(const struct bitmask *bmp )
104
105       int numa_move_pages(int pid, unsigned long count, void  **pages,  const
106       int *nodes, int *status, int flags);
107       int  numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bit‐
108       mask *tonodes);
109
110       void numa_error(char *where);
111
112       extern int numa_exit_on_error;
113       extern int numa_exit_on_warn;
114       void numa_warn(int number, char *where, ...);
115
116

DESCRIPTION

118       The libnuma library offers a simple programming interface to  the  NUMA
119       (Non  Uniform Memory Access) policy supported by the Linux kernel. On a
120       NUMA architecture some memory areas have different latency or bandwidth
121       than others.
122
123       Available  policies  are  page interleaving (i.e., allocate in a round-
124       robin fashion from all, or a subset, of the nodes on the system),  pre‐
125       ferred  node  allocation  (i.e.,  preferably  allocate  on a particular
126       node), local allocation (i.e., allocate on the node on which  the  task
127       is  currently  executing),  or allocation only on specific nodes (i.e.,
128       allocate on some subset of the available nodes).  It is  also  possible
129       to bind tasks to specific nodes.
130
131       Numa memory allocation policy may be specified as a per-task attribute,
132       that is inherited by children tasks and processes, or as  an  attribute
133       of  a  range  of  process  virtual address space.  Numa memory policies
134       specified for a range of virtual address space are shared by all  tasks
135       in the process.  Furthermore, memory policies specified for a range of
136       a shared memory attached using shmat(2) or mmap(2) from shmfs/hugetlbfs
137       are  shared  by all processes that attach to that region.  Memory poli‐
138       cies for shared disk backed file mappings are currently ignored.
139
140       The default memory allocation policy for tasks and all memory range  is
141       local  allocation.   This assumes that no ancestor has installed a non-
142       default policy.
143
144       For setting a specific policy globally for all memory allocations in  a
145       process  and its children it is easiest to start it with the numactl(8)
146       utility. For more fine-grained policy inside an application this library
147       can be used.
148
149       All  numa  memory  allocation  policy  only takes effect when a page is
150       actually faulted into the address space of a process by  accessing  it.
151       The numa_alloc_* functions take care of this automatically.
152
153       A  node  is  defined  as an area where all memory has the same speed as
154       seen from a particular CPU.  A node can contain multiple CPUs.   Caches
155       are ignored for this definition.
156
157       Most  functions in this library are only concerned about numa nodes and
158       their  memory.   The  exceptions  to  this  are:   numa_node_to_cpus(),
159       numa_node_of_cpu(),           numa_bind(),          numa_run_on_node(),
160       numa_run_on_node_mask(),        numa_run_on_node_mask_all(),        and
161       numa_get_run_node_mask().   These  functions deal with the CPUs associ‐
162       ated with numa nodes.  See the descriptions below for more information.
163
164       Some of these functions accept or return a pointer to  struct  bitmask.
165       A  struct  bitmask  controls a bit map of arbitrary length containing a
166       bit    representation    of    nodes.     The    predefined    variable
167       numa_all_nodes_ptr  points  to  a bit mask that has all available nodes
168       set; numa_no_nodes_ptr points to the empty set.
169
170       Before any other calls in this library  can  be  used  numa_available()
171       must  be  called. If it returns -1, all other functions in this library
172       are undefined.
173
174       numa_max_possible_node() returns the number  of  the  highest  possible
175       node in a system.  In other words, the size of a kernel type nodemask_t
176       (in bits) minus 1.  This number can be gotten by calling  numa_num_pos‐
177       sible_nodes() and subtracting 1.
178
179       numa_num_possible_nodes()  returns the size of kernel's node mask (ker‐
180       nel type nodemask_t).  In other words, large enough  to  represent  the
181       maximum number of nodes that the kernel can handle. This will match the
182       kernel's   MAX_NUMNODES   value.    This   count   is   derived    from
183       /proc/self/status, field Mems_allowed.
184
185       numa_max_node()  returns  the highest node number available on the cur‐
186       rent system.  (See the node  numbers  in  /sys/devices/system/node/  ).
187       Also see numa_num_configured_nodes().
188
189       numa_num_configured_nodes()  returns  the number of memory nodes in the
190       system. This count includes any nodes that are currently disabled. This
191       count  is  derived  from  the node numbers in /sys/devices/system/node.
192       (Depends on the kernel being configured with /sys (CONFIG_SYSFS)).
193
194       numa_get_mems_allowed() returns  the  mask  of  nodes  from  which  the
195       process  is  allowed to allocate memory in its current cpuset context.
196       Any nodes that are not included in the returned bitmask will be ignored
197       in any of the following libnuma memory policy calls.
198
199       numa_num_configured_cpus()  returns  the  number of cpus in the system.
200       This count includes any cpus that are currently disabled. This count is
201       derived  from the cpu numbers in /sys/devices/system/cpu. If the kernel
202       is configured without /sys (CONFIG_SYSFS=n) then it falls back to using
203       the number of online cpus.
204
205       numa_all_nodes_ptr points to a bitmask that is allocated by the library
206       with bits representing all nodes on which the calling task may allocate
207       memory.   This  set  may be up to all nodes on the system, or up to the
208       nodes in the current cpuset.  The bitmask is allocated  by  a  call  to
209       numa_allocate_nodemask()  using size numa_max_possible_node().  The set
210       of  nodes  to  record  is   derived   from   /proc/self/status,   field
211       "Mems_allowed".  The user should not alter this bitmask.
212
213       numa_no_nodes_ptr  points to a bitmask that is allocated by the library
214       and left all zeroes.  The bitmask is allocated by a call to  numa_allo‐
215       cate_nodemask()  using  size numa_max_possible_node().  The user should
216       not alter this bitmask.
217
218       numa_all_cpus_ptr points to a bitmask that is allocated by the  library
219       with  bits representing all cpus on which the calling task may execute.
220       This set may be up to all cpus on the system, or up to the cpus in  the
221       current  cpuset.   The  bitmask  is  allocated  by a call to numa_allo‐
222       cate_cpumask() using size numa_num_possible_cpus().  The set of cpus to
223       record  is  derived  from /proc/self/status, field "Cpus_allowed".  The
224       user should not alter this bitmask.
225
226       numa_num_task_cpus() returns the number of cpus that the  calling  task
227       is  allowed to use.  This count is derived from the map /proc/self/sta‐
228       tus, field "Cpus_allowed". Also see the bitmask numa_all_cpus_ptr.
229
230       numa_num_task_nodes() returns the number of nodes on which the  calling
231       task is allowed to allocate memory.  This count is derived from the map
232       /proc/self/status,  field  "Mems_allowed".   Also   see   the   bitmask
233       numa_all_nodes_ptr.
234
235       numa_parse_bitmap()  parses  line , which is a character string such as
236       found in /sys/devices/system/node/nodeN/cpumap into  a  bitmask  struc‐
237       ture.  The string contains the hexadecimal representation of a bit map.
238       The bitmask may be allocated with numa_allocate_cpumask().  Returns   0
239       on  success.  Returns -1 on failure.  This function is probably of lit‐
240       tle use to a user application, but it is used by libnuma internally.
241
242       numa_parse_nodestring() parses a character string list of nodes into  a
243       bit  mask.  The bit mask is allocated by numa_allocate_nodemask().  The
244       string is a comma-separated list of node numbers  or  node  ranges.   A
245       leading  ! can be used to indicate "not" this list (in other words, all
246       nodes except this list), and a leading + can be used to  indicate  that
247       the  node  numbers  in the list are relative to the task's cpuset.  The
248       string can be "all" to specify all  (  numa_num_task_nodes()  )  nodes.
249       Node   numbers   are   limited  by  the  number  in  the  system.   See
250       numa_max_node() and numa_num_configured_nodes().
251       Examples:  1-5,7,10   !4-5   +0-3
252       If the string is of 0 length, bitmask  numa_no_nodes_ptr  is  returned.
253       Returns 0 if the string is invalid.
254
255       numa_parse_nodestring_all()  is  similar to numa_parse_nodestring , but
256       can parse all possible nodes, not only current nodeset.
257
258       numa_parse_cpustring() parses a character string list of  cpus  into  a
259       bit  mask.   The bit mask is allocated by numa_allocate_cpumask().  The
260       string is a comma-separated list of cpu numbers or cpu ranges.  A lead‐
261       ing ! can be used to indicate "not" this list (in other words, all cpus
262       except this list), and a leading + can be used to indicate that the cpu
263       numbers  in the list are relative to the task's cpuset.  The string can
264       be "all" to specify all ( numa_num_task_cpus() ) cpus.  Cpu numbers are
265       limited  by  the  number  in  the system.  See numa_num_task_cpus() and
266       numa_num_configured_cpus().
267       Examples:  1-5,7,10   !4-5   +0-3
268       Returns 0 if the string is invalid.
269
270       numa_parse_cpustring_all() is similar to numa_parse_cpustring , but can
271       parse all possible cpus, not only current cpuset.
272
273       numa_node_size()  returns  the  memory  size of a node. If the argument
274       freep is not NULL, it is used to return the amount of free memory on the
275       node.  On error it returns -1.
276
277       numa_node_size64()  works  the  same as numa_node_size() except that it
278       returns values as long long instead of long.  This is useful on  32-bit
279       architectures with large nodes.
280
281       numa_preferred()  returns the preferred node of the current task.  This
282       is the node on which the kernel  preferably  allocates  memory,  unless
283       some other policy overrides this.
284
285       numa_set_preferred()  sets  the  preferred node for the current task to
286       node.  The system will attempt to allocate memory  from  the  preferred
287       node,  but  will  fall back to other nodes if no memory is available on
288       the preferred node.  Passing a node of -1 argument specifies  local
289       allocation and is equivalent to calling numa_set_localalloc().
290
291       numa_get_interleave_mask()  returns  the current interleave mask if the
292       task's memory allocation policy is page interleaved.   Otherwise,  this
293       function returns an empty mask.
294
295       numa_set_interleave_mask() sets the memory interleave mask for the cur‐
296       rent task to nodemask.  All new memory allocations are page interleaved
297       over  all  nodes in the interleave mask. Interleaving can be turned off
298       again by passing an empty mask (numa_no_nodes).  The page  interleaving
299       only occurs on the actual page fault that puts a new page into the cur‐
300       rent address space. It is also only a hint: the kernel will  fall  back
301       to other nodes if no memory is available on the interleave target.
302
303       numa_interleave_memory()  interleaves size bytes of memory page by page
304       from start on nodes specified in nodemask.  The size argument  will  be
305       rounded up to a multiple of the system page size.  If nodemask contains
306       nodes that are externally denied to this process, this call will  fail.
307       This  is  a  lower  level  function to interleave allocated but not yet
308       faulted in memory. Not yet faulted in means  the  memory  is  allocated
309       using  mmap(2)  or  shmat(2),  but has not been accessed by the current
310       process yet. The memory is page interleaved to all nodes  specified  in
311       nodemask.  Normally numa_alloc_interleaved() should be used for private
312       memory instead, but this function is useful  to  handle  shared  memory
313       areas.  To  be  useful  the  memory area should be several megabytes at
314       least  (or  tens  of  megabytes   of   hugetlbfs   mappings).   If   the
315       numa_set_strict()  flag  is  true  then  the  operation  will  cause  a
316       numa_error if there were already pages in the mapping that do not  fol‐
317       low the policy.
318
319       numa_bind() binds the current task and its children to the nodes speci‐
320       fied in nodemask.  They will only run on  the  CPUs  of  the  specified
321       nodes  and only be able to allocate memory from them.  This function is
322       equivalent  to  calling  numa_run_on_node_mask(nodemask)  followed   by
323       numa_set_membind(nodemask).   If  tasks  should  be bound to individual
324       CPUs  inside   nodes   consider   using   numa_node_to_cpus   and   the
325       sched_setaffinity(2) syscall.
326
327       numa_set_localalloc() sets the memory allocation policy for the calling
328       task to local allocation.  In this mode, the preferred node for  memory
329       allocation  is  effectively the node where the task is executing at the
330       time of a page allocation.
331
332       numa_set_membind() sets the memory allocation mask.  The task will only
333       allocate memory from the nodes set in nodemask.  Passing an empty node‐
334       mask or a nodemask that contains nodes other than  those  in  the  mask
335       returned by numa_get_mems_allowed() will result in an error.
336
337       numa_get_membind() returns the mask of nodes from which memory can cur‐
338       rently be allocated.  If the returned mask is equal to  numa_all_nodes,
339       then memory allocation is allowed from all nodes.
340
341       numa_alloc_onnode()  allocates  memory  on  a  specific node.  The size
342       argument will be rounded up to a multiple of the system page size.   If
343       the specified node is externally denied to this process, this call will
344       fail.  This function is relatively slow compared to the malloc(3)  fam‐
345       ily  of  functions.   The  memory  must  be freed with numa_free().  On
346       errors NULL is returned.
347
348       numa_alloc_local() allocates size bytes of memory on  the  local  node.
349       The  size  argument will be rounded up to a multiple of the system page
350       size.  This function is relatively slow compared to the malloc(3)  fam‐
351       ily  of  functions.   The  memory  must  be freed with numa_free().  On
352       errors NULL is returned.
353
354       numa_alloc_interleaved() allocates size bytes  of  memory  page  inter‐
355       leaved  on  all nodes. This function is relatively slow and should only
356       be used for large areas consisting of multiple pages. The  interleaving
357       works  at  page  level  and  will  only show an effect when the area is
358       large.  The allocated memory must be freed with numa_free().  On error,
359       NULL is returned.
360
361       numa_alloc_interleaved_subset() attempts to allocate size bytes of mem‐
362       ory page interleaved on all nodes.  The size argument will  be  rounded
363       up to a multiple of the system page size.  The nodes on which a process
364       is allowed to allocate memory may be constrained externally.   If  this
365       is  the case, this function may fail.  This function is relatively slow
366       compared to the malloc(3) family of functions and should only be used for
367       large  areas  consisting  of multiple pages.  The interleaving works at
368       page level and will only show an effect when the area  is  large.   The
369       allocated  memory  must  be  freed with numa_free().  On error, NULL is
370       returned.
371
372       numa_alloc() allocates size bytes of memory with the current NUMA  pol‐
373       icy.   The size argument will be rounded up to a multiple of the system
374       page size.  This function is relatively slow compared to  the  malloc(3)
375       family  of  functions.   The memory must be freed with numa_free().  On
376       errors NULL is returned.
377
378       numa_realloc() changes the size  of  the  memory  area  pointed  to  by
379       old_addr  from  old_size  to  new_size.   The memory area pointed to by
380       old_addr must have been allocated with one  of  the  numa_alloc*  func‐
381       tions.   The  new_size  will  be rounded up to a multiple of the system
382       page size. The contents of the memory area will  be  unchanged  to  the
383       minimum of the old and new sizes; newly allocated memory will be unini‐
384       tialized. The memory policy (and node  bindings)  associated  with  the
385       original  memory  area will be preserved in the resized area. For exam‐
386       ple, if the initial area was allocated with a call  to  numa_alloc_onn‐
387       ode(),  then  the new pages (if the area is enlarged) will be allocated
388       on the same node.  However, if no memory policy was set for the  origi‐
389       nal  area, then numa_realloc() cannot guarantee that the new pages will
390       be allocated on the same node. On success, the address of  the  resized
391       area  is  returned  (which  might be different from that of the initial
392       area), otherwise NULL is returned and errno  is  set  to  indicate  the
393       error.  The  pointer returned by numa_realloc() is suitable for passing
394       to numa_free().
395
396
397       numa_free() frees size bytes of memory starting at start, allocated  by
398       the numa_alloc_* functions above.  The size argument will be rounded up
399       to a multiple of the system page size.
400
401       numa_run_on_node() runs the current task and its children on a specific
402       node.  They  will  not  migrate  to  CPUs of other nodes until the node
403       affinity is reset with a new call to numa_run_on_node_mask().   Passing
404       -1 permits the kernel to schedule on all nodes again.  On success, 0 is
405       returned; on error -1 is returned, and errno is  set  to  indicate  the
406       error.
407
408       numa_run_on_node_mask()  runs the current task and its children only on
409       nodes specified in nodemask.  They will not migrate to  CPUs  of  other
410       nodes   until   the   node  affinity  is  reset  with  a  new  call  to
411       numa_run_on_node_mask() or numa_run_on_node().  Passing  numa_all_nodes
412       permits  the  kernel  to schedule on all nodes again.  On success, 0 is
413       returned; on error -1 is returned, and errno is  set  to  indicate  the
414       error.
415
416       numa_run_on_node_mask_all() runs the current task and its children only
417       on nodes specified in nodemask like numa_run_on_node_mask  but  without
418       any cpuset awareness.
419
420       numa_get_run_node_mask()  returns  a  mask of CPUs on which the current
421       task is allowed to run.
422
423       numa_tonode_memory() puts memory on a  specific  node.  The  constraints
424       described for numa_interleave_memory() apply here too.
425
426       numa_tonodemask_memory()  puts memory  on  a specific set of nodes. The
427       constraints described for numa_interleave_memory() apply here too.
428
429       numa_setlocal_memory() locates memory on the  current  node.  The  con‐
430       straints described for numa_interleave_memory() apply here too.
431
432       numa_police_memory()  locates  memory with the current NUMA policy. The
433       constraints described for numa_interleave_memory() apply here too.
434
435       numa_distance() reports the distance in the  machine  topology  between
436       two  nodes.   The  factors  are a multiple of 10. It returns 0 when the
437       distance cannot be determined.  A  node  has  distance  10  to  itself.
438       Reporting  the  distance  requires  a Linux kernel version of 2.6.10 or
439       newer.
440
441       numa_set_bind_policy() specifies whether calls that bind  memory  to  a
442       specific  node should use the preferred policy or a strict policy.  The
443       preferred policy allows the kernel to allocate memory  on  other  nodes
444       when  there  isn't enough free on the target node. strict will fail the
445       allocation in that case.  Setting the argument to 1 specifies strict,  0
446       preferred.  Note that specifying more than one node non strict may only
447       use the first node in some kernel versions.
448
449       numa_set_strict() sets a flag that says whether the functions  allocat‐
450       ing  on specific nodes should use a strict policy. Strict means the
451       allocation will fail if the memory cannot be allocated  on  the  target
452       node.   Default operation is to fall back to other nodes.  This doesn't
453       apply to interleave and default.
454
455       numa_get_interleave_node() is used by libnuma internally. It is  proba‐
456       bly  not useful for user applications.  It uses the MPOL_F_NODE flag of
457       the get_mempolicy system call, which is not  intended  for  application
458       use (its operation may change or be removed altogether in future kernel
459       versions). See get_mempolicy(2).
460
461       numa_pagesize() returns the number of bytes in a page. This function  is
462       simply  a  fast alternative to repeated calls to the getpagesize system
463       call.  See getpagesize(2).
464
465       numa_sched_getaffinity() retrieves a bitmask of the  cpus  on  which  a
466       task  may run.  The task is specified by pid.  Returns the return value
467       of the sched_getaffinity system call.  See  sched_getaffinity(2).   The
468       bitmask  must  be at least the size of the kernel's cpu mask structure.
469       Use numa_allocate_cpumask() to allocate it.  Test the bits in the  mask
470       by calling numa_bitmask_isbitset().
471
472       numa_sched_setaffinity()  sets  a  task's  allowed cpu's to those cpu's
473       specified in mask.  The task is specified by pid.  Returns  the  return
474       value  of the sched_setaffinity system call.  See sched_setaffinity(2).
475       You may allocate the bitmask with numa_allocate_cpumask().  Or the bit‐
476       mask  may be smaller than the kernel's cpu mask structure. For example,
477       call  numa_bitmask_alloc()  using  a  maximum  number  of   cpus   from
478       numa_num_configured_cpus().   Set  the  bits  in  the  mask  by calling
479       numa_bitmask_setbit().
480
481       numa_node_to_cpus() converts a node number to a bitmask  of  CPUs.  The
482       user  must  pass  a bitmask structure with a mask buffer long enough to
483       represent all possible cpu's.  Use  numa_allocate_cpumask()  to  create
484       it.   If the bitmask is not long enough errno will be set to ERANGE and
485       -1 returned. On success 0 is returned.
486
487       numa_node_of_cpu() returns the node that a cpu belongs to. If the  user
488       supplies  an  invalid  cpu  errno  will be set to EINVAL and -1 will be
489       returned.
490
491       numa_allocate_cpumask () returns a bitmask of a size equal to the  ker‐
492       nel's  cpu  mask (kernel type cpumask_t).  In other words, large enough
493       to represent NR_CPUS cpus.  This number of cpus can be gotten by  call‐
494       ing numa_num_possible_cpus().  The bitmask is zero-filled.
495
496       numa_free_cpumask  frees  a  cpumask  previously allocated by numa_allo‐
497       cate_cpumask.
498
499       numa_allocate_nodemask() returns a bitmask of a size equal to the  ker‐
500       nel's node mask (kernel type nodemask_t).  In other words, large enough
501       to represent MAX_NUMNODES nodes.  This number of nodes can be gotten by
502       calling numa_num_possible_nodes().  The bitmask is zero-filled.
503
504       numa_free_nodemask()  frees a nodemask previously allocated by numa_allo‐
505       cate_nodemask().
506
507       numa_bitmask_alloc() allocates a bitmask structure and  its  associated
508       bit  mask.  The memory allocated for the bit mask contains enough words
509       (type unsigned long) to contain n bits.  The bit mask  is  zero-filled.
510       The bitmask structure points to the bit mask and contains the n value.
511
512       numa_bitmask_clearall()  sets  all bits in the bit mask to 0.  The bit‐
513       mask structure points to the bit mask  and  contains  its  size  (  bmp
514       ->size).   The  value  of  bmp is always returned.  Note that numa_bit‐
515       mask_alloc() creates a zero-filled bit mask.
516
517       numa_bitmask_clearbit() sets a specified bit in a bit mask to 0.  Noth‐
518       ing is done if the n value is greater than the size of the bitmask (and
519       no error is returned). The value of bmp is always returned.
520
521       numa_bitmask_equal() returns 1 if two bitmasks are equal.  It returns 0
522       if  they are not equal.  If the bitmask structures control bit masks of
523       different sizes, the "missing" trailing bits of the  smaller  bit  mask
524       are considered to be 0.
525
526       numa_bitmask_free()  deallocates  the memory of both the bitmask struc‐
527       ture pointed to by bmp and the bit mask.  It is an error to attempt  to
528       free this bitmask twice.
529
530       numa_bitmask_isbitset()  returns  the value of a specified bit in a bit
531       mask.  If the n value is greater than the size of the  bit  map,  0  is
532       returned.
533
534       numa_bitmask_nbytes()  returns the size (in bytes) of the bit mask con‐
535       trolled by bmp.  The bit masks are always  full  words  (type  unsigned
536       long), and the returned size is the actual size of all those words.
537
538       numa_bitmask_setall()  sets all bits in the bit mask to 1.  The bitmask
539       structure points to the bit mask and contains its size (  bmp  ->size).
540       The value of bmp is always returned.
541
542       numa_bitmask_setbit() sets a specified bit in a bit mask to 1.  Nothing
543       is done if n is greater than the size of the bitmask (and no  error  is
544       returned). The value of bmp is always returned.
545
546       copy_bitmask_to_nodemask()  copies the body (the bit map itself) of the
547       bitmask structure pointed to by bmp to the nodemask_t structure pointed
548       to  by  the nodemask pointer. If the two areas differ in size, the copy
549       is truncated to the size of the receiving field or zero-filled.
550
551       copy_nodemask_to_bitmask() copies the nodemask_t structure  pointed  to
552       by the nodemask pointer to the body (the bit map itself) of the bitmask
553       structure pointed to by the bmp pointer. If the  two  areas  differ  in
554       size, the copy is truncated to the size of the receiving field or zero-
555       filled.
556
557       copy_bitmask_to_bitmask() copies the body (the bit map itself)  of  the
558       bitmask  structure pointed to by the bmpfrom pointer to the body of the
559       bitmask structure pointed to by the bmpto pointer.  If  the  two  areas
560       differ  in  size,  the  copy  is truncated to the size of the receiving
561       field or zero-filled.
562
563       numa_bitmask_weight() returns a count of the bits that are set  in  the
564       body of the bitmask pointed to by the bmp argument.
565
566       numa_move_pages()  moves  a  list  of pages in the address space of the
567       currently executing or current process.  It simply uses the  move_pages
568       system call.
569       pid - ID of task.  If not valid, use the current task.
570       count - Number of pages.
571       pages - List of pages to move.
572       nodes - List of nodes to which pages can be moved.
573       status - Field to which status is to be returned.
574       flags - MPOL_MF_MOVE or MPOL_MF_MOVE_ALL
575       See move_pages(2).
576
577       numa_migrate_pages() simply uses the migrate_pages system call to cause
578       the pages of the calling task, or a specified task, to be migrated  from
579       one set of nodes to another.  See migrate_pages(2).  The bit masks rep‐
580       resenting the nodes should be allocated with numa_allocate_nodemask() ,
581       or   with   numa_bitmask_alloc()   using   an  n  value  returned  from
582       numa_num_possible_nodes().  A task's current node set can be gotten  by
583       calling  numa_get_membind().   Bits  in  the tonodes mask can be set by
584       calls to numa_bitmask_setbit().
585
586       numa_error() is a libnuma internal function that can be  overridden  by
587       the  user program.  This function is called with a char * argument when
588       a libnuma function fails.  Overriding the library  internal  definition
589       makes it possible to specify a different error handling strategy when a
590       libnuma function fails.  It  does  not  affect  numa_available().   The
591       numa_error()  function defined in libnuma prints an error on stderr and
592       terminates the program if  numa_exit_on_error  is  set  to  a  non-zero
593       value.  The default value of numa_exit_on_error is zero.
594
595       numa_warn()  is a libnuma internal function that can be also overridden
596       by the user program.  It is called to warn  the  user  when  a  libnuma
597       function  encounters  a  non-fatal  error.   The default implementation
598       prints a warning to stderr.  The first  argument  is  a  unique  number
599       identifying  each warning. After that there is a printf(3)-style format
600       string and a variable number of arguments.  numa_warn exits the program
601       when  numa_exit_on_warn  is set to a non-zero value.  The default value
602       of numa_exit_on_warn is zero.
603
604

Compatibility with libnuma version 1

606       Binaries that were compiled for libnuma version 1 need not  be  re-com‐
607       piled to run with libnuma version 2.
608       Source  codes  written for libnuma version 1 may be re-compiled without
609       change with version 2 installed. To do so, in the code's  Makefile  add
610       this option to CFLAGS:  -DNUMA_VERSION1_COMPATIBILITY
611
612

THREAD SAFETY

614       numa_set_bind_policy  and  numa_exit_on_error  are  process global. The
615       other calls are thread safe.
616
617
619       Copyright 2002, 2004, 2007, 2008 Andi Kleen,  SuSE  Labs.   libnuma  is
620       under the GNU Lesser General Public License, v2.1.
621
622

SEE ALSO

624       get_mempolicy(2),  set_mempolicy(2), getpagesize(2), mbind(2), mmap(2),
625       shmat(2),   numactl(8),    sched_getaffinity(2)    sched_setaffinity(2)
626       move_pages(2) migrate_pages(2)
627
628
629
630SuSE Labs                        December 2007                         NUMA(3)
Impressum