1fi_atomic(3)                   Libfabric v1.8.0                   fi_atomic(3)
2
3
4

NAME

6       fi_atomic - Remote atomic functions
7
8       fi_atomic / fi_atomicv / fi_atomicmsg / fi_inject_atomic
9              Initiates an atomic operation to remote memory
10
11       fi_fetch_atomic / fi_fetch_atomicv / fi_fetch_atomicmsg
12              Initiates  an  atomic operation to remote memory, retrieving the
13              initial value.
14
15       fi_compare_atomic / fi_compare_atomicv / fi_compare_atomicmsg
16              Initiates an atomic compare-operation to remote memory, retriev‐
17              ing the initial value.
18
19       fi_atomicvalid / fi_fetch_atomicvalid / fi_compare_atomicvalid /
20       fi_query_atomic
21              Indicates if a provider supports a specific atomic operation
22

SYNOPSIS

24              #include <rdma/fi_atomic.h>
25
26              ssize_t fi_atomic(struct fid_ep *ep, const void *buf,
27                  size_t count, void *desc, fi_addr_t dest_addr,
28                  uint64_t addr, uint64_t key,
29                  enum fi_datatype datatype, enum fi_op op, void *context);
30
31              ssize_t fi_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
32                  void **desc, size_t count, fi_addr_t dest_addr,
33                  uint64_t addr, uint64_t key,
34                  enum fi_datatype datatype, enum fi_op op, void *context);
35
36              ssize_t fi_atomicmsg(struct fid_ep *ep, const struct fi_msg_atomic *msg,
37                  uint64_t flags);
38
39              ssize_t fi_inject_atomic(struct fid_ep *ep, const void *buf,
40                  size_t count, fi_addr_t dest_addr,
41                  uint64_t addr, uint64_t key,
42                  enum fi_datatype datatype, enum fi_op op);
43
44              ssize_t fi_fetch_atomic(struct fid_ep *ep, const void *buf,
45                  size_t count, void *desc, void *result, void *result_desc,
46                  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
47                  enum fi_datatype datatype, enum fi_op op, void *context);
48
49              ssize_t fi_fetch_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
50                  void **desc, size_t count, struct fi_ioc *resultv,
51                  void **result_desc, size_t result_count, fi_addr_t dest_addr,
52                  uint64_t addr, uint64_t key, enum fi_datatype datatype,
53                  enum fi_op op, void *context);
54
55              ssize_t fi_fetch_atomicmsg(struct fid_ep *ep,
56                  const struct fi_msg_atomic *msg, struct fi_ioc *resultv,
57                  void **result_desc, size_t result_count, uint64_t flags);
58
59              ssize_t fi_compare_atomic(struct fid_ep *ep, const void *buf,
60                  size_t count, void *desc, const void *compare,
61                  void *compare_desc, void *result, void *result_desc,
62                  fi_addr_t dest_addr, uint64_t addr, uint64_t key,
63                  enum fi_datatype datatype, enum fi_op op, void *context);
64
65              ssize_t fi_compare_atomicv(struct fid_ep *ep, const struct fi_ioc *iov,
66                     void **desc, size_t count, const struct fi_ioc *comparev,
67                     void **compare_desc, size_t compare_count, struct fi_ioc *resultv,
68                     void **result_desc, size_t result_count, fi_addr_t dest_addr,
69                     uint64_t addr, uint64_t key, enum fi_datatype datatype,
70                     enum fi_op op, void *context);
71
72              ssize_t fi_compare_atomicmsg(struct fid_ep *ep,
73                  const struct fi_msg_atomic *msg, const struct fi_ioc *comparev,
74                  void **compare_desc, size_t compare_count,
75                  struct fi_ioc *resultv, void **result_desc, size_t result_count,
76                  uint64_t flags);
77
78              int fi_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
79                  enum fi_op op, size_t *count);
80
81              int fi_fetch_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
82                  enum fi_op op, size_t *count);
83
84              int fi_compare_atomicvalid(struct fid_ep *ep, enum fi_datatype datatype,
85                  enum fi_op op, size_t *count);
86
87              int fi_query_atomic(struct fid_domain *domain,
88                  enum fi_datatype datatype, enum fi_op op,
89                  struct fi_atomic_attr *attr, uint64_t flags);
90

ARGUMENTS

92       ep     Fabric endpoint on which to initiate atomic operation.
93
94       buf    Local  data buffer that specifies first operand of atomic opera‐
95              tion
96
97       iov / comparev / resultv
98              Vectored data buffer(s).
99
100       count / compare_count / result_count
101              Count of vectored data entries.  The number of  elements  refer‐
102              enced, where each element is the indicated datatype.
103
104       addr   Address of remote memory to access.
105
106       key    Protection key associated with the remote memory.
107
108       datatype
109              Datatype associated with atomic operands
110
111       op     Atomic operation to perform
112
113       compare
114              Local compare buffer, containing comparison data.
115
116       result Local data buffer to store initial value of remote buffer
117
118       desc / compare_desc / result_desc
119              Data  descriptor  associated  with  the local data buffer, local
120              compare buffer, and local result buffer, respectively.
121
122       dest_addr
123              Destination address for connectionless atomic  operations.   Ig‐
124              nored for connected endpoints.
125
126       msg    Message descriptor for atomic operations
127
128       flags  Additional flags to apply for the atomic operation
129
130       context
131              User  specified  pointer  to associate with the operation.  This
132              parameter is ignored if the operation will not generate  a  suc‐
133              cessful  completion, unless an op flag specifies the context pa‐
134              rameter be used for required input.
135

DESCRIPTION

137       Atomic transfers are used to read and update  data  located  in  remote
138       memory regions in an atomic fashion.  Conceptually, they are similar to
139       local atomic operations of a similar nature  (e.g.   atomic  increment,
140       compare and swap, etc.).  Updates to remote data involve one of several
141       operations on the data, and act on specific types of  data,  as  listed
142       below.   As  such, atomic transfers have knowledge of the format of the
143       data being accessed.  A single atomic function may  operate  across  an
144       array of data applying an atomic operation to each entry, but the atom‐
145       icity of an operation is limited to a single datatype or entry.
146
147   Atomic Data Types
148       Atomic functions may operate on one of the  following  identified  data
149       types.   A  given  atomic function may support any datatype, subject to
150       provider implementation constraints.
151
152       FI_INT8
153              Signed 8-bit integer.
154
155       FI_UINT8
156              Unsigned 8-bit integer.
157
158       FI_INT16
159              Signed 16-bit integer.
160
161       FI_UINT16
162              Unsigned 16-bit integer.
163
164       FI_INT32
165              Signed 32-bit integer.
166
167       FI_UINT32
168              Unsigned 32-bit integer.
169
170       FI_INT64
171              Signed 64-bit integer.
172
173       FI_UINT64
174              Unsigned 64-bit integer.
175
176       FI_FLOAT
177              A single-precision floating point value (IEEE 754).
178
179       FI_DOUBLE
180              A double-precision floating point value (IEEE 754).
181
182       FI_FLOAT_COMPLEX
183              An ordered pair of single-precision floating point values  (IEEE
184              754),  with  the  first value representing the real portion of a
185              complex number and the second representing  the  imaginary  por‐
186              tion.
187
188       FI_DOUBLE_COMPLEX
189              An  ordered pair of double-precision floating point values (IEEE
190              754), with the first value representing the real  portion  of  a
191              complex  number  and  the second representing the imaginary por‐
192              tion.
193
194       FI_LONG_DOUBLE
195              A double-extended precision floating  point  value  (IEEE  754).
196              Note  that the size of a long double and number of bits used for
197              precision is compiler, platform, and/or provider specific.   De‐
198              velopers  that  use  long double should ensure that libfabric is
199              built using a long double format that is compatible  with  their
200              application,  and that format is supported by the provider.  The
201              mechanism used for this validation is currently beyond the scope
202              of the libfabric API.
203
204       FI_LONG_DOUBLE_COMPLEX
205              An ordered pair of double-extended precision floating point val‐
206              ues (IEEE 754), with the first value representing the real  por‐
207              tion  of a complex number and the second representing the imagi‐
208              nary portion.
209
210   Atomic Operations
211       The following atomic operations are defined.  An atomic operation often
212       acts against a target value in the remote memory buffer and source val‐
213       ue provided with the atomic function.  It may also carry source data to
214       replace  the target value in compare and swap operations.  A conceptual
215       description of each operation is provided.
216
217       FI_MIN Minimum
218
219              if (buf[i] < addr[i])
220                  addr[i] = buf[i]
221
222       FI_MAX Maximum
223
224              if (buf[i] > addr[i])
225                  addr[i] = buf[i]
226
227       FI_SUM Sum
228
229              addr[i] = addr[i] + buf[i]
230
231       FI_PROD
232              Product
233
234              addr[i] = addr[i] * buf[i]
235
236       FI_LOR Logical OR
237
238              addr[i] = (addr[i] || buf[i])
239
240       FI_LAND
241              Logical AND
242
243              addr[i] = (addr[i] && buf[i])
244
245       FI_BOR Bitwise OR
246
247              addr[i] = addr[i] | buf[i]
248
249       FI_BAND
250              Bitwise AND
251
252              addr[i] = addr[i] & buf[i]
253
254       FI_LXOR
255              Logical exclusive-OR (XOR)
256
257              addr[i] = ((addr[i] && !buf[i]) || (!addr[i] && buf[i]))
258
259       FI_BXOR
260              Bitwise exclusive-OR (XOR)
261
262              addr[i] = addr[i] ^ buf[i]
263
264       FI_ATOMIC_READ
265              Read data atomically
266
267              result[i] = addr[i]
268
269       FI_ATOMIC_WRITE
270              Write data atomically
271
272              addr[i] = buf[i]
273
274       FI_CSWAP
275              Compare values and if equal swap with data
276
277              if (compare[i] == addr[i])
278                  addr[i] = buf[i]
279
280       FI_CSWAP_NE
281              Compare values and if not equal swap with data
282
283              if (compare[i] != addr[i])
284                  addr[i] = buf[i]
285
286       FI_CSWAP_LE
287              Compare values and if less than or equal swap with data
288
289              if (compare[i] <= addr[i])
290                  addr[i] = buf[i]
291
292       FI_CSWAP_LT
293              Compare values and if less than swap with data
294
295              if (compare[i] < addr[i])
296                  addr[i] = buf[i]
297
298       FI_CSWAP_GE
299              Compare values and if greater than or equal swap with data
300
301              if (compare[i] >= addr[i])
302                  addr[i] = buf[i]
303
304       FI_CSWAP_GT
305              Compare values and if greater than swap with data
306
307              if (compare[i] > addr[i])
308                  addr[i] = buf[i]
309
310       FI_MSWAP
311              Swap masked bits with data
312
313              addr[i] = (buf[i] & compare[i]) | (addr[i] & ~compare[i])
314
315   Base Atomic Functions
316       The base atomic functions -- fi_atomic, fi_atomicv, fi_atomicmsg -- are
317       used  to transmit data to a remote node, where the specified atomic op‐
318       eration is performed against the target data.  The  result  of  a  base
319       atomic  function  is stored at the remote memory region.  The main dif‐
320       ference  between atomic functions is the number and type of parameters
321       that  they  accept  as input.  Otherwise, they perform the same general
322       function.
323
324       The call fi_atomic transfers the data contained in  the  user-specified
325       data  buffer to a remote node.  For unconnected endpoints, the destina‐
326       tion endpoint is specified through the dest_addr parameter.  Unless the
327       endpoint  has  been configured differently, the data buffer passed into
328       fi_atomic must not be touched by the application  until  the  fi_atomic
329       call  completes asynchronously.  The target buffer of a base atomic op‐
330       eration must allow for remote read and/or write access, as appropriate.
331
332       The fi_atomicv call adds support for a scatter-gather list to  fi_atom‐
333       ic.  The fi_atomicv transfers the set of data buffers referenced by the
334       iov parameter to the remote node for processing.
335
336       The fi_inject_atomic call is an optimized version  of  fi_atomic.   The
337       fi_inject_atomic  function  behaves  as  if the FI_INJECT transfer flag
338       were set, and FI_COMPLETION were not.  That  is,  the  data  buffer  is
339       available  for  reuse  immediately on returning from fi_inject_atom‐
340       ic, and no completion event will be generated  for  this  atomic.   The
341       completion  event  will be suppressed even if the endpoint has not been
342       configured with FI_SELECTIVE_COMPLETION.  See the flags discussion  be‐
343       low for more details.  The requested message size that can be used with
344       fi_inject_atomic is limited by inject_size.
345
346       The fi_atomicmsg call supports atomic functions over both connected and
347       unconnected endpoints, with the ability to control the atomic operation
348       per call through the use of flags.  The fi_atomicmsg function  takes  a
349       struct fi_msg_atomic as input.
350
351              struct fi_msg_atomic {
352                  const struct fi_ioc *msg_iov; /* local scatter-gather array */
353                  void                **desc;   /* local access descriptors */
354                  size_t              iov_count;/* # elements in ioc */
355                  const void          *addr;    /* optional endpoint address */
356                  const struct fi_rma_ioc *rma_iov; /* remote SGL */
357                  size_t              rma_iov_count;/* # elements in remote SGL */
358                  enum fi_datatype    datatype; /* operand datatype */
359                  enum fi_op          op;       /* atomic operation */
360                  void                *context; /* user-defined context */
361                  uint64_t            data;     /* optional data */
362              };
363
364              struct fi_ioc {
365                  void        *addr;    /* local address */
366                  size_t      count;    /* # target operands */
367              };
368
369              struct fi_rma_ioc {
370                  uint64_t    addr;     /* target address */
371                  size_t      count;    /* # target operands */
372                  uint64_t    key;      /* access key */
373              };
374
375       The following list of atomic operations are usable with base atomic op‐
376       erations: FI_MIN, FI_MAX, FI_SUM,  FI_PROD,  FI_LOR,  FI_LAND,  FI_BOR,
377       FI_BAND, FI_LXOR, FI_BXOR, and FI_ATOMIC_WRITE.
378
379   Fetch-Atomic Functions
380       The  fetch  atomic  functions -- fi_fetch_atomic, fi_fetch_atomicv, and
381       fi_fetch_atomicmsg -- behave similarly to the equivalent  base  atomic
382       function.   The difference between the fetch and base atomic calls is
383       that the fetch atomic routines return the initial value that was stored at
384       the  target  to the user.  The initial value is read into the user pro‐
385       vided result buffer.  The target buffer of fetch-atomic operations must
386       be enabled for remote read access.
387
388       The  following  list  of atomic operations are usable with fetch atomic
389       operations: FI_MIN, FI_MAX, FI_SUM, FI_PROD, FI_LOR,  FI_LAND,  FI_BOR,
390       FI_BAND, FI_LXOR, FI_BXOR, FI_ATOMIC_READ, and FI_ATOMIC_WRITE.
391
392       For   FI_ATOMIC_READ   operations,  the  source  buffer  operand  (e.g.
393       fi_fetch_atomic buf parameter) is ignored and may be NULL.  The results
394       are written into the result buffer.
395
396   Compare-Atomic Functions
397       The  compare atomic functions -- fi_compare_atomic, fi_compare_atomicv,
398       and fi_compare_atomicmsg -- are used for operations that  require  com‐
399       paring  the target data against a value before performing a swap opera‐
400       tion.  The compare atomic  functions  support:  FI_CSWAP,  FI_CSWAP_NE,
401       FI_CSWAP_LE, FI_CSWAP_LT, FI_CSWAP_GE, FI_CSWAP_GT, and FI_MSWAP.
402
403   Atomic Valid Functions
404       The atomic valid functions -- fi_atomicvalid, fi_fetch_atomicvalid, and
405       fi_compare_atomicvalid -- indicate which operations the local  provider
406       supports.  Needed operations not supported by the provider must be emu‐
407       lated by the application.  Each valid call  corresponds  to  a  set  of
408       atomic  functions.  fi_atomicvalid checks whether a provider supports a
409       specific base atomic operation for  a  given  datatype  and  operation.
410       fi_fetch_atomicvalid  indicates  if  a  provider  supports  a  specific
411       fetch-atomic operation for a given datatype and operation.  And fi_com‐
412       pare_atomicvalid  checks  if  a  provider  supports  a  specified  com‐
413       pare-atomic operation for a given datatype and operation.
414
415       If an operation is supported, an atomic valid call will return 0, along
416       with  a count of atomic data units that a single function call will op‐
417       erate on.
418
419   Query Atomic Attributes
420       The fi_query_atomic call acts as an  enhanced  atomic  valid  operation
421       (see  the atomic valid function definitions above).  It is provided, in
422       part, for future extensibility.   The  query  operation  reports  which
423       atomic  operations are supported by the domain, for suitably configured
424       endpoints.
425
426       The behavior of fi_query_atomic is adjusted based on the flags  parame‐
427       ter.   If  flags  is 0, then the operation reports the supported atomic
428       attributes for base atomic operations, similar  to  fi_atomicvalid  for
429       endpoints.  If flags has the FI_FETCH_ATOMIC bit set, the operation be‐
430       haves similar to fi_fetch_atomicvalid.  Similarly, the flag bit FI_COM‐
431       PARE_ATOMIC  results  in  query  acting as fi_compare_atomicvalid.  The
432       FI_FETCH_ATOMIC and FI_COMPARE_ATOMIC bits may not both be set.
433
434       If the FI_TAGGED bit is set, the provider will indicate if it  supports
435       atomic  operations to tagged receive buffers.  The FI_TAGGED bit may be
436       used by itself, or in conjunction with the FI_FETCH_ATOMIC and  FI_COM‐
437       PARE_ATOMIC flags.
438
439       The output of fi_query_atomic is struct fi_atomic_attr:
440
441              struct fi_atomic_attr {
442                  size_t count;
443                  size_t size;
444              };
445
446       The  count  attribute  field  is as defined for the atomic valid calls.
447       The size field indicates the size in bytes of the atomic datatype.
448
449   Completions
450       Completed atomic operations are reported to the initiator  of  the  re‐
451       quest through an associated completion queue or counter.  Any user pro‐
452       vided context specified with the request will be returned  as  part  of
453       any  completion  event written to a CQ.  See fi_cq for completion event
454       details.
455
456       Any results returned to the initiator as part of  an  atomic  operation
457       will  be  available  prior to a completion event being generated.  This
458       will be true even if the requested completion semantic provides a weak‐
459       er  guarantee.   That is, atomic fetch operations have FI_DELIVERY_COM‐
460       PLETE semantics.  Completions generated for other types of atomic oper‐
461       ations indicate that it is safe to re-use the source data buffers.
462
463       Any  updates to data at the target of an atomic operation will be visi‐
464       ble to agents (CPU processes, NICs, and other devices)  on  the  target
465       node  prior to one of the following occurring.  If the atomic operation
466       generates a completion event or updates a  completion  counter  at  the
467       target  endpoint, the results will be available prior to the completion
468       notification.  After processing a completion for  the  atomic,  if  the
469       initiator  submits a transfer between the same endpoints that generates
470       a completion at the target, the results will be available prior to  the
471       subsequent  transfer's  event.   Or, if a fenced data transfer from the
472       initiator follows the atomic request, the  results  will  be  available
473       prior to a completion at the target for the fenced transfer.
474
475       The correctness of atomic operations on a target memory region is guar‐
476       anteed only when performed by a single actor  for  a  given  window  of
477       time.   An actor is defined as a single libfabric domain (identified by
478       the domain name, and not an open instance of that domain),  a  coherent
479       CPU  complex,  or other device (e.g.  GPU) capable of performing atomic
480       operations on the target memory.  The results of atomic operations per‐
481       formed  by  multiple actors simultaneously are undefined.  For example,
482       issuing CPU based atomic operations to a target region concurrently be‐
483       ing  updated by NIC based atomics may leave the region's data in an un‐
484       known state.  The results of a first actor's atomic operations must  be
485       visible  to  a  second  actor prior to the second actor issuing its own
486       atomics.
487

FLAGS

489       The fi_atomicmsg, fi_fetch_atomicmsg,  and  fi_compare_atomicmsg  calls
490       allow  the  user  to  specify  flags  which can change the default data
491       transfer operation.  Flags specified  with  atomic  message  operations
492       override  most  flags  previously  configured with the endpoint, except
493       where noted (see fi_control).  The following list of flags  are  usable
494       with atomic message calls.
495
496       FI_COMPLETION
497              Indicates  that  a  completion entry should be generated for the
498              specified operation.  The endpoint must be bound to a completion
499              queue with FI_SELECTIVE_COMPLETION that corresponds to the spec‐
500              ified operation, or this flag is ignored.
501
502       FI_MORE
503              Indicates that the user has additional requests that will  imme‐
504              diately  be  posted after the current call returns.  Use of this
505              flag may improve performance by enabling the provider  to  opti‐
506              mize its access to the fabric hardware.
507
508       FI_INJECT
509              Indicates  that  the  control of constant data buffers should be
510              returned to the user immediately after the call returns, even if
511              the  operation is handled asynchronously.  This may require that
512              the underlying provider implementation copy the data into a  lo‐
513              cal  buffer and transfer out of that buffer.  Constant data buf‐
514              fers refers to any data buffer or iovec used by the atomic  APIs
515              that  are marked as 'const'.  Non-constant or output buffers are
516              unaffected by this flag and may be accessed by the  provider  at
517              anytime  until  the operation has completed.  This flag can only
518              be used with messages smaller than inject_size.
519
520       FI_FENCE
521              Applies to transmits.  Indicates that the  requested  operation,
522              also known as the fenced operation, and any operation posted af‐
523              ter the fenced operation will be deferred until all previous op‐
524              erations targeting the same peer endpoint have completed.  Oper‐
525              ations posted after the fencing will see and/or replace the  re‐
526              sults of any operations initiated prior to the fenced operation.
527
528       The ordering of operations starting at the posting of the fenced opera‐
529       tion (inclusive) to the posting of a subsequent fenced  operation  (ex‐
530       clusive) is controlled by the endpoint's ordering semantics.
531
532       FI_TAGGED
533              Specifies  that  the  target of the atomic operation is a tagged
534              receive buffer instead of an RMA buffer.  When a  tagged  buffer
535              is  the  target  memory  region, the addr parameter is used as a
536              0-based byte offset into the tagged buffer, with the key parame‐
537              ter specifying the tag.
538

RETURN VALUE

540       Returns 0 on success.  On error, a negative value corresponding to fab‐
541       ric errno is returned.  Fabric errno values are defined in  rdma/fi_er‐
542       rno.h.
543

ERRORS

545       -FI_EAGAIN
546              See fi_msg(3) for a detailed description of handling FI_EAGAIN.
547
548       -FI_EOPNOTSUPP
549              The  requested  atomic  operation  is not supported on this end‐
550              point.
551
552       -FI_EMSGSIZE
553              The number of atomic operations in a single request exceeds that
554              supported by the underlying provider.
555

NOTES

557       Atomic  operations  operate  on  an  array of values of a specific data
558       type.  Atomicity is only guaranteed for each data type  operation,  not
559       across  the  entire array.  The following pseudo-code demonstrates this
560       operation for 64-bit unsigned  atomic  write.   ATOMIC_WRITE_U64  is  a
561       platform  dependent  macro that atomically writes 8 bytes to an aligned
562       memory location.
563
564              fi_atomic(ep, buf, count, NULL, dest_addr, addr, key,
565                    FI_UINT64, FI_ATOMIC_WRITE, context)
566              {
567                  for (i = 0; i < count; i ++)
568                      ATOMIC_WRITE_U64(((uint64_t *) addr)[i],
569                               ((uint64_t *) buf)[i]);
570              }
571
572       The number of array elements to operate on is specified through a count
573       parameter.  This must be between 1 and the maximum returned through the
574       relevant valid operation, inclusive.  The requested operation and  data
575       type must also be valid for the given provider.
576
577       The  ordering of atomic operations carried as part of different request
578       messages is subject to the message and data  ordering  definitions  as‐
579       signed  to  the transmitting and receiving endpoints.  Both message and
580       data ordering are required if the results of two atomic  operations  to
581       the  same  memory buffers are to reflect the second operation acting on
582       the results of the first.  See fi_endpoint(3) for further  details  and
583       message size restrictions.
584

SEE ALSO

586       fi_getinfo(3), fi_endpoint(3), fi_domain(3), fi_cq(3), fi_rma(3)
587

AUTHORS

589       OpenFabrics.
590
591
592
593Libfabric Programmer's Manual     2019-02-27                      fi_atomic(3)
Impressum