1string_copying(7)      Miscellaneous Information Manual      string_copying(7)
2
3
4

NAME

6       stpcpy,  strcpy,  strcat,  stpecpy, strlcpy, strlcat, stpncpy, strncpy,
7       zustr2ustp, zustr2stp, strncat, ustpcpy, ustr2stp - copying strings and
8       character sequences
9

SYNOPSIS

11   Strings
12       // Chain-copy a string.
13       char *stpcpy(char *restrict dst, const char *restrict src);
14
15       // Copy/catenate a string.
16       char *strcpy(char *restrict dst, const char *restrict src);
17       char *strcat(char *restrict dst, const char *restrict src);
18
19       // Chain-copy a string with truncation.
20       char *stpecpy(char *dst, char end[0], const char *restrict src);
21
22       // Copy/catenate a string with truncation.
23       size_t strlcpy(char dst[restrict .sz], const char *restrict src,
24                      size_t sz);
25       size_t strlcat(char dst[restrict .sz], const char *restrict src,
26                      size_t sz);
27
28   Null-padded character sequences
29       // Zero a fixed-width buffer, and
30       // copy a string into a character sequence with truncation.
31       char *stpncpy(char dst[restrict .sz], const char *restrict src,
32                      size_t sz);
33
34       // Zero a fixed-width buffer, and
35       // copy a string into a character sequence with truncation.
36       char *strncpy(char dst[restrict .sz], const char *restrict src,
37                      size_t sz);
38
39       // Chain-copy a null-padded character sequence into a character sequence.
40       char *zustr2ustp(char *restrict dst, const char src[restrict .sz],
41                      size_t sz);
42
43       // Chain-copy a null-padded character sequence into a string.
44       char *zustr2stp(char *restrict dst, const char src[restrict .sz],
45                      size_t sz);
46
47       // Catenate a null-padded character sequence into a string.
48       char *strncat(char *restrict dst, const char src[restrict .sz],
49                      size_t sz);
50
51   Measured character sequences
52       // Chain-copy a measured character sequence.
53       char *ustpcpy(char *restrict dst, const char src[restrict .len],
54                      size_t len);
55
56       // Chain-copy a measured character sequence into a string.
57       char *ustr2stp(char *restrict dst, const char src[restrict .len],
58                      size_t len);
59

DESCRIPTION

61   Terms (and abbreviations)
62       string (str)
63              is  a sequence of zero or more non-null characters followed by a
64              null byte.
65
66       character sequence
67              is a sequence of zero or more non-null  characters.   A  program
68              should  never  use  a  character  sequence where a string is re‐
69              quired.  However, with appropriate care, a string can be used in
70              the place of a character sequence.
71
72              null-padded character sequence (zustr)
73                     Character  sequences can be contained in fixed-width buf‐
74                     fers, which contain padding null bytes after the  charac‐
75                     ter  sequence, to fill the rest of the buffer without af‐
76                     fecting the character sequence;  however,  those  padding
77                     null bytes are not part of the character sequence.
78
79              measured character sequence (ustr)
80                     Character  sequence delimited by its length.  It may be a
81                     slice of a  larger  character  sequence,  or  even  of  a
82                     string.
83
84       length (len)
85              is  the  number  of non-null characters in a string or character
86              sequence.   It  is  the  return  value  of  strlen(str)  and  of
87              strnlen(ustr, sz).
88
89       size (sz)
90              refers  to  the  entire buffer where the string or character se‐
91              quence is contained.
92
93       end    is the name of a pointer to one past the last element of a  buf‐
94              fer.   It  is  equivalent to &str[sz].  It is used as a sentinel
95              value, to be able to truncate strings or character sequences in‐
96              stead of overrunning the containing buffer.
97
98       copy   This  term  is used when the writing starts at the first element
99              pointed to by dst.
100
101       catenate
102              This term is used when a function first  finds  the  terminating
103              null byte in dst, and then starts writing at that position.
104
105       chain  This  term  is  used  when  it's  the  programmer who provides a
106              pointer to the terminating null byte in the string dst  (or  one
107              after the last character in a character sequence), and the func‐
108              tion starts writing at that location.  The  function  returns  a
109              pointer to the new location of the terminating null byte (or one
110              after the last character in  a  character  sequence)  after  the
111              call, so that the programmer can use it to chain such calls.
112
113   Copy, catenate, and chain-copy
114       Originally,  there  was  a  distinction between functions that copy and
115       those that catenate.  However, newer functions that copy while allowing
116       chaining  cover  both use cases with a single API.  They are also algo‐
117       rithmically faster, since they don't need to search for the terminating
118       null  byte  of  the  existing string.  However, functions that catenate
119       have a much simpler use, so if performance is  not  important,  it  can
120       make sense to use them for improving readability.
121
122       The pointer returned by functions that allow chaining is a byproduct of
123       the copy operation, so it has no performance costs.  Functions that re‐
124       turn  such  a  pointer, and thus can be chained, have names of the form
125       *stp*(), since it's common to name the pointer just p.
126
127       Chain-copying functions that truncate should accept a  pointer  to  the
128       end  of  the  destination  buffer, and have names of the form *stpe*().
129       This allows not having to recalculate the  remaining  size  after  each
130       call.
131
132   Truncate or not?
133       The first thing to note is that programmers should be careful with buf‐
134       fers, so they always have the correct size, and truncation is not  nec‐
135       essary.
136
137       In  most cases, truncation is not desired, and it is simpler to just do
138       the copy.  Simpler code is safer code.  Programming against programming
139       mistakes  by  adding more code just adds more points where mistakes can
140       be made.
141
142       Nowadays, compilers can detect most  programmer  errors  with  features
143       like  compiler  warnings,  static  analyzers,  and _FORTIFY_SOURCE (see
144       ftm(7)).  Keeping the code simple helps these  overflow-detection  fea‐
145       tures be more precise.
146
147       When  validating  user input, however, it makes sense to truncate.  Re‐
148       member to check the return value of such function calls.
149
150       Functions that truncate:
151
152       •  stpecpy(3) is the most efficient string copy function that  performs
153          truncation.  It only requires checking for truncation once, after all
154          chained calls.
155
156       •  strlcpy(3bsd) and strlcat(3bsd) are similar, but less efficient when
157          chained.
158
159       •  stpncpy(3) and strncpy(3) also truncate,  but  they  write  null-padded
160          character sequences rather than strings.
161
162   Null-padded character sequences
163       For historic reasons, some standard APIs, such as utmpx(5),  use  null-
164       padded  character  sequences in fixed-width buffers.  To interface with
165       them, specialized functions need to be used.
166
167       To copy strings into them, use stpncpy(3).
168
169       To  copy  from  a  null-padded  character sequence within a fixed-width
170       buffer into a string, ignoring any trailing null bytes  in  the  source
171       fixed-width buffer, you should use zustr2stp(3) or strncat(3).
172
173       To  copy  from  a  null-padded  character sequence within a fixed-width
174       buffer into a character sequence, ignoring any trailing null  bytes  in
175       the source fixed-width buffer, you should use zustr2ustp(3).
176
177   Measured character sequences
178       The simplest character sequence copying function is mempcpy(3).  It re‐
179       quires always knowing the length of your character sequences, for which
180       structures can be used.  It makes the code much faster, since  you  al‐
181       ways  know the length of your character sequences, and can do the mini‐
182       mal copies and length measurements.  mempcpy(3)  copies  character  se‐
183       quences, so you need to explicitly set the terminating null byte if you
184       need a string.
185
186       However, for keeping type safety, it's good to add a wrapper that  uses
187       char * instead of void *: ustpcpy(3).
188
189       In  programs  that  make  considerable  use of strings or character se‐
190       quences, and need the best performance, using overlapping character se‐
191       quences can make a big difference.  It allows holding subsequences of a
192       larger character sequence, while not duplicating memory nor using  time
193       to do a copy.
194
195       However, this is delicate, since it requires using character sequences.
196       C library APIs use strings, so programs that  use  character  sequences
197       will  have  to  take care of differentiating strings from character se‐
198       quences.
199
200       To copy a measured character sequence, use ustpcpy(3).
201
202       To copy a measured character sequence into a string, use ustr2stp(3).
203
204       Because these functions ask for the length, and a string is  by  nature
205       composed  of a character sequence of the same length plus a terminating
206       null byte, a string is also accepted as input.
207
208   String vs character sequence
209       Some functions only operate on strings.  Those require that  the  input
210       src  is  a string, and guarantee an output string (even when truncation
211       occurs).  Functions that catenate also require that dst holds a  string
212       before the call.  List of functions:
213
214       •  stpcpy(3)
215       •  strcpy(3), strcat(3)
216       •  stpecpy(3)
217       •  strlcpy(3bsd), strlcat(3bsd)
218
219       Other  functions  require  an  input string, but create a character se‐
220       quence as output.  These functions have confusing  names,  and  have  a
221       long history of misuse.  List of functions:
222
223       •  stpncpy(3)
224       •  strncpy(3)
225
226       Other  functions  operate on an input character sequence, and create an
227       output string.  Functions that catenate also require that dst  holds  a
228       string  before  the  call.  strncat(3) has an even more misleading name
229       than the functions above.  List of functions:
230
231       •  zustr2stp(3)
232       •  strncat(3)
233       •  ustr2stp(3)
234
235       Other functions operate on an input character  sequence  to  create  an
236       output character sequence.  List of functions:
237
238       •  ustpcpy(3)
239       •  zustr2ustp(3)
240
241   Functions
242       stpcpy(3)
243              This function copies the input string into a destination string.
244              The programmer is responsible  for  allocating  a  buffer  large
245              enough.  It returns a pointer suitable for chaining.
246
247       strcpy(3)
248       strcat(3)
249              These functions copy and catenate the input string into a desti‐
250              nation string.  The programmer is responsible for  allocating  a
251              buffer large enough.  The return value is useless.
252
253              stpcpy(3) is a faster alternative to these functions.
254
255       stpecpy(3)
256              This function copies the input string into a destination string.
257              If the destination buffer, limited by  a  pointer  to  its  end,
258              isn't  large  enough  to  hold the copy, the resulting string is
259              truncated (but it is guaranteed to be null-terminated).  It  re‐
260              turns  a  pointer suitable for chaining.  Truncation needs to be
261              detected only once after the last chained call.
262
263              This function is not provided by any library; see EXAMPLES for a
264              reference implementation.
265
266       strlcpy(3bsd)
267       strlcat(3bsd)
268              These functions copy and catenate the input string into a desti‐
269              nation string.  If the destination buffer, limited by its  size,
270              isn't  large  enough  to  hold the copy, the resulting string is
271              truncated (but it is guaranteed to  be  null-terminated).   They
272              return the length of the total string they tried to create.
273
274              stpecpy(3) is a simpler alternative to these functions.
275
276       stpncpy(3)
277              This  function  copies the input string into a destination null-
278              padded character sequence in a fixed-width buffer.  If the  des‐
279              tination buffer, limited by its size, isn't large enough to hold
280              the copy, the resulting character sequence is truncated.   Since
281              it creates a character sequence, it doesn't need to write a ter‐
282              minating null byte.  It's impossible to  distinguish  truncation
283              by  the  result of the call, from a character sequence that just
284              fits the destination buffer; truncation should  be  detected  by
285              comparing  the  length  of the input string with the size of the
286              destination buffer.
287
288       strncpy(3)
289              This function is identical to stpncpy(3) except for the  useless
290              return value.
291
292              stpncpy(3) is a more useful alternative to this function.
293
294       zustr2ustp(3)
295              This  function copies the input character sequence, contained in
296              a null-padded fixed-width buffer, into a  destination  character
297              sequence.  The programmer is responsible for allocating a buffer
298              large enough.  It returns a pointer suitable for chaining.
299
300              A truncating version of this function doesn't exist,  since  the
301              size  of  the original character sequence is always known, so it
302              wouldn't be very useful.
303
304              This function is not provided by any library; see EXAMPLES for a
305              reference implementation.
306
307       zustr2stp(3)
308              This  function copies the input character sequence, contained in
309              a null-padded fixed-width buffer,  into  a  destination  string.
310              The  programmer  is  responsible  for  allocating a buffer large
311              enough.  It returns a pointer suitable for chaining.
312
313              A truncating version of this function doesn't exist,  since  the
314              size  of  the original character sequence is always known, so it
315              wouldn't be very useful.
316
317              This function is not provided by any library; see EXAMPLES for a
318              reference implementation.
319
320       strncat(3)
321              Do  not  confuse this function with strncpy(3); they are not re‐
322              lated at all.
323
324              This function catenates the input character sequence,  contained
325              in  a null-padded fixed-width buffer, into a destination string.
326              The programmer is responsible  for  allocating  a  buffer  large
327              enough.  The return value is useless.
328
329              zustr2stp(3) is a faster alternative to this function.
330
331       ustpcpy(3)
332              This  function  copies  the input character sequence, limited by
333              its length, into a destination character sequence.  The program‐
334              mer is responsible for allocating a buffer large enough.  It re‐
335              turns a pointer suitable for chaining.
336
337       ustr2stp(3)
338              This function copies the input character  sequence,  limited  by
339              its  length,  into  a destination string.  The programmer is re‐
340              sponsible for allocating a buffer large enough.   It  returns  a
341              pointer suitable for chaining.
342

RETURN VALUE

344       The  following  functions return a pointer to the terminating null byte
345       in the destination string.
346
347       •  stpcpy(3)
348       •  ustr2stp(3)
349       •  zustr2stp(3)
350
351       The following function returns a pointer to the terminating  null  byte
352       in the destination string, except when truncation occurs; if truncation
353       occurs, it returns a pointer to the end of the destination buffer.
354
355       •  stpecpy(3)
356
357       The following function returns a pointer to one after the last  charac‐
358       ter  in  the destination character sequence; if truncation occurs, that
359       pointer is equivalent to a pointer to the end of the  destination  buf‐
360       fer.
361
362       •  stpncpy(3)
363
364       The  following functions return a pointer to one after the last charac‐
365       ter in the destination character sequence.
366
367       •  zustr2ustp(3)
368       •  ustpcpy(3)
369
370       The following functions return the length of the total string that they
371       tried to create (as if truncation didn't occur).
372
373       •  strlcpy(3bsd), strlcat(3bsd)
374
375       The following functions return the dst pointer, which is useless.
376
377       •  strcpy(3), strcat(3)
378       •  strncpy(3)
379       •  strncat(3)
380

NOTES

382       The Linux kernel has an internal function for copying strings, which is
383       similar to stpecpy(3), except that it can't be chained:
384
385       strscpy(9)
386              This function copies the input string into a destination string.
387              If  the  destination  buffer,  limited  by its size, isn't large
388              enough to hold the copy, the resulting string is truncated  (but
389              it  is guaranteed to be null-terminated).  It returns the length
390              of the destination string, or -E2BIG on truncation.
391
392              stpecpy(3) is a simpler and faster alternative to this function.
393

CAVEATS

395       Don't mix chain calls to truncating and non-truncating  functions.   It
396       is  conceptually  wrong  unless  you know that the first part of a copy
397       will always fit.  Anyway, the performance difference will  probably  be
398       negligible, so it will probably be more clear if you use consistent se‐
399       mantics: either truncating or non-truncating.  Calling a non-truncating
400       function after a truncating one is necessarily wrong.
401

BUGS

403       All  catenation  functions share the same performance problem: Shlemiel
404       the painter
405       ⟨https://www.joelonsoftware.com/2001/12/11/back-to-basics/⟩.
406

EXAMPLES

408       The following are examples of correct use of each of these functions.
409
410       stpcpy(3)
411              p = buf;
412              p = stpcpy(p, "Hello ");
413              p = stpcpy(p, "world");
414              p = stpcpy(p, "!");
415              len = p - buf;
416              puts(buf);
417
418       strcpy(3)
419       strcat(3)
420              strcpy(buf, "Hello ");
421              strcat(buf, "world");
422              strcat(buf, "!");
423              len = strlen(buf);
424              puts(buf);
425
426       stpecpy(3)
427              end = buf + sizeof(buf);
428              p = buf;
429              p = stpecpy(p, end, "Hello ");
430              p = stpecpy(p, end, "world");
431              p = stpecpy(p, end, "!");
432              if (p == end) {
433                  p--;
434                  goto toolong;
435              }
436              len = p - buf;
437              puts(buf);
438
439       strlcpy(3bsd)
440       strlcat(3bsd)
441              if (strlcpy(buf, "Hello ", sizeof(buf)) >= sizeof(buf))
442                  goto toolong;
443              if (strlcat(buf, "world", sizeof(buf)) >= sizeof(buf))
444                  goto toolong;
445              len = strlcat(buf, "!", sizeof(buf));
446              if (len >= sizeof(buf))
447                  goto toolong;
448              puts(buf);
449
450       strscpy(9)
451              len = strscpy(buf, "Hello world!", sizeof(buf));
452              if (len == -E2BIG)
453                  goto toolong;
454              puts(buf);
455
456       stpncpy(3)
457              p = stpncpy(buf, "Hello world!", sizeof(buf));
458              if (sizeof(buf) < strlen("Hello world!"))
459                  goto toolong;
460              len = p - buf;
461              for (size_t i = 0; i < sizeof(buf); i++)
462                  putchar(buf[i]);
463
464       strncpy(3)
465              strncpy(buf, "Hello world!", sizeof(buf));
466              if (sizeof(buf) < strlen("Hello world!"))
467                  goto toolong;
468              len = strnlen(buf, sizeof(buf));
469              for (size_t i = 0; i < sizeof(buf); i++)
470                  putchar(buf[i]);
471
472       zustr2ustp(3)
473              p = buf;
474              p = zustr2ustp(p, "Hello ", 6);
475              p = zustr2ustp(p, "world", 42);  // Padding null bytes ignored.
476              p = zustr2ustp(p, "!", 1);
477              len = p - buf;
478              printf("%.*s\n", (int) len, buf);
479
480       zustr2stp(3)
481              p = buf;
482              p = zustr2stp(p, "Hello ", 6);
483              p = zustr2stp(p, "world", 42);  // Padding null bytes ignored.
484              p = zustr2stp(p, "!", 1);
485              len = p - buf;
486              puts(buf);
487
488       strncat(3)
489              buf[0] = '\0';  // There's no 'cpy' function to this 'cat'.
490              strncat(buf, "Hello ", 6);
491              strncat(buf, "world", 42);  // Padding null bytes ignored.
492              strncat(buf, "!", 1);
493              len = strlen(buf);
494              puts(buf);
495
496       ustpcpy(3)
497              p = buf;
498              p = ustpcpy(p, "Hello ", 6);
499              p = ustpcpy(p, "world", 5);
500              p = ustpcpy(p, "!", 1);
501              len = p - buf;
502              printf("%.*s\n", (int) len, buf);
503
504       ustr2stp(3)
505              p = buf;
506              p = ustr2stp(p, "Hello ", 6);
507              p = ustr2stp(p, "world", 5);
508              p = ustr2stp(p, "!", 1);
509              len = p - buf;
510              puts(buf);
511
512   Implementations
513       Here are reference implementations for functions not provided by libc.
514
515           /* This code is in the public domain. */
516
517           char *
518           stpecpy(char *dst, char end[0], const char *restrict src)
519           {
520               char *p;
521
522               if (dst == NULL)
523                   return NULL;
524               if (dst == end)
525                   return end;
526
527               p = memccpy(dst, src, '\0', end - dst);
528               if (p != NULL)
529                   return p - 1;
530
531               /* truncation detected */
532               end[-1] = '\0';
533               return end;
534           }
535
536           char *
537           zustr2ustp(char *restrict dst, const char *restrict src, size_t sz)
538           {
539               return ustpcpy(dst, src, strnlen(src, sz));
540           }
541
542           char *
543           zustr2stp(char *restrict dst, const char *restrict src, size_t sz)
544           {
545               char  *p;
546
547               p = zustr2ustp(dst, src, sz);
548               *p = '\0';
549
550               return p;
551           }
552
553           char *
554           ustpcpy(char *restrict dst, const char *restrict src, size_t len)
555           {
556               return mempcpy(dst, src, len);
557           }
558
559           char *
560           ustr2stp(char *restrict dst, const char *restrict src, size_t len)
561           {
562               char  *p;
563
564               p = ustpcpy(dst, src, len);
565               *p = '\0';
566
567               return p;
568           }
569

SEE ALSO

571       bzero(3),  memcpy(3), memccpy(3), mempcpy(3), stpcpy(3), strlcpy(3bsd),
572       strncat(3), stpncpy(3), string(3)
573
574
575
576Linux man-pages 6.05              2023-07-29                 string_copying(7)
Impressum