1string_copying(7)      Miscellaneous Information Manual      string_copying(7)
2
3
4

NAME

6       stpcpy,  strcpy,  strcat,  stpecpy, strlcpy, strlcat, stpncpy, strncpy,
7       zustr2ustp, zustr2stp, strncat, ustpcpy, ustr2stp - copying strings and
8       character sequences
9

SYNOPSIS

11   Strings
12       // Chain-copy a string.
13       char *stpcpy(char *restrict dst, const char *restrict src);
14
15       // Copy/catenate a string.
16       char *strcpy(char *restrict dst, const char *restrict src);
17       char *strcat(char *restrict dst, const char *restrict src);
18
19       // Chain-copy a string with truncation.
20       char *stpecpy(char *dst, char end[0], const char *restrict src);
21
22       // Copy/catenate a string with truncation.
23       size_t strlcpy(char dst[restrict .sz], const char *restrict src,
24                      size_t sz);
25       size_t strlcat(char dst[restrict .sz], const char *restrict src,
26                      size_t sz);
27
28   Null-padded character sequences
29       // Zero a fixed-width buffer, and
30       // copy a string into a character sequence with truncation.
31       char *stpncpy(char dst[restrict .sz], const char *restrict src,
32                      size_t sz);
33
34       // Zero a fixed-width buffer, and
35       // copy a string into a character sequence with truncation.
36       char *strncpy(char dst[restrict .sz], const char *restrict src,
37                      size_t sz);
38
39       // Chain-copy a null-padded character sequence into a character sequence.
40       char *zustr2ustp(char *restrict dst, const char src[restrict .sz],
41                      size_t sz);
42
43       // Chain-copy a null-padded character sequence into a string.
44       char *zustr2stp(char *restrict dst, const char src[restrict .sz],
45                      size_t sz);
46
47       // Catenate a null-padded character sequence into a string.
48       char *strncat(char *restrict dst, const char src[restrict .sz],
49                      size_t sz);
50
51   Measured character sequences
52       // Chain-copy a measured character sequence.
53       char *ustpcpy(char *restrict dst, const char src[restrict .len],
54                      size_t len);
55
56       // Chain-copy a measured character sequence into a string.
57       char *ustr2stp(char *restrict dst, const char src[restrict .len],
58                      size_t len);
59

DESCRIPTION

61   Terms (and abbreviations)
62       string (str)
63              is  a sequence of zero or more non-null characters followed by a
64              null byte.
65
66       character sequence
67              is a sequence of zero or more non-null  characters.   A  program
68              should  never  use  a  character  sequence where a string is re‐
69              quired.  However, with appropriate care, a string can be used in
70              the place of a character sequence.
71
72              null-padded character sequence (zustr)
73                     Character  sequences can be contained in fixed-width buf‐
74                     fers, which contain padding null bytes after the  charac‐
75                     ter  sequence, to fill the rest of the buffer without af‐
76                     fecting the character sequence;  however,  those  padding
77                     null bytes are not part of the character sequence.
78
79              measured character sequence (ustr)
80                     Character  sequence delimited by its length.  It may be a
81                     slice of a  larger  character  sequence,  or  even  of  a
82                     string.
83
84       length (len)
85              is  the  number  of non-null characters in a string or character
86              sequence.   It  is  the  return  value  of  strlen(str)  and  of
87              strnlen(ustr, sz).
88
89       size (sz)
90              refers  to  the  entire buffer where the string or character se‐
91              quence is contained.
92
93       end    is the name of a pointer to one past the last element of a  buf‐
94              fer.   It  is  equivalent to &str[sz].  It is used as a sentinel
95              value, to be able to truncate strings or character sequences in‐
96              stead of overrunning the containing buffer.
97
98       copy   This  term  is used when the writing starts at the first element
99              pointed to by dst.
100
101       catenate
102              This term is used when a function first  finds  the  terminating
103              null byte in dst, and then starts writing at that position.
104
105       chain  This  term  is  used  when  it's  the  programmer who provides a
106              pointer to the terminating null byte in the string dst  (or  one
107              after the last character in a character sequence), and the func‐
108              tion starts writing at that location.  The  function  returns  a
109              pointer to the new location of the terminating null byte (or one
110              after the last character in  a  character  sequence)  after  the
111              call, so that the programmer can use it to chain such calls.
112
113   Copy, catenate, and chain-copy
114       Originally,  there  was  a  distinction between functions that copy and
115       those that catenate.  However, newer functions that copy while allowing
116       chaining  cover  both use cases with a single API.  They are also algo‐
117       rithmically faster, since they don't need to search for the terminating
118       null  byte  of  the  existing string.  However, functions that catenate
119       have a much simpler use, so if performance is  not  important,  it  can
120       make sense to use them for improving readability.
121
122       The pointer returned by functions that allow chaining is a byproduct of
123       the copy operation, so it has no performance costs.  Functions that re‐
124       turn  such  a  pointer, and thus can be chained, have names of the form
125       *stp*(), since it's common to name the pointer just p.
126
127       Chain-copying functions that truncate should accept a  pointer  to  the
128       end  of  the  destination  buffer, and have names of the form *stpe*().
129       This allows not having to recalculate the  remaining  size  after  each
130       call.
131
132   Truncate or not?
133       The first thing to note is that programmers should be careful with buf‐
134       fers, so they always have the correct size, and truncation is not  nec‐
135       essary.
136
137       In  most cases, truncation is not desired, and it is simpler to just do
138       the copy.  Simpler code is safer code.  Programming against programming
139       mistakes  by  adding more code just adds more points where mistakes can
140       be made.
141
142       Nowadays, compilers can detect most  programmer  errors  with  features
143       like  compiler  warnings,  static  analyzers,  and _FORTIFY_SOURCE (see
144       ftm(7)).  Keeping the code simple helps these  overflow-detection  fea‐
145       tures be more precise.
146
147       When  validating  user input, however, it makes sense to truncate.  Re‐
148       member to check the return value of such function calls.
149
150       Functions that truncate:
151
152       •  stpecpy(3) is the most efficient string copy function that  performs
153          truncation.  It only requires checking for truncation once after all
154          chained calls.
155
156       •  strlcpy(3bsd) and strlcat(3bsd) are designed to crash if  the  input
157          string is invalid (doesn't contain a terminating null byte).
158
159       •  stpncpy(3)  and  strncpy(3)  also  truncate,  but  they  don't write
160          strings, but rather null-padded character sequences.
161
162   Null-padded character sequences
163       For historic reasons, some standard APIs, such as utmpx(5),  use  null-
164       padded  character  sequences in fixed-width buffers.  To interface with
165       them, specialized functions need to be used.
166
167       To copy strings into them, use stpncpy(3).
168
169       To copy from an unterminated string within a fixed-width buffer into  a
170       string, ignoring any trailing null bytes in the source fixed-width buf‐
171       fer, you should use zustr2stp(3) or strncat(3).
172
173       To copy from an unterminated string within a fixed-width buffer into  a
174       character  sequence,  ignoring  any  trailing  null bytes in the source
175       fixed-width buffer, you should use zustr2ustp(3).
176
177   Measured character sequences
178       The simplest character sequence copying function is mempcpy(3).  It re‐
179       quires always knowing the length of your character sequences, for which
180       structures can be used.  It makes the code much faster, since  you  al‐
181       ways  know the length of your character sequences, and can do the mini‐
182       mal copies and length measurements.  mempcpy(3)  copies  character  se‐
183       quences, so you need to explicitly set the terminating null byte if you
184       need a string.
185
186       However, for keeping type safety, it's good to add a wrapper that  uses
187       char * instead of void *: ustpcpy(3).
188
189       In  programs  that  make  considerable  use of strings or character se‐
190       quences, and need the best performance, using overlapping character se‐
191       quences can make a big difference.  It allows holding subsequences of a
192       larger character sequence, while not duplicating memory nor using time
193       to do a copy.
194
195       However, this is delicate, since it requires using character sequences.
196       C library APIs use strings, so programs that  use  character  sequences
197       will  have  to  take care of differentiating strings from character se‐
198       quences.
199
200       To copy a measured character sequence, use ustpcpy(3).
201
202       To copy a measured character sequence into a string, use ustr2stp(3).
203
204       Because these functions ask for the length, and a string is  by  nature
205       composed  of a character sequence of the same length plus a terminating
206       null byte, a string is also accepted as input.
207
208   String vs character sequence
209       Some functions only operate on strings.  Those require that  the  input
210       src  is  a string, and guarantee an output string (even when truncation
211       occurs).  Functions that catenate also require that dst holds a  string
212       before the call.  List of functions:
213
214       •  stpcpy(3)
215       •  strcpy(3), strcat(3)
216       •  stpecpy(3)
217       •  strlcpy(3bsd), strlcat(3bsd)
218
219       Other  functions  require  an  input string, but create a character se‐
220       quence as output.  These functions have confusing  names,  and  have  a
221       long history of misuse.  List of functions:
222
223       •  stpncpy(3)
224       •  strncpy(3)
225
226       Other  functions  operate on an input character sequence, and create an
227       output string.  Functions that catenate also require that dst  holds  a
228       string  before  the  call.  strncat(3) has an even more misleading name
229       than the functions above.  List of functions:
230
231       •  zustr2stp(3)
232       •  strncat(3)
233       •  ustr2stp(3)
234
235       Other functions operate on an input character  sequence  to  create  an
236       output character sequence.  List of functions:
237
238       •  ustpcpy(3)
239       •  zustr2ustp(3)
240
241   Functions
242       stpcpy(3)
243              This function copies the input string into a destination string.
244              The programmer is responsible  for  allocating  a  buffer  large
245              enough.  It returns a pointer suitable for chaining.
246
247       strcpy(3)
248       strcat(3)
249              These functions copy and catenate the input string into a desti‐
250              nation string.  The programmer is responsible for  allocating  a
251              buffer large enough.  The return value is useless.
252
253              stpcpy(3) is a faster alternative to these functions.
254
255       stpecpy(3)
256              This function copies the input string into a destination string.
257              If the destination buffer, limited by  a  pointer  to  its  end,
258              isn't  large  enough  to  hold the copy, the resulting string is
259              truncated (but it is guaranteed to be null-terminated).  It  re‐
260              turns  a  pointer suitable for chaining.  Truncation needs to be
261              detected only once after the last chained call.
262
263              This function is not provided by any library; see EXAMPLES for a
264              reference implementation.
265
266       strlcpy(3bsd)
267       strlcat(3bsd)
268              These functions copy and catenate the input string into a desti‐
269              nation string.  If the destination buffer, limited by its  size,
270              isn't  large  enough  to  hold the copy, the resulting string is
271              truncated (but it is guaranteed to  be  null-terminated).   They
272              return  the  length  of  the  total string they tried to create.
273              These functions force a SIGSEGV if the  src  pointer  is  not  a
274              string.
275
276              stpecpy(3) is a simpler alternative to these functions.
277
278       stpncpy(3)
279              This  function  copies the input string into a destination null-
280              padded character sequence in a fixed-width buffer.  If the  des‐
281              tination buffer, limited by its size, isn't large enough to hold
282              the copy, the resulting character sequence is truncated.   Since
283              it creates a character sequence, it doesn't need to write a ter‐
284              minating null byte.  It's impossible to  distinguish  truncation
285              by  the  result of the call, from a character sequence that just
286              fits the destination buffer; truncation should  be  detected  by
287              comparing  the  length  of the input string with the size of the
288              destination buffer.
289
290       strncpy(3)
291              This function is identical to stpncpy(3) except for the  useless
292              return value.
293
294              stpncpy(3) is a more useful alternative to this function.
295
296       zustr2ustp(3)
297              This function copies the input character sequence contained in a
298              null-padded fixed-width buffer, into a destination character se‐
299              quence.   The  programmer is responsible for allocating a buffer
300              large enough.  It returns a pointer suitable for chaining.
301
302              A truncating version of this function doesn't exist,  since  the
303              size  of  the original character sequence is always known, so it
304              wouldn't be very useful.
305
306              This function is not provided by any library; see EXAMPLES for a
307              reference implementation.
308
309       zustr2stp(3)
310              This function copies the input character sequence contained in a
311              null-padded fixed-width buffer, into a destination string.   The
312              programmer  is responsible for allocating a buffer large enough.
313              It returns a pointer suitable for chaining.
314
315              A truncating version of this function doesn't exist,  since  the
316              size  of  the original character sequence is always known, so it
317              wouldn't be very useful.
318
319              This function is not provided by any library; see EXAMPLES for a
320              reference implementation.
321
322       strncat(3)
323              Do  not  confuse this function with strncpy(3); they are not re‐
324              lated at all.
325
326              This function catenates the input character  sequence  contained
327              in  a null-padded fixed-width buffer, into a destination string.
328              The programmer is responsible  for  allocating  a  buffer  large
329              enough.  The return value is useless.
330
331              zustr2stp(3) is a faster alternative to this function.
332
333       ustpcpy(3)
334              This  function  copies  the input character sequence, limited by
335              its length, into a destination character sequence.  The program‐
336              mer is responsible for allocating a buffer large enough.  It re‐
337              turns a pointer suitable for chaining.
338
339       ustr2stp(3)
340              This function copies the input character  sequence,  limited  by
341              its  length,  into  a destination string.  The programmer is re‐
342              sponsible for allocating a buffer large enough.   It  returns  a
343              pointer suitable for chaining.
344

RETURN VALUE

346       The  following  functions return a pointer to the terminating null byte
347       in the destination string.
348
349       •  stpcpy(3)
350       •  ustr2stp(3)
351       •  zustr2stp(3)
352
353       The following function returns a pointer to the terminating  null  byte
354       in the destination string, except when truncation occurs; if truncation
355       occurs, it returns a pointer to the end of the destination buffer.
356
357       •  stpecpy(3)
358
359       The following function returns a pointer to one after the last  charac‐
360       ter  in  the destination character sequence; if truncation occurs, that
361       pointer is equivalent to a pointer to the end of the  destination  buf‐
362       fer.
363
364       •  stpncpy(3)
365
366       The  following functions return a pointer to one after the last charac‐
367       ter in the destination character sequence.
368
369       •  zustr2ustp(3)
370       •  ustpcpy(3)
371
372       The following functions return the length of the total string that they
373       tried to create (as if truncation didn't occur).
374
375       •  strlcpy(3bsd), strlcat(3bsd)
376
377       The following functions return the dst pointer, which is useless.
378
379       •  strcpy(3), strcat(3)
380       •  strncpy(3)
381       •  strncat(3)
382

NOTES

384       The Linux kernel has an internal function for copying strings, which is
385       similar to stpecpy(3), except that it can't be chained:
386
387       strscpy(9)
388              This function copies the input string into a destination string.
389              If  the  destination  buffer,  limited  by its size, isn't large
390              enough to hold the copy, the resulting string is truncated  (but
391              it  is guaranteed to be null-terminated).  It returns the length
392              of the destination string, or -E2BIG on truncation.
393
394              stpecpy(3) is a simpler and faster alternative to this function.
395

CAVEATS

397       Don't mix chain calls to truncating and non-truncating  functions.   It
398       is  conceptually  wrong  unless  you know that the first part of a copy
399       will always fit.  Anyway, the performance difference will  probably  be
400       negligible, so it will probably be more clear if you use consistent se‐
401       mantics: either truncating or non-truncating.  Calling a non-truncating
402       function after a truncating one is necessarily wrong.
403

BUGS

405       All  catenation  functions share the same performance problem: Shlemiel
406       the painter ⟨https://www.joelonsoftware.com/2001/12/11/back-to-basics/⟩.
408

EXAMPLES

410       The following are examples of correct use of each of these functions.
411
412       stpcpy(3)
413              p = buf;
414              p = stpcpy(p, "Hello ");
415              p = stpcpy(p, "world");
416              p = stpcpy(p, "!");
417              len = p - buf;
418              puts(buf);
419
420       strcpy(3)
421       strcat(3)
422              strcpy(buf, "Hello ");
423              strcat(buf, "world");
424              strcat(buf, "!");
425              len = strlen(buf);
426              puts(buf);
427
428       stpecpy(3)
429              end = buf + sizeof(buf);
430              p = buf;
431              p = stpecpy(p, end, "Hello ");
432              p = stpecpy(p, end, "world");
433              p = stpecpy(p, end, "!");
434              if (p == end) {
435                  p--;
436                  goto toolong;
437              }
438              len = p - buf;
439              puts(buf);
440
441       strlcpy(3bsd)
442       strlcat(3bsd)
443              if (strlcpy(buf, "Hello ", sizeof(buf)) >= sizeof(buf))
444                  goto toolong;
445              if (strlcat(buf, "world", sizeof(buf)) >= sizeof(buf))
446                  goto toolong;
447              len = strlcat(buf, "!", sizeof(buf));
448              if (len >= sizeof(buf))
449                  goto toolong;
450              puts(buf);
451
452       strscpy(9)
453              len = strscpy(buf, "Hello world!", sizeof(buf));
454              if (len == -E2BIG)
455                  goto toolong;
456              puts(buf);
457
458       stpncpy(3)
459              p = stpncpy(buf, "Hello world!", sizeof(buf));
460              if (sizeof(buf) < strlen("Hello world!"))
461                  goto toolong;
462              len = p - buf;
463              for (size_t i = 0; i < sizeof(buf); i++)
464                  putchar(buf[i]);
465
466       strncpy(3)
467              strncpy(buf, "Hello world!", sizeof(buf));
468              if (sizeof(buf) < strlen("Hello world!"))
469                  goto toolong;
470              len = strnlen(buf, sizeof(buf));
471              for (size_t i = 0; i < sizeof(buf); i++)
472                  putchar(buf[i]);
473
474       zustr2ustp(3)
475              p = buf;
476              p = zustr2ustp(p, "Hello ", 6);
477              p = zustr2ustp(p, "world", 42);  // Padding null bytes ignored.
478              p = zustr2ustp(p, "!", 1);
479              len = p - buf;
480              printf("%.*s\n", (int) len, buf);
481
482       zustr2stp(3)
483              p = buf;
484              p = zustr2stp(p, "Hello ", 6);
485              p = zustr2stp(p, "world", 42);  // Padding null bytes ignored.
486              p = zustr2stp(p, "!", 1);
487              len = p - buf;
488              puts(buf);
489
490       strncat(3)
491              buf[0] = '\0';  // There's no 'cpy' function to this 'cat'.
492              strncat(buf, "Hello ", 6);
493              strncat(buf, "world", 42);  // Padding null bytes ignored.
494              strncat(buf, "!", 1);
495              len = strlen(buf);
496              puts(buf);
497
498       ustpcpy(3)
499              p = buf;
500              p = ustpcpy(p, "Hello ", 6);
501              p = ustpcpy(p, "world", 5);
502              p = ustpcpy(p, "!", 1);
503              len = p - buf;
504              printf("%.*s\n", (int) len, buf);
505
506       ustr2stp(3)
507              p = buf;
508              p = ustr2stp(p, "Hello ", 6);
509              p = ustr2stp(p, "world", 5);
510              p = ustr2stp(p, "!", 1);
511              len = p - buf;
512              puts(buf);
513
514   Implementations
515       Here are reference implementations for functions not provided by libc.
516
517           /* This code is in the public domain. */
518
519           char *
520           stpecpy(char *dst, char end[0], const char *restrict src)
521           {
522               char *p;
523
524               if (dst == NULL)
525                   return NULL;
526               if (dst == end)
527                   return end;
528
529               p = memccpy(dst, src, '\0', end - dst);
530               if (p != NULL)
531                   return p - 1;
532
533               /* truncation detected */
534               end[-1] = '\0';
535               return end;
536           }
537
538           char *
539           zustr2ustp(char *restrict dst, const char *restrict src, size_t sz)
540           {
541               return ustpcpy(dst, src, strnlen(src, sz));
542           }
543
544           char *
545           zustr2stp(char *restrict dst, const char *restrict src, size_t sz)
546           {
547               char  *p;
548
549               p = zustr2ustp(dst, src, sz);
550               *p = '\0';
551
552               return p;
553           }
554
555           char *
556           ustpcpy(char *restrict dst, const char *restrict src, size_t len)
557           {
558               return mempcpy(dst, src, len);
559           }
560
561           char *
562           ustr2stp(char *restrict dst, const char *restrict src, size_t len)
563           {
564               char  *p;
565
566               p = ustpcpy(dst, src, len);
567               *p = '\0';
568
569               return p;
570           }
571

SEE ALSO

573       bzero(3),  memcpy(3), memccpy(3), mempcpy(3), stpcpy(3), strlcpy(3bsd),
574       strncat(3), stpncpy(3), string(3)
575
576
577
578Linux man-pages 6.04              2023-03-08                 string_copying(7)
Impressum