1string_copying(7) Miscellaneous Information Manual string_copying(7)
2
3
4
6 stpcpy, strcpy, strcat, stpecpy, strlcpy, strlcat, stpncpy, strncpy,
7 zustr2ustp, zustr2stp, strncat, ustpcpy, ustr2stp - copying strings and
8 character sequences
9
11 Strings
12 // Chain-copy a string.
13 char *stpcpy(char *restrict dst, const char *restrict src);
14
15 // Copy/catenate a string.
16 char *strcpy(char *restrict dst, const char *restrict src);
17 char *strcat(char *restrict dst, const char *restrict src);
18
19 // Chain-copy a string with truncation.
20 char *stpecpy(char *dst, char end[0], const char *restrict src);
21
22 // Copy/catenate a string with truncation.
23 size_t strlcpy(char dst[restrict .sz], const char *restrict src,
24 size_t sz);
25 size_t strlcat(char dst[restrict .sz], const char *restrict src,
26 size_t sz);
27
28 Null-padded character sequences
29 // Zero a fixed-width buffer, and
30 // copy a string into a character sequence with truncation.
31 char *stpncpy(char dst[restrict .sz], const char *restrict src,
32 size_t sz);
33
34 // Zero a fixed-width buffer, and
35 // copy a string into a character sequence with truncation.
36 char *strncpy(char dest[restrict .sz], const char *restrict src,
37 size_t sz);
38
39 // Chain-copy a null-padded character sequence into a character sequence.
40 char *zustr2ustp(char *restrict dst, const char src[restrict .sz],
41 size_t sz);
42
43 // Chain-copy a null-padded character sequence into a string.
44 char *zustr2stp(char *restrict dst, const char src[restrict .sz],
45 size_t sz);
46
47 // Catenate a null-padded character sequence into a string.
48 char *strncat(char *restrict dst, const char src[restrict .sz],
49 size_t sz);
50
51 Measured character sequences
52 // Chain-copy a measured character sequence.
53 char *ustpcpy(char *restrict dst, const char src[restrict .len],
54 size_t len);
55
56 // Chain-copy a measured character sequence into a string.
57 char *ustr2stp(char *restrict dst, const char src[restrict .len],
58 size_t len);
59
61 Terms (and abbreviations)
62 string (str)
63 is a sequence of zero or more non-null characters followed by a
64 null byte.
65
66 character sequence
67 is a sequence of zero or more non-null characters. A program
68 should never use a character sequence where a string is re‐
69 quired. However, with appropriate care, a string can be used in
70 the place of a character sequence.
71
72 null-padded character sequence (zustr)
73 Character sequences can be contained in fixed-width buf‐
74 fers, which contain padding null bytes after the charac‐
75 ter sequence, to fill the rest of the buffer without af‐
76 fecting the character sequence; however, those padding
77 null bytes are not part of the character sequence.
78
79 measured character sequence (ustr)
80 Character sequence delimited by its length. It may be a
81 slice of a larger character sequence, or even of a
82 string.
83
84 length (len)
85 is the number of non-null characters in a string or character
86 sequence. It is the return value of strlen(str) and of
87 strnlen(ustr, sz).
88
89 size (sz)
90 refers to the entire buffer where the string or character se‐
91 quence is contained.
92
93 end is the name of a pointer to one past the last element of a buf‐
94 fer. It is equivalent to &str[sz]. It is used as a sentinel
95 value, to be able to truncate strings or character sequences in‐
96 stead of overrunning the containing buffer.
97
98 copy This term is used when the writing starts at the first element
99 pointed to by dst.
100
101 catenate
102 This term is used when a function first finds the terminating
103 null byte in dst, and then starts writing at that position.
104
105 chain This term is used when it's the programmer who provides a
106 pointer to the terminating null byte in the string dst (or one
107 after the last character in a character sequence), and the func‐
108 tion starts writing at that location. The function returns a
109 pointer to the new location of the terminating null byte (or one
110 after the last character in a character sequence) after the
111 call, so that the programmer can use it to chain such calls.
112
113 Copy, catenate, and chain-copy
114 Originally, there was a distinction between functions that copy and
115 those that catenate. However, newer functions that copy while allowing
116 chaining cover both use cases with a single API. They are also algo‐
117 rithmically faster, since they don't need to search for the terminating
118 null byte of the existing string. However, functions that catenate
119 have a much simpler use, so if performance is not important, it can
120 make sense to use them for improving readability.
121
122 The pointer returned by functions that allow chaining is a byproduct of
123 the copy operation, so it has no performance costs. Functions that re‐
124 turn such a pointer, and thus can be chained, have names of the form
125 *stp*(), since it's common to name the pointer just p.
126
127 Chain-copying functions that truncate should accept a pointer to the
128 end of the destination buffer, and have names of the form *stpe*().
129 This allows not having to recalculate the remaining size after each
130 call.
131
132 Truncate or not?
133 The first thing to note is that programmers should be careful with buf‐
134 fers, so they always have the correct size, and truncation is not nec‐
135 essary.
136
137 In most cases, truncation is not desired, and it is simpler to just do
138 the copy. Simpler code is safer code. Programming against programming
139 mistakes by adding more code just adds more points where mistakes can
140 be made.
141
142 Nowadays, compilers can detect most programmer errors with features
143 like compiler warnings, static analyzers, and _FORTIFY_SOURCE (see
144 ftm(7)). Keeping the code simple helps these overflow-detection fea‐
145 tures be more precise.
146
147 When validating user input, however, it makes sense to truncate. Re‐
148 member to check the return value of such function calls.
149
150 Functions that truncate:
151
152 • stpecpy(3) is the most efficient string copy function that performs
153 truncation. It only requires checking for truncation once, after all
154 chained calls.
155
156 • strlcpy(3bsd) and strlcat(3bsd) are designed to crash if the input
157 string is invalid (doesn't contain a terminating null byte).
158
159 • stpncpy(3) and strncpy(3) also truncate, but they don't write
160 strings, but rather null-padded character sequences.
161
162 Null-padded character sequences
163 For historic reasons, some standard APIs, such as utmpx(5), use null-
164 padded character sequences in fixed-width buffers. To interface with
165 them, specialized functions need to be used.
166
167 To copy strings into them, use stpncpy(3).
168
169 To copy from a character sequence within a fixed-width buffer into a
170 string, ignoring any trailing null bytes in the source fixed-width buf‐
171 fer, you should use zustr2stp(3) or strncat(3).
172
173 To copy from a character sequence within a fixed-width buffer into a
174 character sequence, ignoring any trailing null bytes in the source
175 fixed-width buffer, you should use zustr2ustp(3).
176
177 Measured character sequences
178 The simplest character sequence copying function is mempcpy(3). It re‐
179 quires always knowing the length of your character sequences, for which
180 structures can be used. It makes the code much faster, since you al‐
181 ways know the length of your character sequences, and can do the mini‐
182 mal copies and length measurements. mempcpy(3) copies character se‐
183 quences, so you need to explicitly set the terminating null byte if you
184 need a string.
185
186 However, for keeping type safety, it's good to add a wrapper that uses
187 char * instead of void *: ustpcpy(3).
188
189 In programs that make considerable use of strings or character se‐
190 quences, and need the best performance, using overlapping character se‐
191 quences can make a big difference. It allows holding subsequences of a
192 larger character sequence, while not duplicating memory nor using time
193 to do a copy.
194
195 However, this is delicate, since it requires using character sequences.
196 C library APIs use strings, so programs that use character sequences
197 will have to take care of differentiating strings from character se‐
198 quences.
199
200 To copy a measured character sequence, use ustpcpy(3).
201
202 To copy a measured character sequence into a string, use ustr2stp(3).
203
204 Because these functions ask for the length, and a string is by nature
205 composed of a character sequence of the same length plus a terminating
206 null byte, a string is also accepted as input.
207
208 String vs character sequence
209 Some functions only operate on strings. Those require that the input
210 src is a string, and guarantee an output string (even when truncation
211 occurs). Functions that catenate also require that dst holds a string
212 before the call. List of functions:
213
214 • stpcpy(3)
215 • strcpy(3), strcat(3)
216 • stpecpy(3)
217 • strlcpy(3bsd), strlcat(3bsd)
218
219 Other functions require an input string, but create a character se‐
220 quence as output. These functions have confusing names, and have a
221 long history of misuse. List of functions:
222
223 • stpncpy(3)
224 • strncpy(3)
225
226 Other functions operate on an input character sequence, and create an
227 output string. Functions that catenate also require that dst holds a
228 string before the call. strncat(3) has an even more misleading name
229 than the functions above. List of functions:
230
231 • zustr2stp(3)
232 • strncat(3)
233 • ustr2stp(3)
234
235 Other functions operate on an input character sequence to create an
236 output character sequence. List of functions:
237
238 • ustpcpy(3)
239 • zustr2ustp(3)
240
241 Functions
242 stpcpy(3)
243 This function copies the input string into a destination string.
244 The programmer is responsible for allocating a buffer large
245 enough. It returns a pointer suitable for chaining.
246
247 strcpy(3)
248 strcat(3)
249 These functions copy and catenate the input string into a desti‐
250 nation string. The programmer is responsible for allocating a
251 buffer large enough. The return value is useless.
252
253 stpcpy(3) is a faster alternative to these functions.
254
255 stpecpy(3)
256 This function copies the input string into a destination string.
257 If the destination buffer, limited by a pointer to its end,
258 isn't large enough to hold the copy, the resulting string is
259 truncated (but it is guaranteed to be null-terminated). It re‐
260 turns a pointer suitable for chaining. Truncation needs to be
261 detected only once after the last chained call.
262
263 This function is not provided by any library; see EXAMPLES for a
264 reference implementation.
265
266 strlcpy(3bsd)
267 strlcat(3bsd)
268 These functions copy and catenate the input string into a desti‐
269 nation string. If the destination buffer, limited by its size,
270 isn't large enough to hold the copy, the resulting string is
271 truncated (but it is guaranteed to be null-terminated). They
272 return the length of the total string they tried to create.
273 These functions force a SIGSEGV if the src pointer is not a
274 string.
275
276 stpecpy(3) is a simpler alternative to these functions.
277
278 stpncpy(3)
279 This function copies the input string into a destination null-
280 padded character sequence in a fixed-width buffer. If the des‐
281 tination buffer, limited by its size, isn't large enough to hold
282 the copy, the resulting character sequence is truncated. Since
283 it creates a character sequence, it doesn't need to write a ter‐
284 minating null byte. It's impossible to distinguish truncation
285 by the result of the call, from a character sequence that just
286 fits the destination buffer; truncation should be detected by
287 comparing the length of the input string with the size of the
288 destination buffer.
289
290 strncpy(3)
291 This function is identical to stpncpy(3) except for the useless
292 return value.
293
294 stpncpy(3) is a more useful alternative to this function.
295
296 zustr2ustp(3)
297 This function copies the input character sequence contained in a
298 null-padded fixed-width buffer, into a destination character se‐
299 quence. The programmer is responsible for allocating a buffer
300 large enough. It returns a pointer suitable for chaining.
301
302 A truncating version of this function doesn't exist, since the
303 size of the original character sequence is always known, so it
304 wouldn't be very useful.
305
306 This function is not provided by any library; see EXAMPLES for a
307 reference implementation.
308
309 zustr2stp(3)
310 This function copies the input character sequence contained in a
311 null-padded fixed-width buffer, into a destination string. The
312 programmer is responsible for allocating a buffer large enough.
313 It returns a pointer suitable for chaining.
314
315 A truncating version of this function doesn't exist, since the
316 size of the original character sequence is always known, so it
317 wouldn't be very useful.
318
319 This function is not provided by any library; see EXAMPLES for a
320 reference implementation.
321
322 strncat(3)
323 Do not confuse this function with strncpy(3); they are not re‐
324 lated at all.
325
326 This function catenates the input character sequence contained
327 in a null-padded fixed-width buffer, into a destination string.
328 The programmer is responsible for allocating a buffer large
329 enough. The return value is useless.
330
331 zustr2stp(3) is a faster alternative to this function.
332
333 ustpcpy(3)
334 This function copies the input character sequence, limited by
335 its length, into a destination character sequence. The program‐
336 mer is responsible for allocating a buffer large enough. It re‐
337 turns a pointer suitable for chaining.
338
339 ustr2stp(3)
340 This function copies the input character sequence, limited by
341 its length, into a destination string. The programmer is re‐
342 sponsible for allocating a buffer large enough. It returns a
343 pointer suitable for chaining.
344
346 The following functions return a pointer to the terminating null byte
347 in the destination string.
348
349 • stpcpy(3)
350 • ustr2stp(3)
351 • zustr2stp(3)
352
353 The following function returns a pointer to the terminating null byte
354 in the destination string, except when truncation occurs; if truncation
355 occurs, it returns a pointer to the end of the destination buffer.
356
357 • stpecpy(3)
358
359 The following function returns a pointer to one after the last charac‐
360 ter in the destination character sequence; if truncation occurs, that
361 pointer is equivalent to a pointer to the end of the destination buf‐
362 fer.
363
364 • stpncpy(3)
365
366 The following functions return a pointer to one after the last charac‐
367 ter in the destination character sequence.
368
369 • zustr2ustp(3)
370 • ustpcpy(3)
371
372 The following functions return the length of the total string that they
373 tried to create (as if truncation didn't occur).
374
375 • strlcpy(3bsd), strlcat(3bsd)
376
377 The following functions return the dst pointer, which is useless.
378
379 • strcpy(3), strcat(3)
380 • strncpy(3)
381 • strncat(3)
382
384 The Linux kernel has an internal function for copying strings, which is
385 similar to stpecpy(3), except that it can't be chained:
386
387 strscpy(9)
388 This function copies the input string into a destination string.
389 If the destination buffer, limited by its size, isn't large
390 enough to hold the copy, the resulting string is truncated (but
391 it is guaranteed to be null-terminated). It returns the length
392 of the destination string, or -E2BIG on truncation.
393
394 stpecpy(3) is a simpler and faster alternative to this function.
395
397 Don't mix chain calls to truncating and non-truncating functions. It
398 is conceptually wrong unless you know that the first part of a copy
399 will always fit. Anyway, the performance difference will probably be
400 negligible, so it will probably be more clear if you use consistent se‐
401 mantics: either truncating or non-truncating. Calling a non-truncating
402 function after a truncating one is necessarily wrong.
403
405 All catenation functions share the same performance problem: Shlemiel
406 the painter ⟨https://www.joelonsoftware.com/2001/12/11/
407 back-to-basics/⟩.
408
410 The following are examples of correct use of each of these functions.
411
412 stpcpy(3)
413 p = buf;
414 p = stpcpy(p, "Hello ");
415 p = stpcpy(p, "world");
416 p = stpcpy(p, "!");
417 len = p - buf;
418 puts(buf);
419
420 strcpy(3)
421 strcat(3)
422 strcpy(buf, "Hello ");
423 strcat(buf, "world");
424 strcat(buf, "!");
425 len = strlen(buf);
426 puts(buf);
427
428 stpecpy(3)
429 end = buf + sizeof(buf);
430 p = buf;
431 p = stpecpy(p, end, "Hello ");
432 p = stpecpy(p, end, "world");
433 p = stpecpy(p, end, "!");
434 if (p == end) {
435 p--;
436 goto toolong;
437 }
438 len = p - buf;
439 puts(buf);
440
441 strlcpy(3bsd)
442 strlcat(3bsd)
443 if (strlcpy(buf, "Hello ", sizeof(buf)) >= sizeof(buf))
444 goto toolong;
445 if (strlcat(buf, "world", sizeof(buf)) >= sizeof(buf))
446 goto toolong;
447 len = strlcat(buf, "!", sizeof(buf));
448 if (len >= sizeof(buf))
449 goto toolong;
450 puts(buf);
451
452 strscpy(9)
453 len = strscpy(buf, "Hello world!", sizeof(buf));
454 if (len == -E2BIG)
455 goto toolong;
456 puts(buf);
457
458 stpncpy(3)
459 p = stpncpy(buf, "Hello world!", sizeof(buf));
460 if (sizeof(buf) < strlen("Hello world!"))
461 goto toolong;
462 len = p - buf;
463 for (size_t i = 0; i < sizeof(buf); i++)
464 putchar(buf[i]);
465
466 strncpy(3)
467 strncpy(buf, "Hello world!", sizeof(buf));
468 if (sizeof(buf) < strlen("Hello world!"))
469 goto toolong;
470 len = strnlen(buf, sizeof(buf));
471 for (size_t i = 0; i < sizeof(buf); i++)
472 putchar(buf[i]);
473
474 zustr2ustp(3)
475 p = buf;
476 p = zustr2ustp(p, "Hello ", 6);
477 p = zustr2ustp(p, "world", 42); // Padding null bytes ignored.
478 p = zustr2ustp(p, "!", 1);
479 len = p - buf;
480 printf("%.*s\n", (int) len, buf);
481
482 zustr2stp(3)
483 p = buf;
484 p = zustr2stp(p, "Hello ", 6);
485 p = zustr2stp(p, "world", 42); // Padding null bytes ignored.
486 p = zustr2stp(p, "!", 1);
487 len = p - buf;
488 puts(buf);
489
490 strncat(3)
491 buf[0] = '\0'; // There's no 'cpy' function to this 'cat'.
492 strncat(buf, "Hello ", 6);
493 strncat(buf, "world", 42); // Padding null bytes ignored.
494 strncat(buf, "!", 1);
495 len = strlen(buf);
496 puts(buf);
497
498 ustpcpy(3)
499 p = buf;
500 p = ustpcpy(p, "Hello ", 6);
501 p = ustpcpy(p, "world", 5);
502 p = ustpcpy(p, "!", 1);
503 len = p - buf;
504 printf("%.*s\n", (int) len, buf);
505
506 ustr2stp(3)
507 p = buf;
508 p = ustr2stp(p, "Hello ", 6);
509 p = ustr2stp(p, "world", 5);
510 p = ustr2stp(p, "!", 1);
511 len = p - buf;
512 puts(buf);
513
514 Implementations
515 Here are reference implementations for functions not provided by libc.
516
517 /* This code is in the public domain. */
518
519 char *
520 stpecpy(char *dst, char end[0], const char *restrict src)
521 {
522 char *p;
523
524 if (dst == NULL)
525 return NULL;
526 if (dst == end)
527 return end;
528
529 p = memccpy(dst, src, '\0', end - dst);
530 if (p != NULL)
531 return p - 1;
532
533 /* truncation detected */
534 end[-1] = '\0';
535 return end;
536 }
537
538 char *
539 zustr2ustp(char *restrict dst, const char *restrict src, size_t sz)
540 {
541 return ustpcpy(dst, src, strnlen(src, sz));
542 }
543
544 char *
545 zustr2stp(char *restrict dst, const char *restrict src, size_t sz)
546 {
547 char *p;
548
549 p = zustr2ustp(dst, src, sz);
550 *p = '\0';
551
552 return p;
553 }
554
555 char *
556 ustpcpy(char *restrict dst, const char *restrict src, size_t len)
557 {
558 return mempcpy(dst, src, len);
559 }
560
561 char *
562 ustr2stp(char *restrict dst, const char *restrict src, size_t len)
563 {
564 char *p;
565
566 p = ustpcpy(dst, src, len);
567 *p = '\0';
568
569 return p;
570 }
571
573 bzero(3), memcpy(3), memccpy(3), mempcpy(3), stpcpy(3), strlcpy(3bsd),
574 strncat(3), stpncpy(3), string(3)
575
576
577
578Linux man-pages 6.04 2023-03-08 string_copying(7)