1regex(3) Library Functions Manual regex(3)
2
3
4
6 regcomp, regexec, regerror, regfree - POSIX regex functions
7
9 Standard C library (libc, -lc)
10
12 #include <regex.h>
13
14 int regcomp(regex_t *restrict preg, const char *restrict regex,
15 int cflags);
16 int regexec(const regex_t *restrict preg, const char *restrict string,
17 size_t nmatch, regmatch_t pmatch[_Nullable restrict .nmatch],
18 int eflags);
19
20 size_t regerror(int errcode, const regex_t *_Nullable restrict preg,
21 char errbuf[_Nullable restrict .errbuf_size],
22 size_t errbuf_size);
23 void regfree(regex_t *preg);
24
25 typedef struct {
26 size_t re_nsub;
27 } regex_t;
28
29 typedef struct {
30 regoff_t rm_so;
31 regoff_t rm_eo;
32 } regmatch_t;
33
34 typedef /* ... */ regoff_t;
35
37 Compilation
38 regcomp() is used to compile a regular expression into a form that is
39 suitable for subsequent regexec() searches.
40
41 On success, the pattern buffer at *preg is initialized. regex is a
42 null-terminated string. The locale must be the same when running
43 regexec().
44
45 After regcomp() succeeds, preg->re_nsub holds the number of subexpres‐
46 sions in regex. Thus, a value of preg->re_nsub + 1 passed as nmatch to
47 regexec() is sufficient to capture all matches.
48
49 cflags is the bitwise OR of zero or more of the following:
50
51 REG_EXTENDED
52 Use POSIX Extended Regular Expression syntax when interpreting
53 regex. If not set, POSIX Basic Regular Expression syntax is
54 used.
55
56 REG_ICASE
57 Do not differentiate case. Subsequent regexec() searches using
58 this pattern buffer will be case insensitive.
59
60 REG_NOSUB
61 Report only overall success. regexec() will use only pmatch for
62 REG_STARTEND, ignoring nmatch.
63
64 REG_NEWLINE
65 Match-any-character operators don't match a newline.
66
67 A nonmatching list ([^...]) not containing a newline does not
68 match a newline.
69
70 Match-beginning-of-line operator (^) matches the empty string
71 immediately after a newline, regardless of whether eflags, the
72 execution flags of regexec(), contains REG_NOTBOL.
73
74 Match-end-of-line operator ($) matches the empty string immedi‐
75 ately before a newline, regardless of whether eflags contains
76 REG_NOTEOL.
77
78 Matching
79 regexec() is used to match a null-terminated string against the com‐
80 piled pattern buffer in *preg, which must have been initialized with
81 regcomp(). eflags is the bitwise OR of zero or more of the following
82 flags:
83
84 REG_NOTBOL
85 The match-beginning-of-line operator always fails to match (but
86 see the compilation flag REG_NEWLINE above). This flag may be
87 used when different portions of a string are passed to regexec()
88 and the beginning of the string should not be interpreted as the
89 beginning of the line.
90
91 REG_NOTEOL
92 The match-end-of-line operator always fails to match (but see
93 the compilation flag REG_NEWLINE above).
94
95 REG_STARTEND
96 Match [string + pmatch[0].rm_so, string + pmatch[0].rm_eo) in‐
97 stead of [string, string + strlen(string)). This allows match‐
98 ing embedded NUL bytes and avoids a strlen(3) on known-length
99 strings. If any matches are returned (REG_NOSUB wasn't passed
100 to regcomp(), the match succeeded, and nmatch > 0), they over‐
101 write pmatch as usual, and the match offsets remain relative to
102 string (not string + pmatch[0].rm_so). This flag is a BSD ex‐
103 tension, not present in POSIX.
104
105 Match offsets
106 Unless REG_NOSUB was passed to regcomp(), it is possible to obtain the
107 locations of matches within string: regexec() fills nmatch elements of
108 pmatch with results: pmatch[0] corresponds to the entire match,
109 pmatch[1] to the first subexpression, etc. If there were more matches
110 than nmatch, they are discarded; if fewer, unused elements of pmatch
111 are filled with -1s.
112
113 Each returned valid match (one whose offsets are not -1) corresponds
114 to the range [string + rm_so, string + rm_eo).
115
116 regoff_t is a signed integer type capable of storing the largest value
117 that can be stored in either a ptrdiff_t type or a ssize_t type.
118
119 Error reporting
120 regerror() is used to turn the error codes that can be returned by both
121 regcomp() and regexec() into error message strings.
122
123 If preg isn't a null pointer, errcode must be the latest error returned
124 from an operation on preg.
125
126 If errbuf_size isn't 0, up to errbuf_size bytes are copied to errbuf;
127 the error string is always null-terminated, and truncated to fit.
128
129 Freeing
130 regfree() deinitializes the pattern buffer at *preg, freeing any asso‐
131 ciated memory; *preg must have been initialized via regcomp().
132
134 regcomp() returns zero for a successful compilation or an error code
135 for failure.
136
137 regexec() returns zero for a successful match or REG_NOMATCH for fail‐
138 ure.
139
140 regerror() returns the size of the buffer required to hold the string.
141
143 The following errors can be returned by regcomp():
144
145 REG_BADBR
146 Invalid use of back reference operator.
147
148 REG_BADPAT
149 Invalid use of pattern operators such as group or list.
150
151 REG_BADRPT
152 Invalid use of repetition operators such as using '*' as the
153 first character.
154
155 REG_EBRACE
156 Un-matched brace interval operators.
157
158 REG_EBRACK
159 Un-matched bracket list operators.
160
161 REG_ECOLLATE
162 Invalid collating element.
163
164 REG_ECTYPE
165 Unknown character class name.
166
167 REG_EEND
168 Nonspecific error. This is not defined by POSIX.
169
170 REG_EESCAPE
171 Trailing backslash.
172
173 REG_EPAREN
174 Un-matched parenthesis group operators.
175
176 REG_ERANGE
177 Invalid use of the range operator; for example, the ending point
178 of the range occurs prior to the starting point.
179
180 REG_ESIZE
181 Compiled regular expression requires a pattern buffer larger
182 than 64 kB. This is not defined by POSIX.
183
184 REG_ESPACE
185 The regex routines ran out of memory.
186
187 REG_ESUBREG
188 Invalid back reference to a subexpression.
189
191 For an explanation of the terms used in this section, see at‐
192 tributes(7).
193
194 ┌─────────────────────────────────────┬───────────────┬────────────────┐
195 │Interface │ Attribute │ Value │
196 ├─────────────────────────────────────┼───────────────┼────────────────┤
197 │regcomp(), regexec() │ Thread safety │ MT-Safe locale │
198 ├─────────────────────────────────────┼───────────────┼────────────────┤
199 │regerror() │ Thread safety │ MT-Safe env │
200 ├─────────────────────────────────────┼───────────────┼────────────────┤
201 │regfree() │ Thread safety │ MT-Safe │
202 └─────────────────────────────────────┴───────────────┴────────────────┘
203
205 POSIX.1-2008.
206
208 POSIX.1-2001.
209
210 Prior to POSIX.1-2008, regoff_t was required to be capable of storing
211 the largest value that can be stored in either an off_t type or a
212 ssize_t type.
213
215 re_nsub is only required to be initialized if REG_NOSUB wasn't
216 specified, but all known implementations initialize it regardless.
217
218 Both regex_t and regmatch_t may (and do) have more members, in any
219 order. Always reference them by name.
220
222 #include <stdint.h>
223 #include <stdio.h>
224 #include <stdlib.h>
225 #include <regex.h>
226
227 #define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0]))
228
229 static const char *const str =
230 "1) John Driverhacker;\n2) John Doe;\n3) John Foo;\n";
231 static const char *const re = "John.*o";
232
233 int main(void)
234 {
235 static const char *s = str;
236 regex_t regex;
237 regmatch_t pmatch[1];
238 regoff_t off, len;
239
240 if (regcomp(&regex, re, REG_NEWLINE))
241 exit(EXIT_FAILURE);
242
243 printf("String = \"%s\"\n", str);
244 printf("Matches:\n");
245
246 for (unsigned int i = 0; ; i++) {
247 if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0))
248 break;
249
250 off = pmatch[0].rm_so + (s - str);
251 len = pmatch[0].rm_eo - pmatch[0].rm_so;
252 printf("#%zu:\n", i);
253 printf("offset = %jd; length = %jd\n", (intmax_t) off,
254 (intmax_t) len);
255 printf("substring = \"%.*s\"\n", len, s + pmatch[0].rm_so);
256
257 s += pmatch[0].rm_eo;
258 }
259
260 exit(EXIT_SUCCESS);
261 }
262
264 grep(1), regex(7)
265
266 The glibc manual section, Regular Expressions
267
268
269
270Linux man-pages 6.05 2023-07-20 regex(3)