1regex(3)                   Library Functions Manual                   regex(3)
2
3
4

NAME

6       regcomp, regexec, regerror, regfree - POSIX regex functions
7

LIBRARY

9       Standard C library (libc, -lc)
10

SYNOPSIS

12       #include <regex.h>
13
14       int regcomp(regex_t *restrict preg, const char *restrict regex,
15                   int cflags);
16       int regexec(const regex_t *restrict preg, const char *restrict string,
17                   size_t nmatch, regmatch_t pmatch[_Nullable restrict .nmatch],
18                   int eflags);
19
20       size_t regerror(int errcode, const regex_t *_Nullable restrict preg,
21                   char errbuf[_Nullable restrict .errbuf_size],
22                   size_t errbuf_size);
23       void regfree(regex_t *preg);
24
25       typedef struct {
26           size_t    re_nsub;
27       } regex_t;
28
29       typedef struct {
30           regoff_t  rm_so;
31           regoff_t  rm_eo;
32       } regmatch_t;
33
34       typedef /* ... */  regoff_t;
35

DESCRIPTION

37   Compilation
38       regcomp()  is  used to compile a regular expression into a form that is
39       suitable for subsequent regexec() searches.
40
41       On success, the pattern buffer at *preg is  initialized.   regex  is  a
42       null-terminated  string.   The  locale  must  be  the same when running
43       regexec().
44
45       After regcomp() succeeds, preg->re_nsub holds the number of  subexpres‐
46       sions in regex.  Thus, a value of preg->re_nsub + 1 passed as nmatch to
47       regexec() is sufficient to capture all matches.
48
49       cflags is the bitwise OR of zero or more of the following:
50
51       REG_EXTENDED
52              Use POSIX Extended Regular Expression syntax  when  interpreting
53              regex.   If  not  set,  POSIX Basic Regular Expression syntax is
54              used.
55
56       REG_ICASE
57              Do not differentiate case.  Subsequent regexec() searches  using
58              this pattern buffer will be case insensitive.
59
60       REG_NOSUB
61              Report only overall success.  regexec() will use only pmatch for
62              REG_STARTEND, ignoring nmatch.
63
64       REG_NEWLINE
65              Match-any-character operators don't match a newline.
66
67              A nonmatching list ([^...]) not containing a  newline  does  not
68              match a newline.
69
70              Match-beginning-of-line  operator  (^)  matches the empty string
71              immediately after a newline, regardless of whether  eflags,  the
72              execution flags of regexec(), contains REG_NOTBOL.
73
74              Match-end-of-line  operator ($) matches the empty string immedi‐
75              ately before a newline, regardless of  whether  eflags  contains
76              REG_NOTEOL.
77
78   Matching
79       regexec()  is  used  to match a null-terminated string against the com‐
80       piled pattern buffer in *preg, which must have  been  initialized  with
81       regcomp().   eflags  is the bitwise OR of zero or more of the following
82       flags:
83
84       REG_NOTBOL
85              The match-beginning-of-line operator always fails to match  (but
86              see  the  compilation flag REG_NEWLINE above).  This flag may be
87              used when different portions of a string are passed to regexec()
88              and the beginning of the string should not be interpreted as the
89              beginning of the line.
90
91       REG_NOTEOL
92              The match-end-of-line operator always fails to  match  (but  see
93              the compilation flag REG_NEWLINE above).
94
95       REG_STARTEND
96              Match  [string  + pmatch[0].rm_so, string + pmatch[0].rm_eo) in‐
97              stead of [string, string + strlen(string)).  This allows  match‐
98              ing  embedded  NUL  bytes and avoids a strlen(3) on known-length
99              strings.  If any matches are returned (REG_NOSUB  wasn't  passed
100              to  regcomp(),  the match succeeded, and nmatch > 0), they over‐
101              write pmatch as usual, and the match offsets remain relative  to
102              string  (not  string + pmatch[0].rm_so).  This flag is a BSD ex‐
103              tension, not present in POSIX.
104
105   Match offsets
106       Unless REG_NOSUB was passed to regcomp(), it is possible to obtain  the
107       locations  of matches within string: regexec() fills nmatch elements of
108       pmatch  with  results:  pmatch[0]  corresponds  to  the  entire  match,
109       pmatch[1]  to the first subexpression, etc.  If there were more matches
110       than nmatch, they are discarded; if fewer, unused  elements  of  pmatch
111       are filled with -1s.
112
113       Each  returned  valid (non--1) match corresponds to the range [string +
114       rm_so, string + rm_eo).
115
116       regoff_t is a signed integer type capable of storing the largest  value
117       that can be stored in either a ptrdiff_t type or a ssize_t type.
118
119   Error reporting
120       regerror() is used to turn the error codes that can be returned by both
121       regcomp() and regexec() into error message strings.
122
123       If preg isn't a null pointer, errcode must be the latest error returned
124       from an operation on preg.
125
126       If  errbuf_size  isn't 0, up to errbuf_size bytes are copied to errbuf;
127       the error string is always null-terminated, and truncated to fit.
128
129   Freeing
130       regfree() deinitializes the pattern buffer at *preg, freeing any  asso‐
131       ciated memory; *preg must have been initialized via regcomp().
132

RETURN VALUE

134       regcomp()  returns  zero  for a successful compilation or an error code
135       for failure.
136
137       regexec() returns zero for a successful match or REG_NOMATCH for  fail‐
138       ure.
139
140       regerror() returns the size of the buffer required to hold the string.
141

ERRORS

143       The following errors can be returned by regcomp():
144
145       REG_BADBR
146              Invalid use of back reference operator.
147
148       REG_BADPAT
149              Invalid use of pattern operators such as group or list.
150
151       REG_BADRPT
152              Invalid  use  of  repetition  operators such as using '*' as the
153              first character.
154
155       REG_EBRACE
156              Un-matched brace interval operators.
157
158       REG_EBRACK
159              Un-matched bracket list operators.
160
161       REG_ECOLLATE
162              Invalid collating element.
163
164       REG_ECTYPE
165              Unknown character class name.
166
167       REG_EEND
168              Nonspecific error.  This is not defined by POSIX.
169
170       REG_EESCAPE
171              Trailing backslash.
172
173       REG_EPAREN
174              Un-matched parenthesis group operators.
175
176       REG_ERANGE
177              Invalid use of the range operator; for example, the ending point
178              of the range occurs prior to the starting point.
179
180       REG_ESIZE
181              Compiled  regular  expression  requires  a pattern buffer larger
182              than 64 kB.  This is not defined by POSIX.
183
184       REG_ESPACE
185              The regex routines ran out of memory.
186
187       REG_ESUBREG
188              Invalid back reference to a subexpression.
189

ATTRIBUTES

191       For an  explanation  of  the  terms  used  in  this  section,  see  at‐
192       tributes(7).
193
194       ┌─────────────────────────────────────┬───────────────┬────────────────┐
195       │Interface                            │ Attribute     │ Value          │
196       ├─────────────────────────────────────┼───────────────┼────────────────┤
197       │regcomp(), regexec()                 │ Thread safety │ MT-Safe locale │
198       ├─────────────────────────────────────┼───────────────┼────────────────┤
199       │regerror()                           │ Thread safety │ MT-Safe env    │
200       ├─────────────────────────────────────┼───────────────┼────────────────┤
201       │regfree()                            │ Thread safety │ MT-Safe        │
202       └─────────────────────────────────────┴───────────────┴────────────────┘
203

STANDARDS

205       POSIX.1-2008.
206

HISTORY

208       POSIX.1-2001.
209
210       Prior  to  POSIX.1-2008, regoff_t was required to be capable of storing
211       the largest value that can be stored in  either  an  off_t  type  or  a
212       ssize_t type.
213

CAVEATS

215       re_nsub  is  only  required  to  be  initialized  if  REG_NOSUB  wasn't
216       specified, but all known implementations initialize it regardless.
217
218       Both regex_t and regmatch_t may (and do)  have  more  members,  in  any
219       order.  Always reference them by name.
220

EXAMPLES

222       #include <stdint.h>
223       #include <stdio.h>
224       #include <stdlib.h>
225       #include <regex.h>
226
227       #define ARRAY_SIZE(arr) (sizeof((arr)) / sizeof((arr)[0]))
228
229       static const char *const str =
230               "1) John Driverhacker;\n2) John Doe;\n3) John Foo;\n";
231       static const char *const re = "John.*o";
232
233       int main(void)
234       {
235           static const char *s = str;
236           regex_t     regex;
237           regmatch_t  pmatch[1];
238           regoff_t    off, len;
239
240           if (regcomp(&regex, re, REG_NEWLINE))
241               exit(EXIT_FAILURE);
242
243           printf("String = \"%s\"\n", str);
244           printf("Matches:\n");
245
246           for (unsigned int i = 0; ; i++) {
247               if (regexec(&regex, s, ARRAY_SIZE(pmatch), pmatch, 0))
248                   break;
249
250               off = pmatch[0].rm_so + (s - str);
251               len = pmatch[0].rm_eo - pmatch[0].rm_so;
252               printf("#%u:\n", i);
253               printf("offset = %jd; length = %jd\n", (intmax_t) off,
254                       (intmax_t) len);
255               printf("substring = \"%.*s\"\n", (int) len, s + pmatch[0].rm_so);
256
257               s += pmatch[0].rm_eo;
258           }
259
260           exit(EXIT_SUCCESS);
261       }
262

SEE ALSO

264       grep(1), regex(7)
265
266       The glibc manual section, Regular Expressions
267
268
269
270Linux man-pages 6.05              2023-07-20                          regex(3)
Impressum