1PCRE2API(3)                Library Functions Manual                PCRE2API(3)
2
3
4

NAME

6       PCRE2 - Perl-compatible regular expressions (revised API)
7
8       #include <pcre2.h>
9
10       PCRE2  is  a  new API for PCRE, starting at release 10.0. This document
11       contains a description of all its native functions. See the pcre2 docu‐
12       ment for an overview of all the PCRE2 documentation.
13

PCRE2 NATIVE API BASIC FUNCTIONS

15
16       pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
17         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
18         pcre2_compile_context *ccontext);
19
20       void pcre2_code_free(pcre2_code *code);
21
22       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
23         pcre2_general_context *gcontext);
24
25       pcre2_match_data *pcre2_match_data_create_from_pattern(
26         const pcre2_code *code, pcre2_general_context *gcontext);
27
28       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
29         PCRE2_SIZE length, PCRE2_SIZE startoffset,
30         uint32_t options, pcre2_match_data *match_data,
31         pcre2_match_context *mcontext);
32
33       int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject,
34         PCRE2_SIZE length, PCRE2_SIZE startoffset,
35         uint32_t options, pcre2_match_data *match_data,
36         pcre2_match_context *mcontext,
37         int *workspace, PCRE2_SIZE wscount);
38
39       void pcre2_match_data_free(pcre2_match_data *match_data);
40

PCRE2 NATIVE API AUXILIARY MATCH FUNCTIONS

42
43       PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data);
44
45       uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data);
46
47       PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
48
49       PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
50

PCRE2 NATIVE API GENERAL CONTEXT FUNCTIONS

52
53       pcre2_general_context *pcre2_general_context_create(
54         void *(*private_malloc)(PCRE2_SIZE, void *),
55         void (*private_free)(void *, void *), void *memory_data);
56
57       pcre2_general_context *pcre2_general_context_copy(
58         pcre2_general_context *gcontext);
59
60       void pcre2_general_context_free(pcre2_general_context *gcontext);
61

PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS

63
64       pcre2_compile_context *pcre2_compile_context_create(
65         pcre2_general_context *gcontext);
66
67       pcre2_compile_context *pcre2_compile_context_copy(
68         pcre2_compile_context *ccontext);
69
70       void pcre2_compile_context_free(pcre2_compile_context *ccontext);
71
72       int pcre2_set_bsr(pcre2_compile_context *ccontext,
73         uint32_t value);
74
75       int pcre2_set_character_tables(pcre2_compile_context *ccontext,
76         const uint8_t *tables);
77
78       int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext,
79         uint32_t extra_options);
80
81       int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
82         PCRE2_SIZE value);
83
84       int pcre2_set_newline(pcre2_compile_context *ccontext,
85         uint32_t value);
86
87       int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
88         uint32_t value);
89
90       int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
91         int (*guard_function)(uint32_t, void *), void *user_data);
92

PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS

94
95       pcre2_match_context *pcre2_match_context_create(
96         pcre2_general_context *gcontext);
97
98       pcre2_match_context *pcre2_match_context_copy(
99         pcre2_match_context *mcontext);
100
101       void pcre2_match_context_free(pcre2_match_context *mcontext);
102
103       int pcre2_set_callout(pcre2_match_context *mcontext,
104         int (*callout_function)(pcre2_callout_block *, void *),
105         void *callout_data);
106
107       int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
108         int (*callout_function)(pcre2_substitute_callout_block *, void *),
109         void *callout_data);
110
111       int pcre2_set_offset_limit(pcre2_match_context *mcontext,
112         PCRE2_SIZE value);
113
114       int pcre2_set_heap_limit(pcre2_match_context *mcontext,
115         uint32_t value);
116
117       int pcre2_set_match_limit(pcre2_match_context *mcontext,
118         uint32_t value);
119
120       int pcre2_set_depth_limit(pcre2_match_context *mcontext,
121         uint32_t value);
122

PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS

124
125       int pcre2_substring_copy_byname(pcre2_match_data *match_data,
126         PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen);
127
128       int pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
129         uint32_t number, PCRE2_UCHAR *buffer,
130         PCRE2_SIZE *bufflen);
131
132       void pcre2_substring_free(PCRE2_UCHAR *buffer);
133
134       int pcre2_substring_get_byname(pcre2_match_data *match_data,
135         PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen);
136
137       int pcre2_substring_get_bynumber(pcre2_match_data *match_data,
138         uint32_t number, PCRE2_UCHAR **bufferptr,
139         PCRE2_SIZE *bufflen);
140
141       int pcre2_substring_length_byname(pcre2_match_data *match_data,
142         PCRE2_SPTR name, PCRE2_SIZE *length);
143
144       int pcre2_substring_length_bynumber(pcre2_match_data *match_data,
145         uint32_t number, PCRE2_SIZE *length);
146
147       int pcre2_substring_nametable_scan(const pcre2_code *code,
148         PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
149
150       int pcre2_substring_number_from_name(const pcre2_code *code,
151         PCRE2_SPTR name);
152
153       void pcre2_substring_list_free(PCRE2_SPTR *list);
154
155       int pcre2_substring_list_get(pcre2_match_data *match_data,
156         PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
157

PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION

159
160       int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
161         PCRE2_SIZE length, PCRE2_SIZE startoffset,
162         uint32_t options, pcre2_match_data *match_data,
163         pcre2_match_context *mcontext, PCRE2_SPTR replacement,
164         PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
165         PCRE2_SIZE *outlengthptr);
166

PCRE2 NATIVE API JIT FUNCTIONS

168
169       int pcre2_jit_compile(pcre2_code *code, uint32_t options);
170
171       int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
172         PCRE2_SIZE length, PCRE2_SIZE startoffset,
173         uint32_t options, pcre2_match_data *match_data,
174         pcre2_match_context *mcontext);
175
176       void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
177
178       pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE startsize,
179         PCRE2_SIZE maxsize, pcre2_general_context *gcontext);
180
181       void pcre2_jit_stack_assign(pcre2_match_context *mcontext,
182         pcre2_jit_callback callback_function, void *callback_data);
183
184       void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
185

PCRE2 NATIVE API SERIALIZATION FUNCTIONS

187
188       int32_t pcre2_serialize_decode(pcre2_code **codes,
189         int32_t number_of_codes, const uint8_t *bytes,
190         pcre2_general_context *gcontext);
191
192       int32_t pcre2_serialize_encode(const pcre2_code **codes,
193         int32_t number_of_codes, uint8_t **serialized_bytes,
194         PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext);
195
196       void pcre2_serialize_free(uint8_t *bytes);
197
198       int32_t pcre2_serialize_get_number_of_codes(const uint8_t *bytes);
199

PCRE2 NATIVE API AUXILIARY FUNCTIONS

201
202       pcre2_code *pcre2_code_copy(const pcre2_code *code);
203
204       pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code);
205
206       int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer,
207         PCRE2_SIZE bufflen);
208
209       const uint8_t *pcre2_maketables(pcre2_general_context *gcontext);
210
211       void pcre2_maketables_free(pcre2_general_context *gcontext,
212         const uint8_t *tables);
213
214       int pcre2_pattern_info(const pcre2_code *code, uint32_t what,
215         void *where);
216
217       int pcre2_callout_enumerate(const pcre2_code *code,
218         int (*callback)(pcre2_callout_enumerate_block *, void *),
219         void *user_data);
220
221       int pcre2_config(uint32_t what, void *where);
222

PCRE2 NATIVE API OBSOLETE FUNCTIONS

224
225       int pcre2_set_recursion_limit(pcre2_match_context *mcontext,
226         uint32_t value);
227
228       int pcre2_set_recursion_memory_management(
229         pcre2_match_context *mcontext,
230         void *(*private_malloc)(PCRE2_SIZE, void *),
231         void (*private_free)(void *, void *), void *memory_data);
232
233       These  functions became obsolete at release 10.30 and are retained only
234       for backward compatibility. They should not be used in  new  code.  The
235       first  is  replaced by pcre2_set_depth_limit(); the second is no longer
236       needed and has no effect (it always returns zero).
237

PCRE2 EXPERIMENTAL PATTERN CONVERSION FUNCTIONS

239
240       pcre2_convert_context *pcre2_convert_context_create(
241         pcre2_general_context *gcontext);
242
243       pcre2_convert_context *pcre2_convert_context_copy(
244         pcre2_convert_context *cvcontext);
245
246       void pcre2_convert_context_free(pcre2_convert_context *cvcontext);
247
248       int pcre2_set_glob_escape(pcre2_convert_context *cvcontext,
249         uint32_t escape_char);
250
251       int pcre2_set_glob_separator(pcre2_convert_context *cvcontext,
252         uint32_t separator_char);
253
254       int pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE length,
255         uint32_t options, PCRE2_UCHAR **buffer,
256         PCRE2_SIZE *blength, pcre2_convert_context *cvcontext);
257
258       void pcre2_converted_pattern_free(PCRE2_UCHAR *converted_pattern);
259
260       These functions provide a way of  converting  non-PCRE2  patterns  into
261       patterns  that  can  be  processed by pcre2_compile(). This facility is
262       experimental and may be changed in future releases. At present, "globs"
263       and  POSIX  basic  and  extended patterns can be converted. Details are
264       given in the pcre2convert documentation.
265

PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES

267
268       There are three PCRE2 libraries, supporting 8-bit, 16-bit,  and  32-bit
269       code  units,  respectively.  However,  there  is  just one header file,
270       pcre2.h.  This contains the function prototypes and  other  definitions
271       for all three libraries. One, two, or all three can be installed simul‐
272       taneously. On Unix-like systems the libraries  are  called  libpcre2-8,
273       libpcre2-16, and libpcre2-32, and they can also co-exist with the orig‐
274       inal PCRE libraries.
275
276       Character strings are passed to and from a PCRE2 library as a  sequence
277       of  unsigned  integers  in  code  units of the appropriate width. Every
278       PCRE2 function comes in three different forms, one  for  each  library,
279       for example:
280
281         pcre2_compile_8()
282         pcre2_compile_16()
283         pcre2_compile_32()
284
285       There are also three different sets of data types:
286
287         PCRE2_UCHAR8, PCRE2_UCHAR16, PCRE2_UCHAR32
288         PCRE2_SPTR8,  PCRE2_SPTR16,  PCRE2_SPTR32
289
290       The  UCHAR  types define unsigned code units of the appropriate widths.
291       For example, PCRE2_UCHAR16 is usually defined as `uint16_t'.  The  SPTR
292       types  are  constant  pointers  to the equivalent UCHAR types, that is,
293       they are pointers to vectors of unsigned code units.
294
295       Many applications use only one code unit width. For their  convenience,
296       macros are defined whose names are the generic forms such as pcre2_com‐
297       pile() and  PCRE2_SPTR.  These  macros  use  the  value  of  the  macro
298       PCRE2_CODE_UNIT_WIDTH  to generate the appropriate width-specific func‐
299       tion and macro names.  PCRE2_CODE_UNIT_WIDTH is not defined by default.
300       An  application  must  define  it  to  be 8, 16, or 32 before including
301       pcre2.h in order to make use of the generic names.
302
303       Applications that use more than one code unit width can be linked  with
304       more  than  one PCRE2 library, but must define PCRE2_CODE_UNIT_WIDTH to
305       be 0 before including pcre2.h, and then use the  real  function  names.
306       Any  code  that  is to be included in an environment where the value of
307       PCRE2_CODE_UNIT_WIDTH is unknown should  also  use  the  real  function
308       names. (Unfortunately, it is not possible in C code to save and restore
309       the value of a macro.)
310
311       If PCRE2_CODE_UNIT_WIDTH is not defined  before  including  pcre2.h,  a
312       compiler error occurs.
313
314       When  using  multiple  libraries  in an application, you must take care
315       when processing any particular pattern to use  only  functions  from  a
316       single  library.   For example, if you want to run a match using a pat‐
317       tern that was compiled with pcre2_compile_16(), you  must  do  so  with
318       pcre2_match_16(), not pcre2_match_8() or pcre2_match_32().
319
320       In  the  function summaries above, and in the rest of this document and
321       other PCRE2 documents, functions and data  types  are  described  using
322       their generic names, without the _8, _16, or _32 suffix.
323

PCRE2 API OVERVIEW

325
326       PCRE2  has  its  own  native  API, which is described in this document.
327       There are also some wrapper functions for the 8-bit library that corre‐
328       spond  to the POSIX regular expression API, but they do not give access
329       to all the functionality of PCRE2. They are described in the pcre2posix
330       documentation. Both these APIs define a set of C function calls.
331
332       The  native  API  C data types, function prototypes, option values, and
333       error codes are defined in the header file pcre2.h, which also contains
334       definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release
335       numbers for the library. Applications can use these to include  support
336       for different releases of PCRE2.
337
338       In a Windows environment, if you want to statically link an application
339       program against a non-dll PCRE2 library, you must  define  PCRE2_STATIC
340       before including pcre2.h.
341
342       The  functions pcre2_compile() and pcre2_match() are used for compiling
343       and matching regular expressions in a Perl-compatible manner. A  sample
344       program that demonstrates the simplest way of using them is provided in
345       the file called pcre2demo.c in the PCRE2 source distribution. A listing
346       of  this  program  is  given  in  the  pcre2demo documentation, and the
347       pcre2sample documentation describes how to compile and run it.
348
349       The compiling and matching functions recognize various options that are
350       passed as bits in an options argument. There are also some more compli‐
351       cated  parameters  such  as  custom  memory  management  functions  and
352       resource  limits  that  are passed in "contexts" (which are just memory
353       blocks, described below). Simple applications do not need to  make  use
354       of contexts.
355
356       Just-in-time  (JIT)  compiler  support  is an optional feature of PCRE2
357       that can be built in  appropriate  hardware  environments.  It  greatly
358       speeds  up  the  matching  performance  of  many patterns. Programs can
359       request that it be used if  available  by  calling  pcre2_jit_compile()
360       after a pattern has been successfully compiled by pcre2_compile(). This
361       does nothing if JIT support is not available.
362
363       More complicated programs might need to  make  use  of  the  specialist
364       functions    pcre2_jit_stack_create(),    pcre2_jit_stack_free(),   and
365       pcre2_jit_stack_assign() in order to  control  the  JIT  code's  memory
366       usage.
367
368       JIT matching is automatically used by pcre2_match() if it is available,
369       unless the PCRE2_NO_JIT option is set. There is also a direct interface
370       for  JIT  matching,  which gives improved performance at the expense of
371       less sanity checking. The JIT-specific functions are discussed  in  the
372       pcre2jit documentation.
373
374       A  second  matching function, pcre2_dfa_match(), which is not Perl-com‐
375       patible, is also provided. This uses  a  different  algorithm  for  the
376       matching.  The  alternative  algorithm finds all possible matches (at a
377       given point in the subject), and scans the subject  just  once  (unless
378       there  are  lookaround  assertions).  However,  this algorithm does not
379       return captured substrings. A description of  the  two  matching  algo‐
380       rithms   and  their  advantages  and  disadvantages  is  given  in  the
381       pcre2matching   documentation.   There   is   no   JIT   support    for
382       pcre2_dfa_match().
383
384       In  addition  to  the  main compiling and matching functions, there are
385       convenience functions for extracting captured substrings from a subject
386       string that has been matched by pcre2_match(). They are:
387
388         pcre2_substring_copy_byname()
389         pcre2_substring_copy_bynumber()
390         pcre2_substring_get_byname()
391         pcre2_substring_get_bynumber()
392         pcre2_substring_list_get()
393         pcre2_substring_length_byname()
394         pcre2_substring_length_bynumber()
395         pcre2_substring_nametable_scan()
396         pcre2_substring_number_from_name()
397
398       pcre2_substring_free()  and  pcre2_substring_list_free()  are also pro‐
399       vided, to free memory used for extracted strings. If  either  of  these
400       functions  is called with a NULL argument, the function returns immedi‐
401       ately without doing anything.
402
403       The function pcre2_substitute() can be called to match  a  pattern  and
404       return  a  copy of the subject string with substitutions for parts that
405       were matched.
406
407       Functions whose names begin with pcre2_serialize_ are used  for  saving
408       compiled patterns on disc or elsewhere, and reloading them later.
409
410       Finally,  there  are functions for finding out information about a com‐
411       piled pattern (pcre2_pattern_info()) and about the  configuration  with
412       which PCRE2 was built (pcre2_config()).
413
414       Functions  with  names  ending with _free() are used for freeing memory
415       blocks of various sorts. In all cases, if one  of  these  functions  is
416       called with a NULL argument, it does nothing.
417

STRING LENGTHS AND OFFSETS

419
420       The  PCRE2  API  uses  string  lengths and offsets into strings of code
421       units in several places. These values are always  of  type  PCRE2_SIZE,
422       which  is an unsigned integer type, currently always defined as size_t.
423       The largest  value  that  can  be  stored  in  such  a  type  (that  is
424       ~(PCRE2_SIZE)0)  is reserved as a special indicator for zero-terminated
425       strings and unset offsets.  Therefore, the longest string that  can  be
426       handled is one less than this maximum.
427

NEWLINES

429
430       PCRE2 supports five different conventions for indicating line breaks in
431       strings: a single CR (carriage return) character, a  single  LF  (line‐
432       feed) character, the two-character sequence CRLF, any of the three pre‐
433       ceding, or any Unicode newline sequence. The Unicode newline  sequences
434       are  the  three just mentioned, plus the single characters VT (vertical
435       tab, U+000B), FF (form feed, U+000C), NEL (next line, U+0085), LS (line
436       separator, U+2028), and PS (paragraph separator, U+2029).
437
438       Each  of  the first three conventions is used by at least one operating
439       system as its standard newline sequence. When PCRE2 is built, a default
440       can be specified.  If it is not, the default is set to LF, which is the
441       Unix standard. However, the newline convention can  be  changed  by  an
442       application  when  calling  pcre2_compile(),  or it can be specified by
443       special text at the start of the pattern  itself;  this  overrides  any
444       other  settings.  See  the pcre2pattern page for details of the special
445       character sequences.
446
447       In the PCRE2 documentation the word "newline"  is  used  to  mean  "the
448       character or pair of characters that indicate a line break". The choice
449       of newline convention affects the handling of the dot, circumflex,  and
450       dollar metacharacters, the handling of #-comments in /x mode, and, when
451       CRLF is a recognized line ending sequence, the match position  advance‐
452       ment for a non-anchored pattern. There is more detail about this in the
453       section on pcre2_match() options below.
454
455       The choice of newline convention does not affect the interpretation  of
456       the \n or \r escape sequences, nor does it affect what \R matches; this
457       has its own separate convention.
458

MULTITHREADING

460
461       In a multithreaded application it is important to keep  thread-specific
462       data  separate  from data that can be shared between threads. The PCRE2
463       library code itself is thread-safe: it contains  no  static  or  global
464       variables.  The  API  is  designed to be fairly simple for non-threaded
465       applications while at the same time ensuring that multithreaded  appli‐
466       cations can use it.
467
468       There are several different blocks of data that are used to pass infor‐
469       mation between the application and the PCRE2 libraries.
470
471   The compiled pattern
472
473       A pointer to the compiled form of a pattern is  returned  to  the  user
474       when pcre2_compile() is successful. The data in the compiled pattern is
475       fixed, and does not change when the pattern is matched.  Therefore,  it
476       is  thread-safe, that is, the same compiled pattern can be used by more
477       than one thread simultaneously. For example, an application can compile
478       all its patterns at the start, before forking off multiple threads that
479       use them. However, if the just-in-time (JIT)  optimization  feature  is
480       being  used,  it needs separate memory stack areas for each thread. See
481       the pcre2jit documentation for more details.
482
483       In a more complicated situation, where patterns are compiled only  when
484       they  are  first needed, but are still shared between threads, pointers
485       to compiled patterns must be protected  from  simultaneous  writing  by
486       multiple threads, at least until a pattern has been compiled. The logic
487       can be something like this:
488
489         Get a read-only (shared) lock (mutex) for pointer
490         if (pointer == NULL)
491           {
492           Get a write (unique) lock for pointer
493           if (pointer == NULL) pointer = pcre2_compile(...
494           }
495         Release the lock
496         Use pointer in pcre2_match()
497
498       Of course, testing for compilation errors should also  be  included  in
499       the code.
500
501       If JIT is being used, but the JIT compilation is not being done immedi‐
502       ately (perhaps waiting to see if the pattern  is  used  often  enough),
503       similar logic is required. JIT compilation updates a pointer within the
504       compiled code block, so a thread must gain unique write access  to  the
505       pointer     before    calling    pcre2_jit_compile().    Alternatively,
506       pcre2_code_copy()  or  pcre2_code_copy_with_tables()  can  be  used  to
507       obtain  a private copy of the compiled code before calling the JIT com‐
508       piler.
509
510   Context blocks
511
512       The next main section below introduces the idea of "contexts" in  which
513       PCRE2 functions are called. A context is nothing more than a collection
514       of parameters that control the way PCRE2 operates. Grouping a number of
515       parameters together in a context is a convenient way of passing them to
516       a PCRE2 function without using lots of arguments. The  parameters  that
517       are  stored  in  contexts  are in some sense "advanced features" of the
518       API. Many straightforward applications will not need to use contexts.
519
520       In a multithreaded application, if the parameters in a context are val‐
521       ues  that  are  never  changed, the same context can be used by all the
522       threads. However, if any thread needs to change any value in a context,
523       it must make its own thread-specific copy.
524
525   Match blocks
526
527       The  matching  functions need a block of memory for storing the results
528       of a match. This includes details of what was matched, as well as addi‐
529       tional  information  such as the name of a (*MARK) setting. Each thread
530       must provide its own copy of this memory.
531

PCRE2 CONTEXTS

533
534       Some PCRE2 functions have a lot of parameters, many of which  are  used
535       only  by  specialist  applications,  for example, those that use custom
536       memory management or non-standard character tables.  To  keep  function
537       argument  lists  at a reasonable size, and at the same time to keep the
538       API extensible, "uncommon" parameters are passed to  certain  functions
539       in  a  context instead of directly. A context is just a block of memory
540       that holds the parameter values.  Applications  that  do  not  need  to
541       adjust  any  of  the  context  parameters  can pass NULL when a context
542       pointer is required.
543
544       There are three different types of context: a general context  that  is
545       relevant  for  several  PCRE2 operations, a compile-time context, and a
546       match-time context.
547
548   The general context
549
550       At present, this context just  contains  pointers  to  (and  data  for)
551       external  memory  management  functions  that  are  called from several
552       places in the PCRE2 library. The context is named `general' rather than
553       specifically  `memory'  because in future other fields may be added. If
554       you do not want to supply your own custom memory management  functions,
555       you  do not need to bother with a general context. A general context is
556       created by:
557
558       pcre2_general_context *pcre2_general_context_create(
559         void *(*private_malloc)(PCRE2_SIZE, void *),
560         void (*private_free)(void *, void *), void *memory_data);
561
562       The two function pointers specify custom memory  management  functions,
563       whose prototypes are:
564
565         void *private_malloc(PCRE2_SIZE, void *);
566         void  private_free(void *, void *);
567
568       Whenever code in PCRE2 calls these functions, the final argument is the
569       value of memory_data. Either of the first two arguments of the creation
570       function  may be NULL, in which case the system memory management func‐
571       tions malloc() and free() are used. (This is not currently  useful,  as
572       there  are  no  other  fields in a general context, but in future there
573       might be.)  The private_malloc() function  is  used  (if  supplied)  to
574       obtain  memory  for storing the context, and all three values are saved
575       as part of the context.
576
577       Whenever PCRE2 creates a data block of any kind, the block  contains  a
578       pointer  to the free() function that matches the malloc() function that
579       was used. When the time comes to  free  the  block,  this  function  is
580       called.
581
582       A general context can be copied by calling:
583
584       pcre2_general_context *pcre2_general_context_copy(
585         pcre2_general_context *gcontext);
586
587       The memory used for a general context should be freed by calling:
588
589       void pcre2_general_context_free(pcre2_general_context *gcontext);
590
591       If  this  function  is  passed  a NULL argument, it returns immediately
592       without doing anything.
593
594   The compile context
595
596       A compile context is required if you want to provide an external  func‐
597       tion  for  stack  checking  during compilation or to change the default
598       values of any of the following compile-time parameters:
599
600         What \R matches (Unicode newlines or CR, LF, CRLF only)
601         PCRE2's character tables
602         The newline character sequence
603         The compile time nested parentheses limit
604         The maximum length of the pattern string
605         The extra options bits (none set by default)
606
607       A compile context is also required if you are using custom memory  man‐
608       agement.   If  none of these apply, just pass NULL as the context argu‐
609       ment of pcre2_compile().
610
611       A compile context is created, copied, and freed by the following  func‐
612       tions:
613
614       pcre2_compile_context *pcre2_compile_context_create(
615         pcre2_general_context *gcontext);
616
617       pcre2_compile_context *pcre2_compile_context_copy(
618         pcre2_compile_context *ccontext);
619
620       void pcre2_compile_context_free(pcre2_compile_context *ccontext);
621
622       A  compile  context  is created with default values for its parameters.
623       These can be changed by calling the following functions, which return 0
624       on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
625
626       int pcre2_set_bsr(pcre2_compile_context *ccontext,
627         uint32_t value);
628
629       The  value  must  be PCRE2_BSR_ANYCRLF, to specify that \R matches only
630       CR, LF, or CRLF, or PCRE2_BSR_UNICODE, to specify that \R  matches  any
631       Unicode line ending sequence. The value is used by the JIT compiler and
632       by  the  two  interpreted   matching   functions,   pcre2_match()   and
633       pcre2_dfa_match().
634
635       int pcre2_set_character_tables(pcre2_compile_context *ccontext,
636         const uint8_t *tables);
637
638       The  value  must  be  the result of a call to pcre2_maketables(), whose
639       only argument is a general context. This function builds a set of char‐
640       acter tables in the current locale.
641
642       int pcre2_set_compile_extra_options(pcre2_compile_context *ccontext,
643         uint32_t extra_options);
644
645       As  PCRE2  has developed, almost all the 32 option bits that are avail‐
646       able in the options argument of pcre2_compile() have been used  up.  To
647       avoid  running  out, the compile context contains a set of extra option
648       bits which are used for some newer, assumed rarer, options. This  func‐
649       tion  sets  those bits. It always sets all the bits (either on or off).
650       Existing settings are replaced, not merged. The available  options  are
651       defined in the section entitled "Extra compile options" below.
652
653       int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
654         PCRE2_SIZE value);
655
656       This  sets a maximum length, in code units, for any pattern string that
657       is compiled with this context. If the pattern is longer,  an  error  is
658       generated.   This facility is provided so that applications that accept
659       patterns from external sources can limit their size. The default is the
660       largest  number  that  a  PCRE2_SIZE variable can hold, which is effec‐
661       tively unlimited.
662
663       int pcre2_set_newline(pcre2_compile_context *ccontext,
664         uint32_t value);
665
666       This specifies which characters or character sequences are to be recog‐
667       nized  as newlines. The value must be one of PCRE2_NEWLINE_CR (carriage
668       return only), PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the
669       two-character  sequence  CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any
670       of the above), PCRE2_NEWLINE_ANY (any  Unicode  newline  sequence),  or
671       PCRE2_NEWLINE_NUL (the NUL character, that is a binary zero).
672
673       A pattern can override the value set in the compile context by starting
674       with a sequence such as (*CRLF). See the pcre2pattern page for details.
675
676       When   a   pattern   is   compiled   with   the    PCRE2_EXTENDED    or
677       PCRE2_EXTENDED_MORE option, the newline convention affects the recogni‐
678       tion of the end of internal comments starting  with  #.  The  value  is
679       saved  with the compiled pattern for subsequent use by the JIT compiler
680       and by  the  two  interpreted  matching  functions,  pcre2_match()  and
681       pcre2_dfa_match().
682
683       int pcre2_set_parens_nest_limit(pcre2_compile_context *ccontext,
684         uint32_t value);
685
686       This  parameter  adjusts  the  limit,  set when PCRE2 is built (default
687       250), on the depth of parenthesis nesting  in  a  pattern.  This  limit
688       stops  rogue  patterns  using  up too much system stack when being com‐
689       piled. The limit applies to parentheses of all kinds, not just  captur‐
690       ing parentheses.
691
692       int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
693         int (*guard_function)(uint32_t, void *), void *user_data);
694
695       There  is at least one application that runs PCRE2 in threads with very
696       limited system stack, where running out of stack is to  be  avoided  at
697       all  costs. The parenthesis limit above cannot take account of how much
698       stack is actually available during compilation. For  a  finer  control,
699       you  can  supply  a  function  that  is called whenever pcre2_compile()
700       starts to compile a parenthesized part of a pattern. This function  can
701       check  the  actual  stack  size  (or anything else that it wants to, of
702       course).
703
704       The first argument to the guard function gives the current  depth  of
705       nesting,  and  the second is user data that is set up by the last argu‐
706       ment  of  pcre2_set_compile_recursion_guard().  The  guard   function
707       should return zero if all is well, or non-zero to force an error.
708
709   The match context
710
711       A match context is required if you want to:
712
713         Set up a callout function
714         Set an offset limit for matching an unanchored pattern
715         Change the limit on the amount of heap used when matching
716         Change the backtracking match limit
717         Change the backtracking depth limit
718         Set custom memory management specifically for the match
719
720       If  none  of  these  apply,  just  pass NULL as the context argument of
721       pcre2_match(), pcre2_dfa_match(), or pcre2_jit_match().
722
723       A match context is created, copied, and freed by  the  following  func‐
724       tions:
725
726       pcre2_match_context *pcre2_match_context_create(
727         pcre2_general_context *gcontext);
728
729       pcre2_match_context *pcre2_match_context_copy(
730         pcre2_match_context *mcontext);
731
732       void pcre2_match_context_free(pcre2_match_context *mcontext);
733
734       A  match  context  is  created  with default values for its parameters.
735       These can be changed by calling the following functions, which return 0
736       on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
737
738       int pcre2_set_callout(pcre2_match_context *mcontext,
739         int (*callout_function)(pcre2_callout_block *, void *),
740         void *callout_data);
741
742       This  sets  up a callout function for PCRE2 to call at specified points
743       during a matching operation. Details are given in the pcre2callout doc‐
744       umentation.
745
746       int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
747         int (*callout_function)(pcre2_substitute_callout_block *, void *),
748         void *callout_data);
749
750       This  sets up a callout function for PCRE2 to call after each substitu‐
751       tion made by pcre2_substitute(). Details are given in the section enti‐
752       tled "Creating a new string with substitutions" below.
753
754       int pcre2_set_offset_limit(pcre2_match_context *mcontext,
755         PCRE2_SIZE value);
756
757       The  offset_limit  parameter  limits  how  far an unanchored search can
758       advance in the subject string. The default value  is  PCRE2_UNSET.  The
759       pcre2_match()      and      pcre2_dfa_match()      functions     return
760       PCRE2_ERROR_NOMATCH if a match with a starting point before or  at  the
761       given  offset  is  not  found. The pcre2_substitute() function makes no
762       more substitutions.
763
764       For example, if the pattern /abc/ is matched against "123abc"  with  an
765       offset  limit  less  than 3, the result is PCRE2_ERROR_NOMATCH. A match
766       can never be  found  if  the  startoffset  argument  of  pcre2_match(),
767       pcre2_dfa_match(),  or  pcre2_substitute()  is  greater than the offset
768       limit set in the match context.
769
770       When using this  facility,  you  must  set  the  PCRE2_USE_OFFSET_LIMIT
771       option when calling pcre2_compile() so that when JIT is in use, differ‐
772       ent code can be compiled. If a match  is  started  with  a  non-default
773       offset limit when PCRE2_USE_OFFSET_LIMIT is not set, an error is gener‐
774       ated.
775
776       The offset limit facility can be used to track progress when  searching
777       large  subject  strings or to limit the extent of global substitutions.
778       See also the PCRE2_FIRSTLINE option, which requires a  match  to  start
779       before  or  at  the first newline that follows the start of matching in
780       the subject. If this is set with an offset limit, a match must occur in
781       the first line and also within the offset limit. In other words, which‐
782       ever limit comes first is used.
783
784       int pcre2_set_heap_limit(pcre2_match_context *mcontext,
785         uint32_t value);
786
787       The heap_limit parameter specifies, in units of kibibytes (1024 bytes),
788       the  maximum  amount  of heap memory that pcre2_match() may use to hold
789       backtracking information when running an interpretive match. This limit
790       also applies to pcre2_dfa_match(), which may use the heap when process‐
791       ing patterns with a lot of nested pattern recursion or  lookarounds  or
792       atomic groups. This limit does not apply to matching with the JIT opti‐
793       mization, which has  its  own  memory  control  arrangements  (see  the
794       pcre2jit  documentation for more details). If the limit is reached, the
795       negative error code  PCRE2_ERROR_HEAPLIMIT  is  returned.  The  default
796       limit  can be set when PCRE2 is built; if it is not, the default is set
797       very large and is essentially "unlimited".
798
799       A value for the heap limit may also be supplied by an item at the start
800       of a pattern of the form
801
802         (*LIMIT_HEAP=ddd)
803
804       where  ddd  is  a  decimal  number.  However, such a setting is ignored
805       unless ddd is less than the limit set by the  caller  of  pcre2_match()
806       or, if no such limit is set, less than the default.
807
808       The  pcre2_match() function starts out using a 20KiB vector on the sys‐
809       tem stack for recording backtracking points. The more nested backtrack‐
810       ing  points  there  are (that is, the deeper the search tree), the more
811       memory is needed.  Heap memory is used only if the  initial  vector  is
812       too small. If the heap limit is set to a value less than 21 (in partic‐
813       ular, zero) no heap memory will be used. In this  case,  only  patterns
814       that  do not have a lot of nested backtracking can be successfully pro‐
815       cessed.
816
817       Similarly, for pcre2_dfa_match(), a vector on the system stack is  used
818       when  processing pattern recursions, lookarounds, or atomic groups, and
819       only if this is not big enough is heap memory used. In this case,  too,
820       setting a value of zero disables the use of the heap.
821
822       int pcre2_set_match_limit(pcre2_match_context *mcontext,
823         uint32_t value);
824
825       The  match_limit  parameter  provides  a means of preventing PCRE2 from
826       using up too many computing resources when processing patterns that are
827       not going to match, but which have a very large number of possibilities
828       in their search trees. The classic  example  is  a  pattern  that  uses
829       nested unlimited repeats.
830
831       There  is an internal counter in pcre2_match() that is incremented each
832       time round its main matching loop. If  this  value  reaches  the  match
833       limit, pcre2_match() returns the negative value PCRE2_ERROR_MATCHLIMIT.
834       This has the effect of limiting the amount  of  backtracking  that  can
835       take place. For patterns that are not anchored, the count restarts from
836       zero for each position in the subject string. This limit  also  applies
837       to pcre2_dfa_match(), though the counting is done in a different way.
838
839       When  pcre2_match() is called with a pattern that was successfully pro‐
840       cessed by pcre2_jit_compile(), the way in which matching is executed is
841       entirely  different. However, there is still the possibility of runaway
842       matching that goes on for a very long  time,  and  so  the  match_limit
843       value  is  also used in this case (but in a different way) to limit how
844       long the matching can continue.
845
846       The default value for the limit can be set when PCRE2 is built; if it
847       is not, the default is 10 million, which handles all but the most extreme
848       cases. A value for the match limit may also be supplied by an  item  at
849       the start of a pattern of the form
850
851         (*LIMIT_MATCH=ddd)
852
853       where  ddd  is  a  decimal  number.  However, such a setting is ignored
854       unless ddd is less than the limit set by the caller of pcre2_match() or
855       pcre2_dfa_match() or, if no such limit is set, less than the default.
856
857       int pcre2_set_depth_limit(pcre2_match_context *mcontext,
858         uint32_t value);
859
860       This   parameter   limits   the   depth   of   nested  backtracking  in
861       pcre2_match().  Each time a nested backtracking point is passed, a  new
862       memory "frame" is used to remember the state of matching at that point.
863       Thus, this parameter indirectly limits the amount  of  memory  that  is
864       used  in  a  match.  However,  because  the size of each memory "frame"
865       depends on the number of capturing parentheses, the actual memory limit
866       varies  from pattern to pattern. This limit was more useful in versions
867       before 10.30, where function recursion was used for backtracking.
868
869       The depth limit is not relevant, and is ignored, when matching is  done
870       using JIT compiled code. However, it is supported by pcre2_dfa_match(),
871       which uses it to limit the depth of nested internal recursive  function
872       calls  that implement atomic groups, lookaround assertions, and pattern
873       recursions. This limits, indirectly, the amount of system stack that is
874       used.  It  was  more useful in versions before 10.32, when stack memory
875       was used for local workspace vectors for recursive function calls. From
876       version  10.32,  only local variables are allocated on the stack and as
877       each call uses only a few hundred bytes, even a small stack can support
878       quite a lot of recursion.
879
880       If  the  depth  of  internal  recursive function calls is great enough,
881       local workspace vectors are allocated on the heap  from  version  10.32
882       onwards,  so  the depth limit also indirectly limits the amount of heap
883       memory that is used. A recursive pattern such as /(.(?2))((?1)|)/, when
884       matched  to a very long string using pcre2_dfa_match(), can use a great
885       deal of memory. However, it is probably  better  to  limit  heap  usage
886       directly by calling pcre2_set_heap_limit().
887
888       The  default  value for the depth limit can be set when PCRE2 is built;
889       if it is not, the default is set to the same value as the  default  for
890       the   match   limit.   If  the  limit  is  exceeded,  pcre2_match()  or
891       pcre2_dfa_match() returns PCRE2_ERROR_DEPTHLIMIT. A value for the depth
892       limit  may also be supplied by an item at the start of a pattern of the
893       form
894
895         (*LIMIT_DEPTH=ddd)
896
897       where ddd is a decimal number.  However,  such  a  setting  is  ignored
898       unless ddd is less than the limit set by the caller of pcre2_match() or
899       pcre2_dfa_match() or, if no such limit is set, less than the default.
900

CHECKING BUILD-TIME OPTIONS

902
903       int pcre2_config(uint32_t what, void *where);
904
905       The function pcre2_config() makes it possible for  a  PCRE2  client  to
906       discover  which  optional  features  have  been compiled into the PCRE2
907       library. The pcre2build documentation  has  more  details  about  these
908       optional features.
909
910       The  first  argument  for pcre2_config() specifies which information is
911       required. The second argument is a pointer to  memory  into  which  the
912       information  is  placed.  If  NULL  is passed, the function returns the
913       amount of memory that is needed  for  the  requested  information.  For
914       calls  that  return  numerical  values,  the  value  is  in bytes; when
915       requesting these values, where should point  to  appropriately  aligned
916       memory.  For calls that return strings, the required length is given in
917       code units, not counting the terminating zero.
918
919       When requesting information, the returned value from pcre2_config()  is
920       non-negative  on success, or the negative error code PCRE2_ERROR_BADOP‐
921       TION if the value in the first argument is not recognized. The  follow‐
922       ing information is available:
923
924         PCRE2_CONFIG_BSR
925
926       The  output  is a uint32_t integer whose value indicates what character
927       sequences the \R  escape  sequence  matches  by  default.  A  value  of
928       PCRE2_BSR_UNICODE  means  that  \R  matches  any  Unicode  line  ending
929       sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches  only  CR,
930       LF, or CRLF. The default can be overridden when a pattern is compiled.
931
932         PCRE2_CONFIG_COMPILED_WIDTHS
933
934       The  output  is a uint32_t integer whose lower bits indicate which code
935       unit widths were selected when PCRE2 was  built.  The  1-bit  indicates
936       8-bit  support, and the 2-bit and 4-bit indicate 16-bit and 32-bit sup‐
937       port, respectively.
938
939         PCRE2_CONFIG_DEPTHLIMIT
940
941       The output is a uint32_t integer that gives the default limit  for  the
942       depth  of  nested  backtracking in pcre2_match() or the depth of nested
943       recursions, lookarounds, and atomic groups in  pcre2_dfa_match().  Fur‐
944       ther details are given with pcre2_set_depth_limit() above.
945
946         PCRE2_CONFIG_HEAPLIMIT
947
948       The  output is a uint32_t integer that gives, in kibibytes, the default
949       limit  for  the  amount  of  heap  memory  used  by  pcre2_match()   or
950       pcre2_dfa_match().      Further      details     are     given     with
951       pcre2_set_heap_limit() above.
952
953         PCRE2_CONFIG_JIT
954
955       The output is a uint32_t integer that is set  to  one  if  support  for
956       just-in-time compiling is available; otherwise it is set to zero.
957
958         PCRE2_CONFIG_JITTARGET
959
960       The  where  argument  should point to a buffer that is at least 48 code
961       units long.  (The  exact  length  required  can  be  found  by  calling
962       pcre2_config()  with  where  set  to NULL.) The buffer is filled with a
963       string that contains the name of the architecture  for  which  the  JIT
964       compiler  is  configured,  for  example  "x86  32bit  (little  endian +
965       unaligned)". If JIT support is not available, PCRE2_ERROR_BADOPTION  is
966       returned,  otherwise the number of code units used is returned. This is
967       the length of the string, plus one unit for the terminating zero.
968
969         PCRE2_CONFIG_LINKSIZE
970
971       The output is a uint32_t integer that contains the number of bytes used
972       for  internal  linkage  in  compiled regular expressions. When PCRE2 is
973       configured, the value can be set to 2, 3, or 4, with the default  being
974       2.  This is the value that is returned by pcre2_config(). However, when
975       the 16-bit library is compiled, a value of 3 is rounded up  to  4,  and
976       when  the  32-bit  library  is compiled, internal linkages always use 4
977       bytes, so the configured value is not relevant.
978
979       The default value of 2 for the 8-bit and 16-bit libraries is sufficient
980       for  all but the most massive patterns, since it allows the size of the
981       compiled pattern to be up to 65535  code  units.  Larger  values  allow
982       larger  regular  expressions to be compiled by those two libraries, but
983       at the expense of slower matching.
984
985         PCRE2_CONFIG_MATCHLIMIT
986
987       The output is a uint32_t integer that gives the default match limit for
988       pcre2_match().  Further  details are given with pcre2_set_match_limit()
989       above.
990
991         PCRE2_CONFIG_NEWLINE
992
993       The output is a uint32_t integer  whose  value  specifies  the  default
994       character  sequence that is recognized as meaning "newline". The values
995       are:
996
997         PCRE2_NEWLINE_CR       Carriage return (CR)
998         PCRE2_NEWLINE_LF       Linefeed (LF)
999         PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
1000         PCRE2_NEWLINE_ANY      Any Unicode line ending
1001         PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
1002         PCRE2_NEWLINE_NUL      The NUL character (binary zero)
1003
1004       The default should normally correspond to  the  standard  sequence  for
1005       your operating system.
1006
1007         PCRE2_CONFIG_NEVER_BACKSLASH_C
1008
1009       The  output  is  a uint32_t integer that is set to one if the use of \C
1010       was permanently disabled when PCRE2 was built; otherwise it is  set  to
1011       zero.
1012
1013         PCRE2_CONFIG_PARENSLIMIT
1014
1015       The  output is a uint32_t integer that gives the maximum depth of nest‐
1016       ing of parentheses (of any kind) in a pattern. This limit is imposed to
1017       cap  the  amount of system stack used when a pattern is compiled. It is
1018       specified when PCRE2 is built; the default is 250. This limit does  not
1019       take  into  account  the  stack that may already be used by the calling
1020       application. For  finer  control  over  compilation  stack  usage,  see
1021       pcre2_set_compile_recursion_guard().
1022
1023         PCRE2_CONFIG_STACKRECURSE
1024
1025       This parameter is obsolete and should not be used in new code. The out‐
1026       put is a uint32_t integer that is always set to zero.
1027
1028         PCRE2_CONFIG_UNICODE_VERSION
1029
1030       The where argument should point to a buffer that is at  least  24  code
1031       units  long.  (The  exact  length  required  can  be  found  by calling
1032       pcre2_config() with where set to NULL.)  If  PCRE2  has  been  compiled
1033       without  Unicode  support,  the buffer is filled with the text "Unicode
1034       not supported". Otherwise, the Unicode  version  string  (for  example,
1035       "8.0.0")  is  inserted. The number of code units used is returned. This
1036       is the length of the string plus one unit for the terminating zero.
1037
1038         PCRE2_CONFIG_UNICODE
1039
1040       The output is a uint32_t integer that is set to one if Unicode  support
1041       is  available; otherwise it is set to zero. Unicode support implies UTF
1042       support.
1043
1044         PCRE2_CONFIG_VERSION
1045
1046       The where argument should point to a buffer that is at  least  24  code
1047       units  long.  (The  exact  length  required  can  be  found  by calling
1048       pcre2_config() with where set to NULL.) The buffer is filled  with  the
1049       PCRE2 version string, zero-terminated. The number of code units used is
1050       returned. This is the length of the string plus one unit for the termi‐
1051       nating zero.
1052

COMPILING A PATTERN

1054
1055       pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length,
1056         uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
1057         pcre2_compile_context *ccontext);
1058
1059       void pcre2_code_free(pcre2_code *code);
1060
1061       pcre2_code *pcre2_code_copy(const pcre2_code *code);
1062
1063       pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code);
1064
1065       The  pcre2_compile() function compiles a pattern into an internal form.
1066       The pattern is defined by a pointer to a string of  code  units  and  a
1067       length  (in  code units). If the pattern is zero-terminated, the length
1068       can be specified  as  PCRE2_ZERO_TERMINATED.  The  function  returns  a
1069       pointer  to  a  block  of memory that contains the compiled pattern and
1070       related data, or NULL if an error occurred.
1071
1072       If the compile context argument ccontext is NULL, memory for  the  com‐
1073       piled  pattern  is  obtained  by  calling  malloc().  Otherwise,  it is
1074       obtained from the same memory function that was used  for  the  compile
1075       context.  The  caller must free the memory by calling pcre2_code_free()
1076       when it is no longer needed.  If pcre2_code_free()  is  called  with  a
1077       NULL argument, it returns immediately, without doing anything.
1078
1079       The function pcre2_code_copy() makes a copy of the compiled code in new
1080       memory, using the same memory allocator as was used for  the  original.
1081       However,  if  the  code  has  been  processed  by the JIT compiler (see
1082       below), the JIT information cannot be copied (because it  is  position-
1083       dependent).  The new copy can initially be used only for non-JIT match‐
1084       ing, though it can be passed to  pcre2_jit_compile()  if  required.  If
1085       pcre2_code_copy() is called with a NULL argument, it returns NULL.
1086
1087       The pcre2_code_copy() function provides a way for individual threads in
1088       a multithreaded application to acquire a private copy  of  shared  com‐
1089       piled  code.   However, it does not make a copy of the character tables
1090       used by the compiled pattern; the new pattern code points to  the  same
1091       tables  as  the original code.  (See "Locale Support" below for details
1092       of these character tables.) In many applications the  same  tables  are
1093       used  throughout, so this behaviour is appropriate. Nevertheless, there
1094       are occasions when a copy of a compiled pattern and the relevant tables
1095       are needed. The pcre2_code_copy_with_tables() function provides this facility.
1096       Copies of both the code and the tables are  made,  with  the  new  code
1097       pointing  to the new tables. The memory for the new tables is automati‐
1098       cally freed when pcre2_code_free() is called for the new  copy  of  the
1099       compiled  code.  If pcre2_code_copy_with_tables() is called with a NULL
1100       argument, it returns NULL.
1101
1102       NOTE: When one of the matching functions is  called,  pointers  to  the
1103       compiled pattern and the subject string are set in the match data block
1104       so that they can be referenced by the  substring  extraction  functions
1105       after  a  successful match.  After running a match, you must not free a
1106       compiled pattern or a subject string until after all operations on  the
1107       match  data  block have taken place, unless, in the case of the subject
1108       string, you have used the PCRE2_COPY_MATCHED_SUBJECT option,  which  is
1109       described  in  the  section  entitled  "Option  bits for pcre2_match()"
1110       below.
1111
1112       The options argument for pcre2_compile() contains various bit  settings
1113       that  affect  the  compilation.  It  should be zero if none of them are
1114       required. The available options are described below. Some of  them  (in
1115       particular,  those  that  are  compatible with Perl, but some others as
1116       well) can also be set and  unset  from  within  the  pattern  (see  the
1117       detailed description in the pcre2pattern documentation).
1118
1119       For  those options that can be different in different parts of the pat‐
1120       tern, the contents of the options argument specifies their settings  at
1121       the  start  of  compilation. The PCRE2_ANCHORED, PCRE2_ENDANCHORED, and
1122       PCRE2_NO_UTF_CHECK options can be set at the time of matching  as  well
1123       as at compile time.
1124
1125       Some  additional  options  and  less  frequently  required compile-time
1126       parameters (for example, the newline setting) can be provided in a com‐
1127       pile context (as described above).
1128
1129       If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme‐
1130       diately. Otherwise, the variables to which these point are  set  to  an
1131       error  code  and  an  offset (number of code units) within the pattern,
1132       respectively, when pcre2_compile() returns NULL because  a  compilation
1133       error has occurred. The values are not defined when compilation is suc‐
1134       cessful and pcre2_compile() returns a non-NULL value.
1135
1136       There are nearly 100 positive  error  codes  that  pcre2_compile()  may
1137       return  if  it finds an error in the pattern. There are also some nega‐
1138       tive error codes that are used for invalid UTF  strings  when  validity
1139       checking  is in force. These are the same as given by pcre2_match() and
1140       pcre2_dfa_match(), and are described in the pcre2unicode documentation.
1141       There  is  no  separate  documentation  for  the  positive error codes,
1142       because the textual error messages that are  obtained  by  calling  the
1143       pcre2_get_error_message() function (see "Obtaining a textual error mes‐
1144       sage" below) should be  self-explanatory.  Macro  names  starting  with
1145       PCRE2_ERROR_  are defined for both positive and negative error codes in
1146       pcre2.h.
1147
1148       The value returned in erroroffset is an indication of where in the pat‐
1149       tern  the  error  occurred. It is not necessarily the furthest point in
1150       the pattern that was read. For example,  after  the  error  "lookbehind
1151       assertion is not fixed length", the error offset points to the start of
1152       the failing assertion. For an invalid UTF-8 or UTF-16 string, the  off‐
1153       set is that of the first code unit of the failing character.
1154
1155       Some  errors are not detected until the whole pattern has been scanned;
1156       in these cases, the offset passed back is the length  of  the  pattern.
1157       Note  that  the  offset is in code units, not characters, even in a UTF
1158       mode. It may sometimes point into the middle of a UTF-8 or UTF-16 char‐
1159       acter.
1160
1161       This  code  fragment shows a typical straightforward call to pcre2_com‐
1162       pile():
1163
1164         pcre2_code *re;
1165         PCRE2_SIZE erroffset;
1166         int errorcode;
1167         re = pcre2_compile(
1168           "^A.*Z",                /* the pattern */
1169           PCRE2_ZERO_TERMINATED,  /* the pattern is zero-terminated */
1170           0,                      /* default options */
1171           &errorcode,             /* for error code */
1172           &erroffset,             /* for error offset */
1173           NULL);                  /* no compile context */
1174
1175
1176   Main compile options
1177
1178       The following names for option bits are defined in the  pcre2.h  header
1179       file:
1180
1181         PCRE2_ANCHORED
1182
1183       If this bit is set, the pattern is forced to be "anchored", that is, it
1184       is constrained to match only at the first matching point in the  string
1185       that  is being searched (the "subject string"). This effect can also be
1186       achieved by appropriate constructs in the pattern itself, which is  the
1187       only way to do it in Perl.
1188
1189         PCRE2_ALLOW_EMPTY_CLASS
1190
1191       By  default, for compatibility with Perl, a closing square bracket that
1192       immediately follows an opening one is treated as a data  character  for
1193       the  class.  When  PCRE2_ALLOW_EMPTY_CLASS  is  set,  it terminates the
1194       class, which therefore contains no characters and so can never match.
1195
1196         PCRE2_ALT_BSUX
1197
1198       This option requests alternative handling of  three  escape  sequences,
1199       which  makes  PCRE2's  behaviour more like ECMAscript (aka JavaScript).
1200       When it is set:
1201
1202       (1) \U matches an upper case "U" character; by default \U causes a com‐
1203       pile time error (Perl uses \U to upper case subsequent characters).
1204
1205       (2) \u matches a lower case "u" character unless it is followed by four
1206       hexadecimal digits, in which case the hexadecimal  number  defines  the
1207       code  point  to match. By default, \u causes a compile time error (Perl
1208       uses it to upper case the following character).
1209
1210       (3) \x matches a lower case "x" character unless it is followed by  two
1211       hexadecimal  digits,  in  which case the hexadecimal number defines the
1212       code point to match. By default, as in Perl, a  hexadecimal  number  is
1213       always expected after \x, but it may have zero, one, or two digits (so,
1214       for example, \xz matches a binary zero character followed by z).
1215
1216       ECMAscript 6 added additional functionality to \u. This can be accessed
1217       using   the  PCRE2_EXTRA_ALT_BSUX  extra  option  (see  "Extra  compile
1218       options" below).  Note that this alternative  escape  handling  applies
1219       only  to  patterns.  Neither of these options affects the processing of
1220       replacement strings passed to pcre2_substitute().
1221
1222         PCRE2_ALT_CIRCUMFLEX
1223
1224       In  multiline  mode  (when  PCRE2_MULTILINE  is  set),  the  circumflex
1225       metacharacter  matches at the start of the subject (unless PCRE2_NOTBOL
1226       is set), and also after any internal  newline.  However,  it  does  not
1227       match after a newline at the end of the subject, for compatibility with
1228       Perl. If you want a multiline circumflex also to match after  a  termi‐
1229       nating newline, you must set PCRE2_ALT_CIRCUMFLEX.
1230
1231         PCRE2_ALT_VERBNAMES
1232
1233       By  default, for compatibility with Perl, the name in any verb sequence
1234       such as (*MARK:NAME) is  any  sequence  of  characters  that  does  not
1235       include  a  closing  parenthesis. The name is not processed in any way,
1236       and it is not possible to include a closing parenthesis  in  the  name.
1237       However,  if  the  PCRE2_ALT_VERBNAMES  option is set, normal backslash
1238       processing is applied to verb  names  and  only  an  unescaped  closing
1239       parenthesis  terminates the name. A closing parenthesis can be included
1240       in a name either as \) or between \Q and \E. If the  PCRE2_EXTENDED  or
1241       PCRE2_EXTENDED_MORE  option  is set with PCRE2_ALT_VERBNAMES, unescaped
1242       whitespace in verb names is  skipped  and  #-comments  are  recognized,
1243       exactly as in the rest of the pattern.
1244
1245         PCRE2_AUTO_CALLOUT
1246
1247       If  this  bit  is  set,  pcre2_compile()  automatically inserts callout
1248       items, all with number 255, before each pattern  item,  except  immedi‐
1249       ately  before  or after an explicit callout in the pattern. For discus‐
1250       sion of the callout facility, see the pcre2callout documentation.
1251
1252         PCRE2_CASELESS
1253
1254       If this bit is set, letters in the pattern match both upper  and  lower
1255       case  letters in the subject. It is equivalent to Perl's /i option, and
1256       it can be changed within  a  pattern  by  a  (?i)  option  setting.  If
1257       PCRE2_UTF  is  set, Unicode properties are used for all characters with
1258       more than one other case, and for all characters whose code points  are
1259       greater  than  U+007F.  For lower valued characters with only one other
1260       case, a lookup table is used for speed. When PCRE2_UTF is  not  set,  a
1261       lookup table is used for all code points less than 256, and higher code
1262       points (available only in 16-bit or 32-bit mode)  are  treated  as  not
1263       having another case.
1264
1265         PCRE2_DOLLAR_ENDONLY
1266
1267       If  this bit is set, a dollar metacharacter in the pattern matches only
1268       at the end of the subject string. Without this option,  a  dollar  also
1269       matches  immediately before a newline at the end of the string (but not
1270       before any other newlines). The PCRE2_DOLLAR_ENDONLY option is  ignored
1271       if  PCRE2_MULTILINE  is  set.  There is no equivalent to this option in
1272       Perl, and no way to set it within a pattern.
1273
1274         PCRE2_DOTALL
1275
1276       If this bit is set, a dot metacharacter  in  the  pattern  matches  any
1277       character,  including  one  that  indicates a newline. However, it only
1278       ever matches one character, even if newlines are coded as CRLF. Without
1279       this option, a dot does not match when the current position in the sub‐
1280       ject is at a newline. This option is equivalent to  Perl's  /s  option,
1281       and it can be changed within a pattern by a (?s) option setting. A neg‐
1282       ative class such as [^a] always matches newline characters, and the  \N
1283       escape  sequence always matches a non-newline character, independent of
1284       the setting of PCRE2_DOTALL.
1285
1286         PCRE2_DUPNAMES
1287
1288       If this bit is set, names used to identify capture groups need  not  be
1289       unique.   This  can  be helpful for certain types of pattern when it is
1290       known that only one instance of the named group can  ever  be  matched.
1291       There  are  more  details  of  named capture groups below; see also the
1292       pcre2pattern documentation.
1293
1294         PCRE2_ENDANCHORED
1295
1296       If this bit is set, the end of any pattern match must be right  at  the
1297       end of the string being searched (the "subject string"). If the pattern
1298       match succeeds by reaching (*ACCEPT), but does not reach the end of the
1299       subject,  the match fails at the current starting point. For unanchored
1300       patterns, a new match is then tried at the next  starting  point.  How‐
1301       ever, if the match succeeds by reaching the end of the pattern, but not
1302       the end of the subject, backtracking occurs and  an  alternative  match
1303       may be found. Consider these two patterns:
1304
1305         .(*ACCEPT)|..
1306         .|..
1307
1308       If  matched against "abc" with PCRE2_ENDANCHORED set, the first matches
1309       "c" whereas the second matches "bc". The  effect  of  PCRE2_ENDANCHORED
1310       can  also  be achieved by appropriate constructs in the pattern itself,
1311       which is the only way to do it in Perl.
1312
1313       For DFA matching with pcre2_dfa_match(), PCRE2_ENDANCHORED applies only
1314       to  the  first  (that  is,  the longest) matched string. Other parallel
1315       matches, which are necessarily substrings of the first one, must  obvi‐
1316       ously end before the end of the subject.
1317
1318         PCRE2_EXTENDED
1319
1320       If  this  bit  is  set,  most white space characters in the pattern are
1321       totally ignored except when escaped or inside a character  class.  How‐
1322       ever,  white  space  is  not  allowed within sequences such as (?> that
1323       introduce various parenthesized groups, nor  within  numerical  quanti‐
1324       fiers such as {1,3}. Ignorable white space is permitted between an item
1325       and a following quantifier and between a quantifier and a  following  +
1326       that  indicates  possessiveness. PCRE2_EXTENDED is equivalent to Perl's
1327       /x option, and it can be changed within a pattern by a (?x) option set‐
1328       ting.
1329
1330       When  PCRE2  is compiled without Unicode support, PCRE2_EXTENDED recog‐
1331       nizes as white space only those characters with code points  less  than
1332       256 that are flagged as white space in its low-character table. The ta‐
1333       ble is normally created by pcre2_maketables(), which uses the isspace()
1334       function  to identify space characters. In most ASCII environments, the
1335       relevant characters are those with code  points  0x0009  (tab),  0x000A
1336       (linefeed),  0x000B (vertical tab), 0x000C (formfeed), 0x000D (carriage
1337       return), and 0x0020 (space).
1338
1339       When PCRE2 is compiled with Unicode support, in addition to these char‐
1340       acters,  five  more Unicode "Pattern White Space" characters are recog‐
1341       nized by PCRE2_EXTENDED. These are U+0085 (next line), U+200E (left-to-
1342       right  mark), U+200F (right-to-left mark), U+2028 (line separator), and
1343       U+2029 (paragraph separator). This set of characters  is  the  same  as
1344       recognized  by  Perl's /x option. Note that the horizontal and vertical
1345       space characters that are matched by the \h and \v escapes in  patterns
1346       are a much bigger set.
1347
1348       As  well as ignoring most white space, PCRE2_EXTENDED also causes char‐
1349       acters between an unescaped # outside a character class  and  the  next
1350       newline,  inclusive,  to be ignored, which makes it possible to include
1351       comments inside complicated patterns. Note that the end of this type of
1352       comment  is a literal newline sequence in the pattern; escape sequences
1353       that happen to represent a newline do not count.
1354
1355       Which characters are interpreted as newlines can be specified by a set‐
1356       ting  in  the compile context that is passed to pcre2_compile() or by a
1357       special sequence at the start of the pattern, as described in the  sec‐
1358       tion  entitled "Newline conventions" in the pcre2pattern documentation.
1359       A default is defined when PCRE2 is built.
1360
1361         PCRE2_EXTENDED_MORE
1362
1363       This option  has  the  effect  of  PCRE2_EXTENDED,  but,  in  addition,
1364       unescaped  space  and  horizontal  tab  characters are ignored inside a
1365       character class. Note: only these two characters are ignored,  not  the
1366       full  set  of pattern white space characters that are ignored outside a
1367       character  class.  PCRE2_EXTENDED_MORE  is  equivalent  to  Perl's  /xx
1368       option,  and  it can be changed within a pattern by a (?xx) option set‐
1369       ting.
1370
1371         PCRE2_FIRSTLINE
1372
1373       If this option is set, the start of an unanchored pattern match must be
1374       before  or  at  the  first  newline in the subject string following the
1375       start of matching, though the matched text may continue over  the  new‐
1376       line. If startoffset is non-zero, the limiting newline is not necessar‐
1377       ily the first newline in the  subject.  For  example,  if  the  subject
1378       string is "abc\nxyz" (where \n represents a single-character newline) a
1379       pattern match for "yz" succeeds with PCRE2_FIRSTLINE if startoffset  is
1380       greater  than 3. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
1381       general limiting facility. If PCRE2_FIRSTLINE is  set  with  an  offset
1382       limit,  a match must occur in the first line and also within the offset
1383       limit. In other words, whichever limit comes first is used.
1384
1385         PCRE2_LITERAL
1386
1387       If this option is set, all meta-characters in the pattern are disabled,
1388       and  it is treated as a literal string. Matching literal strings with a
1389       regular expression engine is not the most efficient way of doing it. If
1390       you  are  doing  a  lot of literal matching and are worried about effi‐
1391       ciency, you should consider using other approaches. The only other main
1392       options  that  are  allowed  with  PCRE2_LITERAL  are:  PCRE2_ANCHORED,
1393       PCRE2_ENDANCHORED, PCRE2_AUTO_CALLOUT, PCRE2_CASELESS, PCRE2_FIRSTLINE,
1394       PCRE2_MATCH_INVALID_UTF,  PCRE2_NO_START_OPTIMIZE,  PCRE2_NO_UTF_CHECK,
1395       PCRE2_UTF,    and    PCRE2_USE_OFFSET_LIMIT.    The    extra    options
1396       PCRE2_EXTRA_MATCH_LINE  and  PCRE2_EXTRA_MATCH_WORD are also supported.
1397       Any other options cause an error.
1398
1399         PCRE2_MATCH_INVALID_UTF
1400
1401       This option forces PCRE2_UTF (see below) and also enables  support  for
1402       matching  by  pcre2_match() in subject strings that contain invalid UTF
1403       sequences.  This facility  is  not  supported  for  DFA  matching.  For
1404       details, see the pcre2unicode documentation.
1405
1406         PCRE2_MATCH_UNSET_BACKREF
1407
1408       If  this  option  is  set,  a  backreference  to an unset capture group
1409       matches an empty string (by default this causes  the  current  matching
1410       alternative  to  fail).   A  pattern such as (\1)(a) succeeds when this
1411       option is set (assuming it can find an "a" in the subject), whereas  it
1412       fails  by  default,  for  Perl compatibility. Setting this option makes
1413       PCRE2 behave more like ECMAscript (aka JavaScript).
1414
1415         PCRE2_MULTILINE
1416
1417       By default, for the purposes of matching "start of line"  and  "end  of
1418       line",  PCRE2  treats the subject string as consisting of a single line
1419       of characters, even if it actually contains  newlines.  The  "start  of
1420       line"  metacharacter  (^)  matches only at the start of the string, and
1421       the "end of line" metacharacter ($) matches only  at  the  end  of  the
1422       string,  or  before  a  terminating  newline  (except  when  PCRE2_DOL‐
1423       LAR_ENDONLY is set). Note, however, that unless  PCRE2_DOTALL  is  set,
1424       the "any character" metacharacter (.) does not match at a newline. This
1425       behaviour (for ^, $, and dot) is the same as Perl.
1426
1427       When  PCRE2_MULTILINE  is  set, the "start of line" and "end  of  line"
1428       constructs  match  immediately following or immediately before internal
1429       newlines in the subject string, respectively, as well as  at  the  very
1430       start  and  end.  This is equivalent to Perl's /m option, and it can be
1431       changed within a pattern by a (?m) option setting. Note that the "start
1432       of line" metacharacter does not match after a newline at the end of the
1433       subject, for compatibility with Perl.  However, you can change this  by
1434       setting  the PCRE2_ALT_CIRCUMFLEX option. If there are no newlines in a
1435       subject string, or no occurrences of ^  or  $  in  a  pattern,  setting
1436       PCRE2_MULTILINE has no effect.
1437
1438         PCRE2_NEVER_BACKSLASH_C
1439
1440       This  option  locks out the use of \C in the pattern that is being com‐
1441       piled.  This escape can  cause  unpredictable  behaviour  in  UTF-8  or
1442       UTF-16  modes,  because  it may leave the current matching point in the
1443       middle of a multi-code-unit character. This option  may  be  useful  in
1444       applications  that  process  patterns  from external sources. Note that
1445       there is also a build-time option that permanently locks out the use of
1446       \C.
1447
1448         PCRE2_NEVER_UCP
1449
1450       This  option  locks  out the use of Unicode properties for handling \B,
1451       \b, \D, \d, \S, \s, \W, \w, and some of the POSIX character classes, as
1452       described  for  the  PCRE2_UCP option below. In particular, it prevents
1453       the creator of the pattern from enabling this facility by starting  the
1454       pattern  with  (*UCP).  This  option may be useful in applications that
1455       process patterns from external sources. The option combination PCRE2_UCP
1456       and PCRE2_NEVER_UCP causes an error.
1457
1458         PCRE2_NEVER_UTF
1459
1460       This  option  locks out interpretation of the pattern as UTF-8, UTF-16,
1461       or UTF-32, depending on which library is in use. In particular, it pre‐
1462       vents  the  creator of the pattern from switching to UTF interpretation
1463       by starting the pattern with (*UTF).  This  option  may  be  useful  in
1464       applications  that process patterns from external sources. The combina‐
1465       tion of PCRE2_UTF and PCRE2_NEVER_UTF causes an error.
1466
1467         PCRE2_NO_AUTO_CAPTURE
1468
1469       If this option is set, it disables the use of numbered capturing paren‐
1470       theses  in the pattern. Any opening parenthesis that is not followed by
1471       ? behaves as if it were followed by ?: but named parentheses can  still
1472       be used for capturing (and they acquire numbers in the usual way). This
1473       is the same as Perl's /n option.  Note that, when this option  is  set,
1474       references  to  capture  groups (backreferences or recursion/subroutine
1475       calls) may only refer to named groups, though the reference can  be  by
1476       name or by number.
1477
1478         PCRE2_NO_AUTO_POSSESS
1479
1480       If this option is set, it disables "auto-possessification", which is an
1481       optimization that, for example, turns a+b into a++b in order  to  avoid
1482       backtracks  into  a+ that can never be successful. However, if callouts
1483       are in use, auto-possessification means that some  callouts  are  never
1484       taken. You can set this option if you want the matching functions to do
1485       a full unoptimized search and run all the callouts, but  it  is  mainly
1486       provided for testing purposes.
1487
1488         PCRE2_NO_DOTSTAR_ANCHOR
1489
1490       If this option is set, it disables an optimization that is applied when
1491       .* is the first significant item in a top-level branch  of  a  pattern,
1492       and  all  the  other branches also start with .* or with \A or \G or ^.
1493       The optimization is automatically disabled for .* if it  is  inside  an
1494       atomic group or a capture group that is the subject of a backreference,
1495       or if the pattern contains (*PRUNE) or (*SKIP). When  the  optimization
1496       is   not   disabled,  such  a  pattern  is  automatically  anchored  if
1497       PCRE2_DOTALL is set for all the .* items and PCRE2_MULTILINE is not set
1498       for  any  ^ items. Otherwise, the fact that any match must start either
1499       at the start of the subject or following a newline is remembered.  Like
1500       other optimizations, this can cause callouts to be skipped.
1501
1502         PCRE2_NO_START_OPTIMIZE
1503
1504       This  is  an  option whose main effect is at matching time. It does not
1505       change what pcre2_compile() generates, but it does affect the output of
1506       the JIT compiler.
1507
1508       There  are  a  number of optimizations that may occur at the start of a
1509       match, in order to speed up the process. For example, if  it  is  known
1510       that  an  unanchored  match must start with a specific code unit value,
1511       the matching code searches the subject for that value, and fails  imme‐
1512       diately  if it cannot find it, without actually running the main match‐
1513       ing function. This means that a special item such as (*COMMIT)  at  the
1514       start  of  a  pattern is not considered until after a suitable starting
1515       point for the match has been found.  Also,  when  callouts  or  (*MARK)
1516       items  are  in use, these "start-up" optimizations can cause them to be
1517       skipped if the pattern is never actually used. The  start-up  optimiza‐
1518       tions  are  in effect a pre-scan of the subject that takes place before
1519       the pattern is run.
1520
1521       The PCRE2_NO_START_OPTIMIZE option disables the start-up optimizations,
1522       possibly  causing  performance  to  suffer,  but ensuring that in cases
1523       where the result is "no match", the callouts do occur, and  that  items
1524       such as (*COMMIT) and (*MARK) are considered at every possible starting
1525       position in the subject string.
1526
1527       Setting PCRE2_NO_START_OPTIMIZE may change the outcome  of  a  matching
1528       operation.  Consider the pattern
1529
1530         (*COMMIT)ABC
1531
1532       When  this  is compiled, PCRE2 records the fact that a match must start
1533       with the character "A". Suppose the subject  string  is  "DEFABC".  The
1534       start-up  optimization  scans along the subject, finds "A" and runs the
1535       first match attempt from there. The (*COMMIT) item means that the  pat‐
1536       tern  must  match the current starting position, which in this case, it
1537       does. However, if the same match is  run  with  PCRE2_NO_START_OPTIMIZE
1538       set,  the  initial  scan  along the subject string does not happen. The
1539       first match attempt is run starting  from  "D"  and  when  this  fails,
1540       (*COMMIT)  prevents  any  further  matches  being tried, so the overall
1541       result is "no match".
1542
1543       Another  start-up  optimization  makes use of a minimum  length  for  a
1544       matching subject, which is recorded when possible. Consider the pattern
1545
1546         (*MARK:1)B(*MARK:2)(X|Y)
1547
1548       The  minimum  length  for  a match is two characters. If the subject is
1549       "XXBB", the "starting character" optimization skips "XX", then tries to
1550       match  "BB", which is long enough. In the process, (*MARK:2) is encoun‐
1551       tered and remembered. When the match attempt fails,  the  next  "B"  is
1552       found,  but  there  is  only  one  character left, so there are no more
1553       attempts, and "no match" is returned with the "last mark seen"  set  to
1554       "2". If PCRE2_NO_START_OPTIMIZE is set, however, matches are  tried  at
1555       every possible starting position, including at the end of the  subject,
1556       where (*MARK:1) is encountered, but there is no "B", so the "last  mark
1557       seen" that is returned is "1". In this case, the optimizations  do  not
1558       affect the overall match result, which is still "no match", but they do
1559       affect the auxiliary information that is returned.
1560
1561         PCRE2_NO_UTF_CHECK
1562
1563       When PCRE2_UTF is set, the validity of the pattern as a UTF  string  is
1564       automatically  checked.  There  are  discussions  about the validity of
1565       UTF-8 strings, UTF-16 strings, and UTF-32 strings in  the  pcre2unicode
1566       document.  If an invalid UTF sequence is found, pcre2_compile() returns
1567       a negative error code.
1568
1569       If you know that your pattern is a valid UTF string, and  you  want  to
1570       skip   this   check   for   performance   reasons,   you  can  set  the
1571       PCRE2_NO_UTF_CHECK option. When it is set, the  effect  of  passing  an
1572       invalid UTF string as a pattern is undefined. It may cause your program
1573       to crash or loop.
1574
1575       Note  that  this  option  can  also  be  passed  to  pcre2_match()  and
1576       pcre2_dfa_match(),  to  suppress  UTF  validity checking of the subject
1577       string.
1578
1579       Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis‐
1580       able  the error that is given if an escape sequence for an invalid Uni‐
1581       code code point is encountered in the pattern. In particular,  the  so-
1582       called  "surrogate"  code points (0xd800 to 0xdfff) are invalid. If you
1583       want to allow escape  sequences  such  as  \x{d800}  you  can  set  the
1584       PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  extra  option, as described in the
1585       section entitled "Extra compile options" below.  However, this is  pos‐
1586       sible only in UTF-8 and UTF-32 modes, because these values are not rep‐
1587       resentable in UTF-16.
1588
1589         PCRE2_UCP
1590
1591       This option changes the way PCRE2 processes \B, \b, \D, \d, \S, \s, \W,
1592       \w,  and  some  of  the POSIX character classes. By default, only ASCII
1593       characters are recognized, but if PCRE2_UCP is set, Unicode  properties
1594       are  used instead to classify characters. More details are given in the
1595       section on generic character types in the pcre2pattern page. If you set
1596       PCRE2_UCP,  matching one of the items it affects takes much longer. The
1597       option is available only if PCRE2 has been compiled with  Unicode  sup‐
1598       port (which is the default).
1599
1600         PCRE2_UNGREEDY
1601
1602       This  option  inverts  the "greediness" of the quantifiers so that they
1603       are not greedy by default, but become greedy if followed by "?". It  is
1604       not  compatible  with Perl. It can also be set by a (?U) option setting
1605       within the pattern.
1606
1607         PCRE2_USE_OFFSET_LIMIT
1608
1609       This option must be set for pcre2_compile() if pcre2_set_offset_limit()
1610       is  going  to be used to set a non-default offset limit in a match con‐
1611       text for matches that use this pattern. An error  is  generated  if  an
1612       offset  limit  is  set  without  this option. For more details, see the
1613       description of pcre2_set_offset_limit() in the section  that  describes
1614       match contexts. See also the PCRE2_FIRSTLINE option above.
1615
1616         PCRE2_UTF
1617
1618       This  option  causes  PCRE2  to regard both the pattern and the subject
1619       strings that are subsequently processed as strings  of  UTF  characters
1620       instead  of  single-code-unit  strings.  It  is available when PCRE2 is
1621       built to include Unicode support (which is  the  default).  If  Unicode
1622       support  is  not  available,  the use of this option provokes an error.
1623       Details of how PCRE2_UTF changes the behaviour of PCRE2  are  given  in
1624       the  pcre2unicode  page.  In  particular,  note that it changes the way
1625       PCRE2_CASELESS handles characters with code points greater than 127.
1626
1627   Extra compile options
1628
1629       The option bits that can be set in a compile  context  by  calling  the
1630       pcre2_set_compile_extra_options() function are as follows:
1631
1632         PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
1633
1634       This  option  applies when compiling a pattern in UTF-8 or UTF-32 mode.
1635       It is forbidden in UTF-16 mode, and ignored in non-UTF  modes.  Unicode
1636       "surrogate" code points in the range 0xd800 to 0xdfff are used in pairs
1637       in UTF-16 to encode code points with values in  the  range  0x10000  to
1638       0x10ffff.  The  surrogates  cannot  therefore be represented in UTF-16.
1639       They can be represented in UTF-8 and UTF-32, but are defined as invalid
1640       code  points,  and  cause  errors  if  encountered in a UTF-8 or UTF-32
1641       string that is being checked for validity by PCRE2.
1642
1643       These values also cause errors if encountered in escape sequences  such
1644       as \x{d912} within a pattern. However, it seems that some applications,
1645       when using PCRE2 to check for unwanted  characters  in  UTF-8  strings,
1646       explicitly   test  for  the  surrogates  using  escape  sequences.  The
1647       PCRE2_NO_UTF_CHECK option does  not  disable  the  error  that  occurs,
1648       because  it applies only to the testing of input strings for UTF valid‐
1649       ity.
1650
1651       If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set,  surro‐
1652       gate  code  point values in UTF-8 and UTF-32 patterns no longer provoke
1653       errors and are incorporated in the compiled pattern. However, they  can
1654       only  match  subject characters if the matching function is called with
1655       PCRE2_NO_UTF_CHECK set.
1656
1657         PCRE2_EXTRA_ALT_BSUX
1658
1659       The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u,  and
1660       \x  in  the way that ECMAscript (aka JavaScript) does. Additional func‐
1661       tionality was defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has
1662       the  effect  of PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..}
1663       as a hexadecimal character code, where hhh.. is any number of hexadeci‐
1664       mal digits.
1665
1666         PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
1667
1668       This  is a dangerous option. Use with care. By default, an unrecognized
1669       escape such as \j or a malformed one such as \x{2z} causes  a  compile-
1670       time error when detected by pcre2_compile(). Perl is somewhat inconsis‐
1671       tent in handling such items: for example, \j is treated  as  a  literal
1672       "j",  and non-hexadecimal digits in \x{} are just ignored, though warn‐
1673       ings are given in both cases if Perl's warning switch is enabled.  How‐
1674       ever,  a  malformed  octal  number  after \o{ always causes an error in
1675       Perl.
1676
1677       If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL  extra  option  is  passed  to
1678       pcre2_compile(),  all  unrecognized  or  malformed escape sequences are
1679       treated as single-character escapes. For example, \j is a  literal  "j"
1680       and  \x{2z}  is  treated  as  the  literal string "x{2z}". Setting this
1681       option means that typos in patterns may go undetected  and  have  unex‐
1682       pected  results. Also note that a sequence such as [\N{] is interpreted
1683       as a malformed attempt at [\N{...}] and so is treated as  [N{]  whereas
1684       [\N]  gives  an  error  because  an  unqualified  \N  is a valid escape
1685       sequence but is not supported in a character class. To reiterate:  this
1686       is a dangerous option. Use with great care.
1687
1688         PCRE2_EXTRA_ESCAPED_CR_IS_LF
1689
1690       There  are  some  legacy applications where the escape sequence \r in a
1691       pattern is expected to match a newline. If this option is set, \r in  a
1692       pattern  is  converted to \n so that it matches a LF (linefeed) instead
1693       of a CR (carriage return) character. The option does not affect a  lit‐
1694       eral  CR in the pattern, nor does it affect CR specified as an explicit
1695       code point such as \x{0D}.
1696
1697         PCRE2_EXTRA_MATCH_LINE
1698
1699       This option is provided for use by  the  -x  option  of  pcre2grep.  It
1700       causes  the  pattern  only to match complete lines. This is achieved by
1701       automatically inserting the code for "^(?:" at the start  of  the  com‐
1702       piled  pattern  and ")$" at the end. Thus, when PCRE2_MULTILINE is set,
1703       the matched line may be in the  middle  of  the  subject  string.  This
1704       option can be used with PCRE2_LITERAL.
1705
1706         PCRE2_EXTRA_MATCH_WORD
1707
1708       This  option  is  provided  for  use  by the -w option of pcre2grep. It
1709       causes the pattern only to match strings that have a word  boundary  at
1710       the  start and the end. This is achieved by automatically inserting the
1711       code for "\b(?:" at the start of the compiled pattern and ")\b" at  the
1712       end.  The option may be used with PCRE2_LITERAL. However, it is ignored
1713       if PCRE2_EXTRA_MATCH_LINE is also set.
1714

JUST-IN-TIME (JIT) COMPILATION

1716
1717       int pcre2_jit_compile(pcre2_code *code, uint32_t options);
1718
1719       int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
1720         PCRE2_SIZE length, PCRE2_SIZE startoffset,
1721         uint32_t options, pcre2_match_data *match_data,
1722         pcre2_match_context *mcontext);
1723
1724       void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
1725
1726       pcre2_jit_stack *pcre2_jit_stack_create(PCRE2_SIZE startsize,
1727         PCRE2_SIZE maxsize, pcre2_general_context *gcontext);
1728
1729       void pcre2_jit_stack_assign(pcre2_match_context *mcontext,
1730         pcre2_jit_callback callback_function, void *callback_data);
1731
1732       void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
1733
1734       These functions provide support for  JIT  compilation,  which,  if  the
1735       just-in-time  compiler  is available, further processes a compiled pat‐
1736       tern into machine code that executes much faster than the pcre2_match()
1737       interpretive  matching function. Full details are given in the pcre2jit
1738       documentation.
1739
1740       JIT compilation is a heavyweight optimization. It can  take  some  time
1741       for  patterns  to  be analyzed, and for one-off matches and simple pat‐
1742       terns the benefit of faster execution might be offset by a much  slower
1743       compilation  time.  Most (but not all) patterns can be optimized by the
1744       JIT compiler.
1745

LOCALE SUPPORT

1747
1748       const uint8_t *pcre2_maketables(pcre2_general_context *gcontext);
1749
1750       void pcre2_maketables_free(pcre2_general_context *gcontext,
1751         const uint8_t *tables);
1752
1753       PCRE2 handles caseless matching, and determines whether characters  are
1754       letters,  digits, or whatever, by reference to a set of tables, indexed
1755       by character code point. However, this applies only to characters whose
1756       code  points  are  less than 256. By default, higher-valued code points
1757       never match escapes such as \w or \d. When PCRE2 is built with  Unicode
1758       support (the default), all characters can be tested with \p and \P, or,
1759       alternatively, the PCRE2_UCP option can be set when a pattern  is  com‐
1760       piled;  this  causes  \w  and  friends  to use Unicode property support
1761       instead of the built-in tables.
1762
1763       The use of locales with Unicode is discouraged.  If  you  are  handling
1764       characters  with  code  points  greater than 127, you should either use
1765       Unicode support, or use locales, but not try to mix the two.
1766
1767       PCRE2 contains a built-in set of character  tables  that  are  used  by
1768       default.   These  are  sufficient  for many applications. Normally, the
1769       internal tables recognize only ASCII characters. However, when PCRE2 is
1770       built, it is possible to cause the internal tables to be rebuilt in the
1771       default "C" locale of the local system, which may cause them to be dif‐
1772       ferent.
1773
1774       The  built-in tables can be overridden by tables supplied by the appli‐
1775       cation that calls PCRE2. These may be created  in  a  different  locale
1776       from  the  default.  As more and more applications change to using Uni‐
1777       code, the need for this locale support is expected to die away.
1778
1779       External tables are built by calling the  pcre2_maketables()  function,
1780       in the relevant locale. The only argument to this function is a general
1781       context, which can be used to pass a custom memory  allocator.  If  the
1782       argument is NULL, the system malloc() is used. The result can be passed
1783       to pcre2_compile() as often as necessary, by creating a compile context
1784       and  calling  pcre2_set_character_tables()  to  set  the tables pointer
1785       therein.
1786
1787       For example, to build and use  tables  that  are  appropriate  for  the
1788       French  locale  (where accented characters with values greater than 127
1789       are treated as letters), the following code could be used:
1790
1791         setlocale(LC_CTYPE, "fr_FR");
1792         tables = pcre2_maketables(NULL);
1793         ccontext = pcre2_compile_context_create(NULL);
1794         pcre2_set_character_tables(ccontext, tables);
1795         re = pcre2_compile(..., ccontext);
1796
1797       The locale name "fr_FR" is used on Linux and other  Unix-like  systems;
1798       if you are using Windows, the name for the French locale is "french".
1799
1800       The pointer that is passed (via the compile context) to pcre2_compile()
1801       is saved with the compiled pattern, and the same  tables  are  used  by
1802       pcre2_match() and pcre2_dfa_match(). Thus, for any single pattern, com‐
1803       pilation and matching both happen in the  same  locale,  but  different
1804       patterns can be processed in different locales.
1805
1806       It  is the caller's responsibility to ensure that the memory containing
1807       the tables remains available while they are still in use. When they are
1808       no  longer  needed, you can discard them using pcre2_maketables_free(),
1809       which should pass as its first parameter the same general context  that
1810       was used to create the tables.
1811

INFORMATION ABOUT A COMPILED PATTERN

1813
1814       int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where);
1815
1816       The  pcre2_pattern_info()  function returns general information about a
1817       compiled pattern. For information about callouts, see the next section.
1818       The  first  argument  for pcre2_pattern_info() is a pointer to the com‐
1819       piled pattern. The second argument specifies which piece of information
1820       is  required,  and  the  third  argument  is a pointer to a variable to
1821       receive the data. If the third argument is NULL, the first argument  is
1822       ignored,  and  the  function  returns the size in bytes of the variable
1823       that is required for the information requested. Otherwise, the yield of
1824       the function is zero for success, or one of the following negative num‐
1825       bers:
1826
1827         PCRE2_ERROR_NULL           the argument code was NULL
1828         PCRE2_ERROR_BADMAGIC       the "magic number" was not found
1829         PCRE2_ERROR_BADOPTION      the value of what was invalid
1830         PCRE2_ERROR_UNSET          the requested field is not set
1831
1832       The "magic number" is placed at the start of each compiled pattern as a
1833       simple  check  against  passing  an arbitrary memory pointer. Here is a
1834       typical call of pcre2_pattern_info(), to obtain the length of the  com‐
1835       piled pattern:
1836
1837         int rc;
1838         size_t length;
1839         rc = pcre2_pattern_info(
1840           re,               /* result of pcre2_compile() */
1841           PCRE2_INFO_SIZE,  /* what is required */
1842           &length);         /* where to put the data */
1843
1844       The possible values for the second argument are defined in pcre2.h, and
1845       are as follows:
1846
1847         PCRE2_INFO_ALLOPTIONS
1848         PCRE2_INFO_ARGOPTIONS
1849         PCRE2_INFO_EXTRAOPTIONS
1850
1851       Return copies of the pattern's options. The third argument should point
1852       to  a  uint32_t  variable.  PCRE2_INFO_ARGOPTIONS  returns  exactly the
1853       options that were passed to pcre2_compile(), whereas  PCRE2_INFO_ALLOP‐
1854       TIONS  returns  the compile options as modified by any top-level (*XXX)
1855       option settings such as (*UTF) at the  start  of  the  pattern  itself.
1856       PCRE2_INFO_EXTRAOPTIONS  returns the extra options that were set in the
1857       compile context by calling the pcre2_set_compile_extra_options()  func‐
1858       tion.
1859
1860       For   example,   if  the  pattern  /(*UTF)abc/  is  compiled  with  the
1861       PCRE2_EXTENDED  option,  the  result   for   PCRE2_INFO_ALLOPTIONS   is
1862       PCRE2_EXTENDED  and  PCRE2_UTF.   Option settings such as (?i) that can
1863       change within a pattern do not affect the result  of  PCRE2_INFO_ALLOP‐
1864       TIONS, even if they appear right at the start of the pattern. (This was
1865       different in some earlier releases.)
1866
1867       A pattern compiled without PCRE2_ANCHORED is automatically anchored  by
1868       PCRE2 if the first significant item in every top-level branch is one of
1869       the following:
1870
1871         ^     unless PCRE2_MULTILINE is set
1872         \A    always
1873         \G    always
1874         .*    sometimes - see below
1875
1876       When .* is the first significant item, anchoring is possible only  when
1877       all the following are true:
1878
1879         .* is not in an atomic group
1880         .* is not in a capture group that is the subject
1881              of a backreference
1882         PCRE2_DOTALL is in force for .*
1883         Neither (*PRUNE) nor (*SKIP) appears in the pattern
1884         PCRE2_NO_DOTSTAR_ANCHOR is not set
1885
1886       For  patterns  that are auto-anchored, the PCRE2_ANCHORED bit is set in
1887       the options returned for PCRE2_INFO_ALLOPTIONS.
1888
1889         PCRE2_INFO_BACKREFMAX
1890
1891       Return the number of the highest  backreference  in  the  pattern.  The
1892       third  argument  should  point  to  a  uint32_t variable. Named capture
1893       groups acquire numbers as well as names, and these  count  towards  the
1894       highest  backreference.  Backreferences  such as \4 or \g{12} match the
1895       captured characters of the given group, but in addition, the check that
1896       a capture group is set in a conditional group such as (?(3)a|b) is also
1897       a backreference.  Zero is returned if there are no backreferences.
1898
1899         PCRE2_INFO_BSR
1900
1901       The output is a uint32_t integer whose value indicates  what  character
1902       sequences  the \R escape sequence matches. A value of PCRE2_BSR_UNICODE
1903       means that \R matches any Unicode line  ending  sequence;  a  value  of
1904       PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF.
1905
1906         PCRE2_INFO_CAPTURECOUNT
1907
1908       Return  the  highest  capture  group number in the pattern. In patterns
1909       where (?| is not used, this is also the total number of capture groups.
1910       The third argument should point to a uint32_t variable.
1911
1912         PCRE2_INFO_DEPTHLIMIT
1913
1914       If  the  pattern set a backtracking depth limit by including an item of
1915       the form (*LIMIT_DEPTH=nnnn) at the start, the value is  returned.  The
1916       third argument should point to a uint32_t integer. If no such value has
1917       been  set,  the  call  to  pcre2_pattern_info()   returns   the   error
1918       PCRE2_ERROR_UNSET. Note that this limit will only be used during match‐
1919       ing if it is less than the limit set or defaulted by the caller of  the
1920       match function.
1921
1922         PCRE2_INFO_FIRSTBITMAP
1923
1924       In  the absence of a single first code unit for a non-anchored pattern,
1925       pcre2_compile() may construct a 256-bit table that defines a fixed  set
1926       of  values for the first code unit in any match. For example, a pattern
1927       that starts with [abc] results in a table with  three  bits  set.  When
1928       code  unit  values greater than 255 are supported, the flag bit for 255
1929       means "any code unit of value 255 or above". If such a table  was  con‐
1930       structed,  a pointer to it is returned. Otherwise NULL is returned. The
1931       third argument should point to a const uint8_t * variable.
1932
1933         PCRE2_INFO_FIRSTCODETYPE
1934
1935       Return information about the first code unit of any matched string, for
1936       a  non-anchored  pattern. The third argument should point to a uint32_t
1937       variable. If there is a fixed first value, for example, the letter  "c"
1938       from  a  pattern such as (cat|cow|coyote), 1 is returned, and the value
1939       can be retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is  no  fixed
1940       first  value,  but it is known that a match can occur only at the start
1941       of the subject or following a newline in the subject,  2  is  returned.
1942       Otherwise, and for anchored patterns, 0 is returned.
1943
1944         PCRE2_INFO_FIRSTCODEUNIT
1945
1946       Return  the  value  of  the first code unit of any matched string for a
1947       pattern where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise  return  0.
1948       The  third  argument  should point to a uint32_t variable. In the 8-bit
1949       library, the value is always less than 256. In the 16-bit  library  the
1950       value  can  be  up  to 0xffff. In the 32-bit library in UTF-32 mode the
1951       value can be up to 0x10ffff, and up to 0xffffffff when not using UTF-32
1952       mode.
1953
1954         PCRE2_INFO_FRAMESIZE
1955
1956       Return the size (in bytes) of the data frames that are used to remember
1957       backtracking positions when the pattern is processed  by  pcre2_match()
1958       without  the  use  of  JIT. The third argument should point to a size_t
1959       variable. The frame size depends on the number of capturing parentheses
1960       in the pattern. Each additional capture group adds two PCRE2_SIZE vari‐
1961       ables.
1962
1963         PCRE2_INFO_HASBACKSLASHC
1964
1965       Return 1 if the pattern contains any instances of \C, otherwise 0.  The
1966       third argument should point to a uint32_t variable.
1967
1968         PCRE2_INFO_HASCRORLF
1969
1970       Return  1  if  the  pattern  contains any explicit matches for CR or LF
1971       characters, otherwise 0. The third argument should point to a  uint32_t
1972       variable.  An explicit match is either a literal CR or LF character, or
1973       \r or  \n  or  one  of  the  equivalent  hexadecimal  or  octal  escape
1974       sequences.
1975
1976         PCRE2_INFO_HEAPLIMIT
1977
1978       If the pattern set a heap memory limit by including an item of the form
1979       (*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu‐
1980       ment should point to a uint32_t integer. If no such value has been set,
1981       the call to pcre2_pattern_info() returns the  error  PCRE2_ERROR_UNSET.
1982       Note  that  this  limit will only be used during matching if it is less
1983       than the limit set or defaulted by the caller of the match function.
1984
1985         PCRE2_INFO_JCHANGED
1986
1987       Return 1 if the (?J) or (?-J) option setting is used  in  the  pattern,
1988       otherwise  0.  The  third argument should point to a uint32_t variable.
1989       (?J) and (?-J) set and unset the local PCRE2_DUPNAMES  option,  respec‐
1990       tively.
1991
1992         PCRE2_INFO_JITSIZE
1993
1994       If  the  compiled  pattern was successfully processed by pcre2_jit_com‐
1995       pile(), return the size of the  JIT  compiled  code,  otherwise  return
1996       zero. The third argument should point to a size_t variable.
1997
1998         PCRE2_INFO_LASTCODETYPE
1999
2000       Returns  1 if there is a rightmost literal code unit that must exist in
2001       any matched string, other than at its start. The third argument  should
2002       point to a uint32_t variable. If there is no such value, 0 is returned.
2003       When 1 is returned, the code unit value itself can be  retrieved  using
2004       PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is
2005       recorded only if it follows something of variable length. For  example,
2006       for  the pattern /^a\d+z\d+/ the returned value is 1 (with "z" returned
2007       from PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value  is
2008       0.
2009
2010         PCRE2_INFO_LASTCODEUNIT
2011
2012       Return  the value of the rightmost literal code unit that must exist in
2013       any matched string, other than  at  its  start,  for  a  pattern  where
2014       PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argu‐
2015       ment should point to a uint32_t variable.
2016
2017         PCRE2_INFO_MATCHEMPTY
2018
2019       Return 1 if the pattern might match an empty string, otherwise  0.  The
2020       third argument should point to a uint32_t variable. When a pattern con‐
2021       tains recursive subroutine calls it is not always possible to determine
2022       whether  or  not  it  can match an empty string. PCRE2 takes a cautious
2023       approach and returns 1 in such cases.
2024
2025         PCRE2_INFO_MATCHLIMIT
2026
2027       If the pattern set a match limit by  including  an  item  of  the  form
2028       (*LIMIT_MATCH=nnnn)  at  the  start,  the  value is returned. The third
2029       argument should point to a uint32_t integer. If no such value has  been
2030       set,    the    call   to   pcre2_pattern_info()   returns   the   error
2031       PCRE2_ERROR_UNSET. Note that this limit will only be used during match‐
2032       ing  if it is less than the limit set or defaulted by the caller of the
2033       match function.
2034
2035         PCRE2_INFO_MAXLOOKBEHIND
2036
2037       A lookbehind assertion moves back a certain number of  characters  (not
2038       code  units)  when  it  starts  to  process  each of its branches. This
2039       request returns the largest of these backward moves. The third argument
2040       should  point  to  a  uint32_t integer. The simple assertions \b and \B
2041       require a one-character lookbehind and  cause  PCRE2_INFO_MAXLOOKBEHIND
2042       to return 1 in the absence of anything longer. \A also registers a one-
2043       character lookbehind, though it does not actually inspect the  previous
2044       character.
2045
2046       Note that this information is useful for multi-segment matching only if
2047       the pattern contains no nested lookbehinds. For  example,  the  pattern
2048       (?<=a(?<=ba)c)  returns  a maximum lookbehind of 2, but when it is pro‐
2049       cessed, the first lookbehind moves back by two characters, matches  one
2050       character,  then  the  nested lookbehind also moves back by two charac‐
2051       ters. This puts the matching point three characters earlier than it was
2052       at  the  start.   PCRE2_INFO_MAXLOOKBEHIND  is  really only useful as a
2053       debugging tool. See the pcre2partial documentation for a discussion  of
2054       multi-segment matching.
2055
2056         PCRE2_INFO_MINLENGTH
2057
2058       If  a  minimum  length  for  matching subject strings was computed, its
2059       value is returned. Otherwise the returned value is 0. This value is not
2060       computed  when PCRE2_NO_START_OPTIMIZE is set. The value is a number of
2061       characters, which in UTF mode may be different from the number of  code
2062       units.  The  third  argument  should  point to a uint32_t variable. The
2063       value is a lower bound to the length of any matching string. There  may
2064       not  be  any  strings  of that length that do actually match, but every
2065       string that does match is at least that long.
2066
2067         PCRE2_INFO_NAMECOUNT
2068         PCRE2_INFO_NAMEENTRYSIZE
2069         PCRE2_INFO_NAMETABLE
2070
2071       PCRE2 supports the use of named as well as numbered capturing parenthe‐
2072       ses.  The names are just an additional way of identifying the parenthe‐
2073       ses, which still acquire numbers. Several convenience functions such as
2074       pcre2_substring_get_byname()  are provided for extracting captured sub‐
2075       strings by name. It is also possible to extract the data  directly,  by
2076       first  converting  the  name to a number in order to access the correct
2077       pointers in the output vector (described with pcre2_match() below).  To
2078       do  the  conversion,  you  need to use the name-to-number map, which is
2079       described by these three values.
2080
2081       The map consists of a number of  fixed-size  entries.  PCRE2_INFO_NAME‐
2082       COUNT  gives  the number of entries, and PCRE2_INFO_NAMEENTRYSIZE gives
2083       the size of each entry in code units; both of these return  a  uint32_t
2084       value. The entry size depends on the length of the longest name.
2085
2086       PCRE2_INFO_NAMETABLE returns a pointer to the first entry of the table.
2087       This is a PCRE2_SPTR pointer to a block of code  units.  In  the  8-bit
2088       library,  the  first two bytes of each entry are the number of the cap‐
2089       turing parenthesis, most significant byte first. In the 16-bit library,
2090       the  pointer  points  to 16-bit code units, the first of which contains
2091       the parenthesis number. In the 32-bit library, the  pointer  points  to
2092       32-bit  code units, the first of which contains the parenthesis number.
2093       The rest of the entry is the corresponding name, zero terminated.
2094
2095       The names are in alphabetical order. If (?| is used to create  multiple
2096       capture  groups  with  the  same number, as described in the section on
2097       duplicate group numbers in the pcre2pattern page,  the  groups  may  be
2098       given  the same name, but there is only one entry in the table. Differ‐
2099       ent names for groups of the same number are not permitted.
2100
2101       Duplicate names for capture groups with different numbers  are  permit‐
2102       ted, but only if PCRE2_DUPNAMES is set. They appear in the table in the
2103       order in which they were found in the pattern. In the  absence  of  (?|
2104       this  is  the  order of increasing number; when (?| is used this is not
2105       necessarily the case because later capture groups may have  lower  num‐
2106       bers.
2107
2108       As  a  simple  example of the name/number table, consider the following
2109       pattern after compilation by the 8-bit library  (assume  PCRE2_EXTENDED
2110       is set, so white space - including newlines - is ignored):
2111
2112         (?<date> (?<year>(\d\d)?\d\d) -
2113         (?<month>\d\d) - (?<day>\d\d) )
2114
2115       There are four named capture groups, so the table has four entries, and
2116       each entry in the table is eight bytes long. The table is  as  follows,
2117       with non-printing bytes shown in hexadecimal, and undefined bytes shown
2118       as ??:
2119
2120         00 01 d  a  t  e  00 ??
2121         00 05 d  a  y  00 ?? ??
2122         00 04 m  o  n  t  h  00
2123         00 02 y  e  a  r  00 ??
2124
2125       When writing code to extract data from named capture groups  using  the
2126       name-to-number  map,  remember that the length of the entries is likely
2127       to be different for each compiled pattern.
2128
2129         PCRE2_INFO_NEWLINE
2130
2131       The output is one of the following uint32_t values:
2132
2133         PCRE2_NEWLINE_CR       Carriage return (CR)
2134         PCRE2_NEWLINE_LF       Linefeed (LF)
2135         PCRE2_NEWLINE_CRLF     Carriage return, linefeed (CRLF)
2136         PCRE2_NEWLINE_ANY      Any Unicode line ending
2137         PCRE2_NEWLINE_ANYCRLF  Any of CR, LF, or CRLF
2138         PCRE2_NEWLINE_NUL      The NUL character (binary zero)
2139
2140       This identifies the character sequence that will be recognized as mean‐
2141       ing "newline" while matching.
2142
2143         PCRE2_INFO_SIZE
2144
2145       Return  the  size  of  the  compiled  pattern  in  bytes (for all three
2146       libraries). The third argument should point to a size_t variable.  This
2147       value  includes  the  size  of the general data block that precedes the
2148       code units of the compiled pattern itself. The value that is used  when
2149       pcre2_compile()  is  getting memory in which to place the compiled pat‐
2150       tern may be slightly larger than the value  returned  by  this  option,
2151       because  there are cases where the code that calculates the size has to
2152       over-estimate. Processing a pattern with  the  JIT  compiler  does  not
2153       alter the value returned by this option.
2154

INFORMATION ABOUT A PATTERN'S CALLOUTS

2156
2157       int pcre2_callout_enumerate(const pcre2_code *code,
2158         int (*callback)(pcre2_callout_enumerate_block *, void *),
2159         void *user_data);
2160
2161       A script language that supports the use of string arguments in callouts
2162       might like to scan all the callouts in a  pattern  before  running  the
2163       match. This can be done by calling pcre2_callout_enumerate(). The first
2164       argument is a pointer to a compiled pattern, the  second  points  to  a
2165       callback  function,  and the third is arbitrary user data. The callback
2166       function is called for every callout in the pattern  in  the  order  in
2167       which they appear. Its first argument is a pointer to a callout enumer‐
2168       ation block, and its second argument is the user_data  value  that  was
2169       passed  to  pcre2_callout_enumerate(). The contents of the callout enu‐
2170       meration block are described in the pcre2callout  documentation,  which
2171       also gives further details about callouts.
2172

SERIALIZATION AND PRECOMPILING

2174
2175       It  is  possible  to  save  compiled patterns on disc or elsewhere, and
2176       reload them later, subject to a number of  restrictions.  The  host  on
2177       which  the  patterns  are  reloaded must be running the same version of
2178       PCRE2, with the same code unit width, and must also have the same endi‐
2179       anness,  pointer  width,  and PCRE2_SIZE type. Before compiled patterns
2180       can be saved, they must be converted to a "serialized" form,  which  in
2181       the  case of PCRE2 is really just a bytecode dump.  The functions whose
2182       names begin with pcre2_serialize_ are used for converting to  and  from
2183       the  serialized form. They are described in the pcre2serialize documen‐
2184       tation. Note that PCRE2 serialization does not  convert  compiled  pat‐
2185       terns to an abstract format like Java or .NET serialization.
2186

THE MATCH DATA BLOCK

2188
2189       pcre2_match_data *pcre2_match_data_create(uint32_t ovecsize,
2190         pcre2_general_context *gcontext);
2191
2192       pcre2_match_data *pcre2_match_data_create_from_pattern(
2193         const pcre2_code *code, pcre2_general_context *gcontext);
2194
2195       void pcre2_match_data_free(pcre2_match_data *match_data);
2196
2197       Information  about  a  successful  or unsuccessful match is placed in a
2198       match data block, which is an opaque  structure  that  is  accessed  by
2199       function  calls.  In particular, the match data block contains a vector
2200       of offsets into the subject string that define the matched part of  the
2201       subject  and  any  substrings  that were captured. This is known as the
2202       ovector.
2203
2204       Before calling pcre2_match(), pcre2_dfa_match(),  or  pcre2_jit_match()
2205       you must create a match data block by calling one of the creation func‐
2206       tions above. For pcre2_match_data_create(), the first argument  is  the
2207       number  of  pairs  of  offsets  in  the ovector. One pair of offsets is
2208       required to identify the string that matched the whole pattern, with an
2209       additional  pair for each captured substring. For example, a value of 4
2210       creates enough space to record the matched portion of the subject  plus
2211       three  captured  substrings.  A  minimum  of  1  pair  is  imposed   by
2212       pcre2_match_data_create(), so it is always possible to return the over‐
2213       all matched string.
2214
2215       The second argument of pcre2_match_data_create() is a pointer to a gen‐
2216       eral context, which can specify custom memory management for  obtaining
2217       the memory for the match data block. If you are not using custom memory
2218       management, pass NULL, which causes malloc() to be used.
2219
2220       For pcre2_match_data_create_from_pattern(), the  first  argument  is  a
2221       pointer to a compiled pattern. The ovector is created to be exactly the
2222       right size to hold all the substrings a pattern might capture. The sec‐
2223       ond  argument is again a pointer to a general context, but in this case
2224       if NULL is passed, the memory is obtained using the same allocator that
2225       was used for the compiled pattern (custom or default).
2226
2227       A  match  data block can be used many times, with the same or different
2228       compiled patterns. You can extract information from a match data  block
2229       after  a  match  operation  has  finished,  using  functions  that  are
2230       described in the sections on  matched  strings  and  other  match  data
2231       below.
2232
2233       When  a  call  of  pcre2_match()  fails, valid data is available in the
2234       match  data  block  only   when   the   error   is  PCRE2_ERROR_NOMATCH,
2235       PCRE2_ERROR_PARTIAL,  or  one  of  the  error  codes for an invalid UTF
2236       string. Exactly what is available depends on the error, and is detailed
2237       below.
2238
2239       When  one of the matching functions is called, pointers to the compiled
2240       pattern and the subject string are set in the match data block so  that
2241       they  can  be referenced by the extraction functions after a successful
2242       match. After running a match, you must not free a compiled pattern or a
2243       subject  string until after all operations on the match data block (for
2244       that match) have taken place,  unless,  in  the  case  of  the  subject
2245       string,  you  have used the PCRE2_COPY_MATCHED_SUBJECT option, which is
2246       described in the  section  entitled  "Option  bits  for  pcre2_match()"
2247       below.
2248
2249       When  a match data block itself is no longer needed, it should be freed
2250       by calling pcre2_match_data_free(). If this function is called  with  a
2251       NULL argument, it returns immediately, without doing anything.
2252

MATCHING A PATTERN: THE TRADITIONAL FUNCTION

2254
2255       int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
2256         PCRE2_SIZE length, PCRE2_SIZE startoffset,
2257         uint32_t options, pcre2_match_data *match_data,
2258         pcre2_match_context *mcontext);
2259
2260       The  function pcre2_match() is called to match a subject string against
2261       a compiled pattern, which is passed in the code argument. You can  call
2262       pcre2_match() with the same code argument as many times as you like, in
2263       order to find multiple matches in the subject string or to  match  dif‐
2264       ferent subject strings with the same pattern.
2265
2266       This  function  is  the  main  matching facility of the library, and it
2267       operates in a Perl-like manner. For specialist use  there  is  also  an
2268       alternative  matching function, which is described below in the section
2269       about the pcre2_dfa_match() function.
2270
2271       Here is an example of a simple call to pcre2_match():
2272
2273         pcre2_match_data *md = pcre2_match_data_create(4, NULL);
2274         int rc = pcre2_match(
2275           re,             /* result of pcre2_compile() */
2276           "some string",  /* the subject string */
2277           11,             /* the length of the subject string */
2278           0,              /* start at offset 0 in the subject */
2279           0,              /* default options */
2280           md,             /* the match data block */
2281           NULL);          /* a match context; NULL means use defaults */
2282
2283       If the subject string is zero-terminated, the length can  be  given  as
2284       PCRE2_ZERO_TERMINATED. A match context must be provided if certain less
2285       common matching parameters are to be changed. For details, see the sec‐
2286       tion on the match context above.
2287
2288   The string to be matched by pcre2_match()
2289
2290       The  subject string is passed to pcre2_match() as a pointer in subject,
2291       a length in length, and a starting offset in  startoffset.  The  length
2292       and  offset  are  in  code units, not characters.  That is, they are in
2293       bytes for the 8-bit library, 16-bit code units for the 16-bit  library,
2294       and  32-bit  code units for the 32-bit library, whether or not UTF pro‐
2295       cessing is enabled.
2296
2297       If startoffset is greater than the length of the subject, pcre2_match()
2298       returns  PCRE2_ERROR_BADOFFSET.  When  the starting offset is zero, the
2299       search for a match starts at the beginning of the subject, and this  is
2300       by far the most common case. In UTF-8 or UTF-16 mode, the starting off‐
2301       set must point to the start of a character, or to the end of  the  sub‐
2302       ject  (in  UTF-32 mode, one code unit equals one character, so all off‐
2303       sets are valid). Like the  pattern  string,  the  subject  may  contain
2304       binary zeros.
2305
2306       A  non-zero  starting offset is useful when searching for another match
2307       in the same subject by calling pcre2_match()  again  after  a  previous
2308       success.   Setting  startoffset  differs  from passing over a shortened
2309       string and setting PCRE2_NOTBOL in the case of a  pattern  that  begins
2310       with any kind of lookbehind. For example, consider the pattern
2311
2312         \Biss\B
2313
2314       which  finds  occurrences  of "iss" in the middle of words. (\B matches
2315       only if the current position in the subject is not  a  word  boundary.)
2316       When applied to the string "Mississippi" the first call to pcre2_match()
2317       finds the first occurrence. If pcre2_match() is called again with  just
2318       the  remainder  of  the  subject,  namely "issippi", it does not match,
2319       because \B is always false at the start of the subject, which is deemed
2320       to  be  a word boundary. However, if pcre2_match() is passed the entire
2321       string again, but with startoffset set to 4, it finds the second occur‐
2322       rence  of "iss" because it is able to look behind the starting point to
2323       discover that it is preceded by a letter.
2324
2325       Finding all the matches in a subject is tricky  when  the  pattern  can
2326       match an empty string. It is possible to emulate Perl's /g behaviour by
2327       first  trying  the  match  again  at  the   same   offset,   with   the
2328       PCRE2_NOTEMPTY_ATSTART  and  PCRE2_ANCHORED  options,  and then if that
2329       fails, advancing the starting  offset  and  trying  an  ordinary  match
2330       again.  There  is  some  code  that  demonstrates how to do this in the
2331       pcre2demo sample program. In the most general case, you have  to  check
2332       to  see  if the newline convention recognizes CRLF as a newline, and if
2333       so, and the current character is CR followed by LF, advance the  start‐
2334       ing offset by two characters instead of one.
2335
2336       If a non-zero starting offset is passed when the pattern is anchored, a
2337       single attempt to match at the given offset is made. This can only suc‐
2338       ceed  if  the  pattern does not require the match to be at the start of
2339       the subject. In other words, the anchoring must be the result  of  set‐
2340       ting  the PCRE2_ANCHORED option or the use of .* with PCRE2_DOTALL, not
2341       by starting the pattern with ^ or \A.
2342
2343   Option bits for pcre2_match()
2344
2345       The unused bits of the options argument for pcre2_match() must be zero.
2346       The    only    bits    that    may    be    set   are   PCRE2_ANCHORED,
2347       PCRE2_COPY_MATCHED_SUBJECT,      PCRE2_ENDANCHORED,       PCRE2_NOTBOL,
2348       PCRE2_NOTEOL,   PCRE2_NOTEMPTY,  PCRE2_NOTEMPTY_ATSTART,  PCRE2_NO_JIT,
2349       PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and  PCRE2_PARTIAL_SOFT.  Their
2350       action is described below.
2351
2352       Setting  PCRE2_ANCHORED  or PCRE2_ENDANCHORED at match time is not sup‐
2353       ported by the just-in-time (JIT) compiler. If either is set, JIT matching
2354       is  disabled  and  the interpretive code in pcre2_match() is run. Apart
2355       from PCRE2_NO_JIT (obviously), the remaining options are supported  for
2356       JIT matching.
2357
2358         PCRE2_ANCHORED
2359
2360       The PCRE2_ANCHORED option limits pcre2_match() to matching at the first
2361       matching position. If a pattern was compiled  with  PCRE2_ANCHORED,  or
2362       turned  out to be anchored by virtue of its contents, it cannot be made
2363       unanchored at matching time. Note that setting the option at match time
2364       disables JIT matching.
2365
2366         PCRE2_COPY_MATCHED_SUBJECT
2367
2368       By  default,  a  pointer to the subject is remembered in the match data
2369       block so that, after a successful match, it can be  referenced  by  the
2370       substring  extraction  functions.  This means that the subject's memory
2371       must not be freed until all such  operations  are  complete.  For  some
2372       applications  where  the  lifetime of the subject string is not guaran‐
2373       teed, it may be necessary to make a copy of the subject string, but  it
2374       is wasteful to do this unless the match is successful. After a success‐
2375       ful match, if PCRE2_COPY_MATCHED_SUBJECT is set, the subject is  copied
2376       and  the  new  pointer is remembered in the match data block instead of
2377       the original subject pointer. The memory allocator that  was  used  for
2378       the  match  block  itself is used. The copy is automatically freed when
2379       pcre2_match_data_free() is called to free the match data block.  It  is
2380       also automatically freed if the match data block is re-used for another
2381       match operation.
2382
2383         PCRE2_ENDANCHORED
2384
2385       If the PCRE2_ENDANCHORED option is set, any string  that  pcre2_match()
2386       matches  must be right at the end of the subject string. Note that set‐
2387       ting the option at match time disables JIT matching.
2388
2389         PCRE2_NOTBOL
2390
2391       This option specifies that the first character of the subject string is not
2392       the  beginning  of  a  line, so the circumflex metacharacter should not
2393       match before it. Setting this without  having  set  PCRE2_MULTILINE  at
2394       compile time causes circumflex never to match. This option affects only
2395       the behaviour of the circumflex metacharacter. It does not affect \A.
2396
2397         PCRE2_NOTEOL
2398
2399       This option specifies that the end of the subject string is not the end
2400       of  a line, so the dollar metacharacter should not match it nor (except
2401       in multiline mode) a newline immediately before it. Setting this  with‐
2402       out  having  set PCRE2_MULTILINE at compile time causes dollar never to
2403       match. This option affects only the behaviour of the dollar metacharac‐
2404       ter. It does not affect \Z or \z.
2405
2406         PCRE2_NOTEMPTY
2407
2408       An empty string is not considered to be a valid match if this option is
2409       set. If there are alternatives in the pattern, they are tried.  If  all
2410       the  alternatives  match  the empty string, the entire match fails. For
2411       example, if the pattern
2412
2413         a?b?
2414
2415       is applied to a string not beginning with "a" or  "b",  it  matches  an
2416       empty string at the start of the subject. With PCRE2_NOTEMPTY set, this
2417       match is not valid, so pcre2_match() searches further into  the  string
2418       for occurrences of "a" or "b".
2419
2420         PCRE2_NOTEMPTY_ATSTART
2421
2422       This  is  like PCRE2_NOTEMPTY, except that it locks out an empty string
2423       match only at the first matching position, that is, at the start of the
2424       subject  plus  the  starting offset. An empty string match later in the
2425       subject is permitted.  If the pattern is anchored,  such  a  match  can
2426       occur only if the pattern contains \K.
2427
2428         PCRE2_NO_JIT
2429
2430       By   default,   if   a  pattern  has  been  successfully  processed  by
2431       pcre2_jit_compile(), JIT is automatically used  when  pcre2_match()  is
2432       called  with  options  that JIT supports. Setting PCRE2_NO_JIT disables
2433       the use of JIT; it forces matching to be done by the interpreter.
2434
2435         PCRE2_NO_UTF_CHECK
2436
2437       When PCRE2_UTF is set at compile time, the validity of the subject as a
2438       UTF   string   is   checked  unless  PCRE2_NO_UTF_CHECK  is  passed  to
2439       pcre2_match() or PCRE2_MATCH_INVALID_UTF was passed to pcre2_compile().
2440       The latter special case is discussed in detail in the pcre2unicode doc‐
2441       umentation.
2442
2443       In the default case, if a non-zero starting offset is given, the  check
2444       is  applied  only  to  that part of the subject that could be inspected
2445       during matching, and there is a check that the starting  offset  points
2446       to  the first code unit of a character or to the end of the subject. If
2447       there are no lookbehind assertions in the pattern, the check starts  at
2448       the starting offset.  Otherwise, it starts at the length of the longest
2449       lookbehind before the starting offset, or at the start of  the  subject
2450       if  there are not that many characters before the starting offset. Note
2451       that the sequences \b and \B are one-character lookbehinds.
2452
2453       The check is carried out before any other processing takes place, and a
2454       negative  error  code is returned if the check fails. There are several
2455       UTF error codes for each code unit width,  corresponding  to  different
2456       problems  with  the code unit sequence. There are discussions about the
2457       validity of UTF-8 strings, UTF-16 strings, and UTF-32  strings  in  the
2458       pcre2unicode documentation.
2459
2460       If you know that your subject is valid, and you want to skip this check
2461       for performance reasons, you can set the PCRE2_NO_UTF_CHECK option when
2462       calling  pcre2_match().  You  might  want to do this for the second and
2463       subsequent calls to pcre2_match() if you are making repeated  calls  to
2464       find multiple matches in the same subject string.
2465
2466       Warning:  Unless  PCRE2_MATCH_INVALID_UTF was set at compile time, when
2467       PCRE2_NO_UTF_CHECK is set at  match  time  the  effect  of  passing  an
2468       invalid  string  as  a  subject, or an invalid value of startoffset, is
2469       undefined.  Your program may crash or loop indefinitely or  give  wrong
2470       results.
2471
2472         PCRE2_PARTIAL_HARD
2473         PCRE2_PARTIAL_SOFT
2474
2475       These  options  turn  on  the partial matching feature. A partial match
2476       occurs if the end of the subject string is  reached  successfully,  but
2477       there are not enough subject characters to complete the match. In addi‐
2478       tion, either at least one character must have  been  inspected  or  the
2479       pattern  must  contain  a  lookbehind,  or the pattern must be one that
2480       could match an empty string.
2481
2482       If this situation arises when PCRE2_PARTIAL_SOFT  (but  not  PCRE2_PAR‐
2483       TIAL_HARD) is set, matching continues by testing any remaining alterna‐
2484       tives. Only if no complete match can be  found  is  PCRE2_ERROR_PARTIAL
2485       returned  instead  of  PCRE2_ERROR_NOMATCH.  In other words, PCRE2_PAR‐
2486       TIAL_SOFT specifies that the caller is prepared  to  handle  a  partial
2487       match, but only if no complete match can be found.
2488
2489       If  PCRE2_PARTIAL_HARD is set, it overrides PCRE2_PARTIAL_SOFT. In this
2490       case, if a partial match is found,  pcre2_match()  immediately  returns
2491       PCRE2_ERROR_PARTIAL,  without  considering  any  other alternatives. In
2492       other words, when PCRE2_PARTIAL_HARD is set, a partial match is consid‐
2493       ered to be more important than an alternative complete match.
2494
2495       There is a more detailed discussion of partial and multi-segment match‐
2496       ing, with examples, in the pcre2partial documentation.
2497

NEWLINE HANDLING WHEN MATCHING

2499
2500       When PCRE2 is built, a default newline convention is set; this is  usu‐
2501       ally  the standard convention for the operating system. The default can
2502       be overridden in a compile context by calling  pcre2_set_newline().  It
2503       can  also be overridden by starting a pattern string with, for example,
2504       (*CRLF), as described in the section  on  newline  conventions  in  the
2505       pcre2pattern  page. During matching, the newline choice affects the be‐
2506       haviour of the dot, circumflex, and dollar metacharacters. It may  also
2507       alter  the  way  the  match starting position is advanced after a match
2508       failure for an unanchored pattern.
2509
2510       When PCRE2_NEWLINE_CRLF, PCRE2_NEWLINE_ANYCRLF, or PCRE2_NEWLINE_ANY is
2511       set  as  the  newline convention, and a match attempt for an unanchored
2512       pattern fails when the current starting position is at a CRLF sequence,
2513       and  the  pattern contains no explicit matches for CR or LF characters,
2514       the match position is advanced by two characters  instead  of  one,  in
2515       other words, to after the CRLF.
2516
2517       The above rule is a compromise that makes the most common cases work as
2518       expected. For example, if the pattern  is  .+A  (and  the  PCRE2_DOTALL
2519       option is not set), it does not match the string "\r\nA" because, after
2520       failing at the start, it skips both the CR and the LF before  retrying.
2521       However,  the  pattern  [\r\n]A does match that string, because it con‐
2522       tains an explicit CR or LF reference, and so advances only by one char‐
2523       acter after the first failure.
2524
2525       An explicit match for CR or LF is either a literal appearance of one of
2526       those characters in the pattern, or one of the \r or \n  or  equivalent
2527       octal or hexadecimal escape sequences. Implicit matches such as [^X] do
2528       not count, nor does \s, even though it includes CR and LF in the  char‐
2529       acters that it matches.
2530
2531       Notwithstanding  the above, anomalous effects may still occur when CRLF
2532       is a valid newline sequence and explicit \r or \n escapes appear in the
2533       pattern.
2534

HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS

2536
2537       uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data);
2538
2539       PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data);
2540
2541       In  general, a pattern matches a certain portion of the subject, and in
2542       addition, further substrings from the subject  may  be  picked  out  by
2543       parenthesized  parts  of  the  pattern.  Following the usage in Jeffrey
2544       Friedl's book, this is called "capturing"  in  what  follows,  and  the
2545       phrase  "capture  group" (Perl terminology) is used for a fragment of a
2546       pattern that picks out a substring. PCRE2 supports several other  kinds
2547       of parenthesized group that do not cause substrings to be captured. The
2548       pcre2_pattern_info() function can be used to find out how many  capture
2549       groups there are in a compiled pattern.
2550
2551       You  can  use  auxiliary functions for accessing captured substrings by
2552       number or by name, as described in sections below.
2553
2554       Alternatively, you can make direct use of the vector of PCRE2_SIZE val‐
2555       ues,  called  the  ovector,  which  contains  the  offsets  of captured
2556       strings.  It  is  part  of  the  match  data   block.    The   function
2557       pcre2_get_ovector_pointer()  returns  the  address  of the ovector, and
2558       pcre2_get_ovector_count() returns the number of pairs of values it con‐
2559       tains.
2560
2561       Within the ovector, the first in each pair of values is set to the off‐
2562       set of the first code unit of a substring, and the second is set to the
2563       offset  of the first code unit after the end of a substring. These val‐
2564       ues are always code unit offsets, not character offsets. That is,  they
2565       are  byte  offsets  in  the 8-bit library, 16-bit offsets in the 16-bit
2566       library, and 32-bit offsets in the 32-bit library.
2567
2568       After a partial match  (error  return  PCRE2_ERROR_PARTIAL),  only  the
2569       first  pair  of  offsets  (that is, ovector[0] and ovector[1]) are set.
2570       They identify the part of the subject that was partially  matched.  See
2571       the pcre2partial documentation for details of partial matching.
2572
2573       After  a  fully  successful match, the first pair of offsets identifies
2574       the portion of the subject string that was matched by the  entire  pat‐
2575       tern.  The  next  pair is used for the first captured substring, and so
2576       on. The value returned by pcre2_match() is one more  than  the  highest
2577       numbered  pair  that  has been set. For example, if two substrings have
2578       been captured, the returned value is 3. If there are no  captured  sub‐
2579       strings, the return value from a successful match is 1, indicating that
2580       just the first pair of offsets has been set.
2581
2582       If a pattern uses the \K escape sequence within a  positive  assertion,
2583       the reported start of a successful match can be greater than the end of
2584       the match.  For example, if the pattern  (?=ab\K)  is  matched  against
2585       "ab", the start and end offset values for the match are 2 and 0.
2586
2587       If  a  capture group is matched repeatedly within a single match opera‐
2588       tion, it is the last portion of the subject that  it  matched  that  is
2589       returned.
2590
2591       If the ovector is too small to hold all the captured substring offsets,
2592       as much as possible is filled in, and the function returns a  value  of
2593       zero.  If captured substrings are not of interest, pcre2_match() may be
2594       called with a match data block whose ovector is of minimum length (that
2595       is, one pair).
2596
2597       It  is  possible for capture group number n+1 to match some part of the
2598       subject when group n has not been used at  all.  For  example,  if  the
2599       string "abc" is matched against the pattern (a|(z))(bc) the return from
2600       the function is 4, and groups 1 and 3 are matched, but 2 is  not.  When
2601       this  happens,  both values in the offset pairs corresponding to unused
2602       groups are set to PCRE2_UNSET.
2603
2604       Offset values that correspond to  unused  groups  at  the  end  of  the
2605       expression  are  also  set  to  PCRE2_UNSET. For example, if the string
2606       "abc" is matched against the pattern (abc)(x(yz)?)? groups 2 and 3  are
2607       not  matched.  The  return  from the function is 2, because the highest
2608       used capture group number is 1. The  offsets  for  the  second  and
2609       third  capture  groups (assuming the vector is large enough, of course)
2610       are set to PCRE2_UNSET.
2611
2612       Elements in the ovector that do not correspond to capturing parentheses
2613       in the pattern are never changed. That is, if a pattern contains n cap‐
2614       turing parentheses, no more than ovector[0] to ovector[2n+1] are set by
2615       pcre2_match().  The  other  elements retain whatever values they previ‐
2616       ously had. After a failed match attempt, the contents  of  the  ovector
2617       are unchanged.
2618

OTHER INFORMATION ABOUT A MATCH

2620
2621       PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data);
2622
2623       PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *match_data);
2624
2625       As  well as the offsets in the ovector, other information about a match
2626       is retained in the match data block and can be retrieved by  the  above
2627       functions  in  appropriate  circumstances.  If they are called at other
2628       times, the result is undefined.
2629
2630       After a successful match, a partial match (PCRE2_ERROR_PARTIAL),  or  a
2631       failure  to  match (PCRE2_ERROR_NOMATCH), a mark name may be available.
2632       The function pcre2_get_mark() can be called to access this name,  which
2633       can  be  specified  in  the  pattern by any of the backtracking control
2634       verbs, not just (*MARK). The same function applies to all the verbs. It
2635       returns a pointer to the zero-terminated name, which is within the com‐
2636       piled pattern. If no name is available, NULL is returned. The length of
2637       the  name  (excluding  the terminating zero) is stored in the code unit
2638       that precedes the name. You should use this length instead  of  relying
2639       on the terminating zero if the name might contain a binary zero.
2640
2641       After  a  successful  match, the name that is returned is the last mark
2642       name encountered on the matching path through the pattern. Instances of
2643       backtracking  verbs  without  names do not count. Thus, for example, if
2644       the matching path contains (*MARK:A)(*PRUNE), the name "A" is returned.
2645       After  a  "no  match"  or a partial match, the last encountered name is
2646       returned. For example, consider this pattern:
2647
2648         ^(*MARK:A)((*MARK:B)a|b)c
2649
2650       When it matches "bc", the returned name is A. The B mark is  "seen"  in
2651       the  first  branch of the group, but it is not on the matching path. On
2652       the other hand, when this pattern fails to  match  "bx",  the  returned
2653       name is B.
2654
2655       Warning:  By  default, certain start-of-match optimizations are used to
2656       give a fast "no match" result in some situations. For example,  if  the
2657       anchoring  is removed from the pattern above, there is an initial check
2658       for the presence of "c" in the  subject  before  running  the  matching
2659       engine. This check fails for "bx", causing a match failure without see‐
2660       ing any marks. You can disable the start-of-match optimizations by set‐
2661       ting  the  PCRE2_NO_START_OPTIMIZE  option  for  pcre2_compile()  or by
2662       starting the pattern with (*NO_START_OPT).
2663
2664       After a successful match, a partial match, or one of  the  invalid  UTF
2665       errors  (for example, PCRE2_ERROR_UTF8_ERR5), pcre2_get_startchar() can
2666       be called. After a successful or partial match it returns the code unit
2667       offset  of  the character at which the match started. For a non-partial
2668       match, this can be different to the value of ovector[0] if the  pattern
2669       contains  the  \K escape sequence. After a partial match, however, this
2670       value is always the same as ovector[0] because \K does not  affect  the
2671       result of a partial match.
2672
2673       After  a UTF check failure, pcre2_get_startchar() can be used to obtain
2674       the code unit offset of the invalid UTF character. Details are given in
2675       the pcre2unicode page.
2676

ERROR RETURNS FROM pcre2_match()

2678
2679       If  pcre2_match() fails, it returns a negative number. This can be con‐
2680       verted to a text string by calling the pcre2_get_error_message()  func‐
2681       tion  (see  "Obtaining a textual error message" below).  Negative error
2682       codes are also returned by other functions,  and  are  documented  with
2683       them.  The codes are given names in the header file. If UTF checking is
2684       in force and an invalid UTF subject string is detected, one of a number
2685       of  UTF-specific negative error codes is returned. Details are given in
2686       the pcre2unicode page. The following are the other errors that  may  be
2687       returned by pcre2_match():
2688
2689         PCRE2_ERROR_NOMATCH
2690
2691       The subject string did not match the pattern.
2692
2693         PCRE2_ERROR_PARTIAL
2694
2695       The  subject  string did not match, but it did match partially. See the
2696       pcre2partial documentation for details of partial matching.
2697
2698         PCRE2_ERROR_BADMAGIC
2699
2700       PCRE2 stores a 4-byte "magic number" at the start of the compiled code,
2701       to  catch  the case when it is passed a junk pointer. This is the error
2702       that is returned when the magic number is not present.
2703
2704         PCRE2_ERROR_BADMODE
2705
2706       This error is given when a compiled pattern is passed to a function  in
2707       a  library  of a different code unit width, for example, a pattern com‐
2708       piled by the 8-bit library is passed to  a  16-bit  or  32-bit  library
2709       function.
2710
2711         PCRE2_ERROR_BADOFFSET
2712
2713       The value of startoffset was greater than the length of the subject.
2714
2715         PCRE2_ERROR_BADOPTION
2716
2717       An unrecognized bit was set in the options argument.
2718
2719         PCRE2_ERROR_BADUTFOFFSET
2720
2721       The UTF code unit sequence that was passed as a subject was checked and
2722       found to be valid (the PCRE2_NO_UTF_CHECK option was not set), but  the
2723       value  of startoffset did not point to the beginning of a UTF character
2724       or the end of the subject.
2725
2726         PCRE2_ERROR_CALLOUT
2727
2728       This error is never generated by pcre2_match() itself. It  is  provided
2729       for  use  by  callout  functions  that  want  to cause pcre2_match() or
2730       pcre2_callout_enumerate() to return a distinctive error code.  See  the
2731       pcre2callout documentation for details.
2732
2733         PCRE2_ERROR_DEPTHLIMIT
2734
2735       The nested backtracking depth limit was reached.
2736
2737         PCRE2_ERROR_HEAPLIMIT
2738
2739       The heap limit was reached.
2740
2741         PCRE2_ERROR_INTERNAL
2742
2743       An  unexpected  internal error has occurred. This error could be caused
2744       by a bug in PCRE2 or by overwriting of the compiled pattern.
2745
2746         PCRE2_ERROR_JIT_STACKLIMIT
2747
2748       This error is returned when a pattern  that  was  successfully  studied
2749       using  JIT  is being matched, but the memory available for the just-in-
2750       time processing stack is not large enough. See the pcre2jit  documenta‐
2751       tion for more details.
2752
2753         PCRE2_ERROR_MATCHLIMIT
2754
2755       The backtracking match limit was reached.
2756
2757         PCRE2_ERROR_NOMEMORY
2758
2759       If  a  pattern contains many nested backtracking points, heap memory is
2760       used to remember them. This error is given when the  memory  allocation
2761       function  (default  or  custom)  fails.  Note  that  a different error,
2762       PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed  exceeds
2763       the    heap   limit.   PCRE2_ERROR_NOMEMORY   is   also   returned   if
2764       PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
2765
2766         PCRE2_ERROR_NULL
2767
2768       Either the code, subject, or match_data argument was passed as NULL.
2769
2770         PCRE2_ERROR_RECURSELOOP
2771
2772       This error is returned when  pcre2_match()  detects  a  recursion  loop
2773       within  the  pattern. Specifically, it means that either the whole pat‐
2774       tern or a capture group has been called recursively for the second time
2775       at  the  same position in the subject string. Some simple patterns that
2776       might do this are detected and faulted at compile time, but  more  com‐
2777       plicated  cases,  in particular mutual recursions between two different
2778       groups, cannot be detected until matching is attempted.
2779

OBTAINING A TEXTUAL ERROR MESSAGE

2781
2782       int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer,
2783         PCRE2_SIZE bufflen);
2784
2785       A text message for an error code  from  any  PCRE2  function  (compile,
2786       match,  or  auxiliary)  can be obtained by calling pcre2_get_error_mes‐
2787       sage(). The code is passed as the first argument,  with  the  remaining
2788       two  arguments  specifying  a  code  unit buffer and its length in code
2789       units, into which the text message is placed. The message  is  returned
2790       in  code  units  of the appropriate width for the library that is being
2791       used.
2792
2793       The returned message is terminated with a trailing zero, and the  func‐
2794       tion  returns  the  number  of  code units used, excluding the trailing
2795       zero.  If  the  error  number  is  unknown,  the  negative  error  code
2796       PCRE2_ERROR_BADDATA  is  returned. If the buffer is too small, the mes‐
2797       sage is truncated (but still with a trailing zero),  and  the  negative
2798       error  code PCRE2_ERROR_NOMEMORY is returned.  None of the messages are
2799       very long; a buffer size of 120 code units is ample.
2800

EXTRACTING CAPTURED SUBSTRINGS BY NUMBER

2802
2803       int pcre2_substring_length_bynumber(pcre2_match_data *match_data,
2804         uint32_t number, PCRE2_SIZE *length);
2805
2806       int pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
2807         uint32_t number, PCRE2_UCHAR *buffer,
2808         PCRE2_SIZE *bufflen);
2809
2810       int pcre2_substring_get_bynumber(pcre2_match_data *match_data,
2811         uint32_t number, PCRE2_UCHAR **bufferptr,
2812         PCRE2_SIZE *bufflen);
2813
2814       void pcre2_substring_free(PCRE2_UCHAR *buffer);
2815
2816       Captured substrings can be accessed directly by using  the  ovector  as
2817       described above.  For convenience, auxiliary functions are provided for
2818       extracting  captured  substrings  as  new,  separate,   zero-terminated
2819       strings. A substring that contains a binary zero is correctly extracted
2820       and has a further zero added on the end, but  the  result  is  not,  of
2821       course, a C string.
2822
2823       The functions in this section identify substrings by number. The number
2824       zero refers to the entire matched substring, with higher numbers refer‐
2825       ring  to  substrings  captured by parenthesized groups. After a partial
2826       match, only substring zero is available.  An  attempt  to  extract  any
2827       other  substring  gives the error PCRE2_ERROR_PARTIAL. The next section
2828       describes similar functions for extracting captured substrings by name.
2829
2830       If a pattern uses the \K escape sequence within a  positive  assertion,
2831       the reported start of a successful match can be greater than the end of
2832       the match.  For example, if the pattern  (?=ab\K)  is  matched  against
2833       "ab",  the  start  and  end offset values for the match are 2 and 0. In
2834       this situation, calling these functions with a  zero  substring  number
2835       extracts a zero-length empty string.
2836
2837       You  can  find the length in code units of a captured substring without
2838       extracting it by calling pcre2_substring_length_bynumber().  The  first
2839       argument  is a pointer to the match data block, the second is the group
2840       number, and the third is a pointer to a variable into which the  length
2841       is  placed.  If  you just want to know whether or not the substring has
2842       been captured, you can pass the third argument as NULL.
2843
2844       The pcre2_substring_copy_bynumber() function  copies  a  captured  sub‐
2845       string  into  a supplied buffer, whereas pcre2_substring_get_bynumber()
2846       copies it into new memory, obtained using the  same  memory  allocation
2847       function  that  was  used for the match data block. The first two argu‐
2848       ments of these functions are a pointer to the match data  block  and  a
2849       capture group number.
2850
2851       The final arguments of pcre2_substring_copy_bynumber() are a pointer to
2852       the buffer and a pointer to a variable that contains its length in code
2853       units.  This is updated to contain the actual number of code units used
2854       for the extracted substring, excluding the terminating zero.
2855
2856       For pcre2_substring_get_bynumber() the third and fourth arguments point
2857       to  variables that are updated with a pointer to the new memory and the
2858       number of code units that comprise the substring, again  excluding  the
2859       terminating  zero.  When  the substring is no longer needed, the memory
2860       should be freed by calling pcre2_substring_free().
2861
2862       The return value from all these functions is zero  for  success,  or  a
2863       negative  error  code.  If  the pattern match failed, the match failure
2864       code is returned.  If a substring number  greater  than  zero  is  used
2865       after  a partial match, PCRE2_ERROR_PARTIAL is returned. Other possible
2866       error codes are:
2867
2868         PCRE2_ERROR_NOMEMORY
2869
2870       The buffer was too small for  pcre2_substring_copy_bynumber(),  or  the
2871       attempt to get memory failed for pcre2_substring_get_bynumber().
2872
2873         PCRE2_ERROR_NOSUBSTRING
2874
2875       There  is  no  substring  with that number in the pattern, that is, the
2876       number is greater than the number of capturing parentheses.
2877
2878         PCRE2_ERROR_UNAVAILABLE
2879
2880       The substring number, though not greater than the number of captures in
2881       the pattern, is greater than the number of slots in the ovector, so the
2882       substring could not be captured.
2883
2884         PCRE2_ERROR_UNSET
2885
2886       The substring did not participate in the match.  For  example,  if  the
2887       pattern  is  (abc)|(def) and the subject is "def", and the ovector con‐
2888       tains at least two capturing slots, substring number 1 is unset.
2889

EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS

2891
2892       int pcre2_substring_list_get(pcre2_match_data *match_data,
2893         PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
2894
2895       void pcre2_substring_list_free(PCRE2_SPTR *list);
2896
2897       The pcre2_substring_list_get() function  extracts  all  available  sub‐
2898       strings  and  builds  a  list of pointers to them. It also (optionally)
2899       builds a second list that  contains  their  lengths  (in  code  units),
2900       excluding a terminating zero that is added to each of them. All this is
2901       done in a single block of memory that is obtained using the same memory
2902       allocation function that was used to get the match data block.
2903
2904       This  function  must be called only after a successful match. If called
2905       after a partial match, the error code PCRE2_ERROR_PARTIAL is returned.
2906
2907       The address of the memory block is returned via listptr, which is  also
2908       the start of the list of string pointers. The end of the list is marked
2909       by a NULL pointer. The address of the list of lengths is  returned  via
2910       lengthsptr.  If your strings do not contain binary zeros and you do not
2911       therefore need the lengths, you may supply NULL as the lengthsptr argu‐
2912       ment  to  disable  the  creation of a list of lengths. The yield of the
2913       function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the  mem‐
2914       ory  block could not be obtained. When the list is no longer needed, it
2915       should be freed by calling pcre2_substring_list_free().
2916
2917       If this function encounters a substring that is unset, which can happen
2918       when  capture  group  number  n+1 matches some part of the subject, but
2919       group n has not been used at all, it returns an empty string. This  can
2920       be distinguished from a genuine zero-length substring by inspecting the
2921       appropriate offset in the ovector, which contains PCRE2_UNSET for unset
2922       substrings, or by calling pcre2_substring_length_bynumber().
2923

EXTRACTING CAPTURED SUBSTRINGS BY NAME

2925
2926       int pcre2_substring_number_from_name(const pcre2_code *code,
2927         PCRE2_SPTR name);
2928
2929       int pcre2_substring_length_byname(pcre2_match_data *match_data,
2930         PCRE2_SPTR name, PCRE2_SIZE *length);
2931
2932       int pcre2_substring_copy_byname(pcre2_match_data *match_data,
2933         PCRE2_SPTR name, PCRE2_UCHAR *buffer, PCRE2_SIZE *bufflen);
2934
2935       int pcre2_substring_get_byname(pcre2_match_data *match_data,
2936         PCRE2_SPTR name, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen);
2937
2938       void pcre2_substring_free(PCRE2_UCHAR *buffer);
2939
2940       To extract a substring by name, you first have to find  the  associated
2941       number. For example, for this pattern:
2942
2943         (a+)b(?<xxx>\d+)...
2944
2945       the number of the capture group called "xxx" is 2. If the name is known
2946       to be unique (PCRE2_DUPNAMES was not set), you can find the number from
2947       the name by calling pcre2_substring_number_from_name(). The first argu‐
2948       ment  is the compiled pattern, and the second is the name. The yield of
2949       the function is the group number, PCRE2_ERROR_NOSUBSTRING if  there  is
2950       no  group  with that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is
2951       more than one group with that name.  Given the number, you can  extract
2952       the  substring  directly from the ovector, or use one of the "bynumber"
2953       functions described above.
2954
2955       For convenience, there are also "byname" functions that  correspond  to
2956       the  "bynumber"  functions,  the  only difference being that the second
2957       argument is a name instead of a number. If PCRE2_DUPNAMES  is  set  and
2958       there are duplicate names, these functions scan all the groups with the
2959       given name, and return the captured  substring  from  the  first  named
2960       group that is set.
2961
2962       If  there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
2963       returned. If all groups with the name have  numbers  that  are  greater
2964       than  the  number  of  slots in the ovector, PCRE2_ERROR_UNAVAILABLE is
2965       returned. If there is at least one group with a slot  in  the  ovector,
2966       but no group is found to be set, PCRE2_ERROR_UNSET is returned.
2967
2968       Warning: If the pattern uses the (?| feature to set up multiple capture
2969       groups with the same number, as described in the section  on  duplicate
2970       group numbers in the pcre2pattern page, you cannot use names to distin‐
2971       guish the different capture groups, because names are not  included  in
2972       the  compiled  code.  The  matching process uses only numbers. For this
2973       reason, the use of different names for  groups  with  the  same  number
2974       causes an error at compile time.
2975

CREATING A NEW STRING WITH SUBSTITUTIONS

2977
2978       int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
2979         PCRE2_SIZE length, PCRE2_SIZE startoffset,
2980         uint32_t options, pcre2_match_data *match_data,
2981         pcre2_match_context *mcontext, PCRE2_SPTR replacement,
2982         PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
2983         PCRE2_SIZE *outlengthptr);
2984
2985       This  function calls pcre2_match() and then makes a copy of the subject
2986       string in outputbuffer, replacing one or more parts that  were  matched
2987       with the replacement string, whose length is supplied in rlength.  This
2988       can be given as PCRE2_ZERO_TERMINATED  for  a  zero-terminated  string.
2989       The  default is to perform just one replacement, but there is an option
2990       that requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL  below
2991       for details).
2992
2993       Matches  in  which  a  \K item in a lookahead in the pattern causes the
2994       match to end before it starts are not supported, and give  rise  to  an
2995       error return. For global replacements, matches in which \K in a lookbe‐
2996       hind causes the match to start earlier than the point that was  reached
2997       in the previous iteration are also not supported.
2998
2999       The  first  seven  arguments  of pcre2_substitute() are the same as for
3000       pcre2_match(), except that the partial matching options are not permit‐
3001       ted,  and  match_data may be passed as NULL, in which case a match data
3002       block is obtained and freed within this function, using memory  manage‐
3003       ment  functions from the match context, if provided, or else those that
3004       were used to allocate memory for the compiled code.
3005
3006       If an external match_data block is provided,  its  contents  afterwards
3007       are  those  set by the final call to pcre2_match(). For global changes,
3008       this will have ended in a matching error. The contents of  the  ovector
3009       within the match data block may or may not have been changed.
3010
3011       The  outlengthptr  argument  must point to a variable that contains the
3012       length, in code units, of the output buffer. If the  function  is  suc‐
3013       cessful,  the value is updated to contain the length of the new string,
3014       excluding the trailing zero that is automatically added.
3015
3016       If the function is not  successful,  the  value  set  via  outlengthptr
3017       depends  on  the  type  of  error. For syntax errors in the replacement
3018       string, the value is the offset in the  replacement  string  where  the
3019       error  was  detected.  For  other  errors,  the value is PCRE2_UNSET by
3020       default. This includes the case of the output buffer being  too  small,
3021       unless  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  is  set (see below), in which
3022       case the value is the minimum length needed, including  space  for  the
3023       trailing  zero.  Note  that  in  order  to compute the required length,
3024       pcre2_substitute() has  to  simulate  all  the  matching  and  copying,
3025       instead of giving an error return as soon as the buffer overflows. Note
3026       also that the length is in code units, not bytes.
3027
3028       In the replacement string, which is interpreted as a UTF string in  UTF
3029       mode,  and  is  checked  for UTF validity unless the PCRE2_NO_UTF_CHECK
3030       option is set, a dollar character is an escape character that can spec‐
3031       ify  the  insertion  of  characters  from  capture groups or names from
3032       (*MARK) or other control verbs in the pattern. The following forms  are
3033       always recognized:
3034
3035         $$                  insert a dollar character
3036         $<n> or ${<n>}      insert the contents of group <n>
3037         $*MARK or ${*MARK}  insert a control verb name
3038
3039       Either  a  group  number  or  a  group name can be given for <n>. Curly
3040       brackets are required only if the following character would  be  inter‐
3041       preted as part of the number or name. The number may be zero to include
3042       the entire matched string.   For  example,  if  the  pattern  a(b)c  is
3043       matched  with "=abc=" and the replacement string "+$1$0$1+", the result
3044       is "=+babcb+=".
3045
3046       $*MARK inserts the name from the last encountered backtracking  control
3047       verb  on the matching path that has a name. (*MARK) must always include
3048       a name, but the other verbs need not.  For  example,  in  the  case  of
3049       (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B)
3050       the relevant name is "B". This facility can be used to  perform  simple
3051       simultaneous substitutions, as this pcre2test example shows:
3052
3053         /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
3054             apple lemon
3055          2: pear orange
3056
3057       As  well as the usual options for pcre2_match(), a number of additional
3058       options can be set in the options argument of pcre2_substitute().
3059
3060       PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
3061       string,  replacing every matching substring. If this option is not set,
3062       only the first matching substring is replaced. The search  for  matches
3063       takes  place in the original subject string (that is, previous replace‐
3064       ments do not affect it).  Iteration is  implemented  by  advancing  the
3065       startoffset  value  for  each search, which is always passed the entire
3066       subject string. If an offset limit is set in the match context, search‐
3067       ing stops when that limit is reached.
3068
3069       You  can  restrict  the effect of a global substitution to a portion of
3070       the subject string by setting either or both of startoffset and an off‐
3071       set limit. Here is a pcre2test example:
3072
3073         /B/g,replace=!,use_offset_limit
3074         ABC ABC ABC ABC\=offset=3,offset_limit=12
3075          2: ABC A!C A!C ABC
3076
3077       When  continuing  with  global substitutions after matching a substring
3078       with zero length, an attempt to find a non-empty match at the same off‐
3079       set is performed.  If this is not successful, the offset is advanced by
3080       one character except when CRLF is a valid newline sequence and the next
3081       two  characters are CR, LF. In this case, the offset is advanced by two
3082       characters.
3083
3084       PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when  the  output
3085       buffer is too small. The default action is to return PCRE2_ERROR_NOMEM‐
3086       ORY immediately. If this option  is  set,  however,  pcre2_substitute()
3087       continues to go through the motions of matching and substituting (with‐
3088       out, of course, writing anything) in order to compute the size of  buf‐
3089       fer  that  is  needed.  This  value is passed back via the outlengthptr
3090       variable,   with   the   result   of   the   function    still    being
3091       PCRE2_ERROR_NOMEMORY.
3092
3093       Passing  a  buffer  size  of zero is a permitted way of finding out how
3094       much memory is needed for a given substitution. However, this does mean
3095       that the entire operation is carried out twice. Depending on the appli‐
3096       cation, it may be more efficient to allocate a large  buffer  and  free
3097       the   excess   afterwards,   instead  of  using  PCRE2_SUBSTITUTE_OVER‐
3098       FLOW_LENGTH.
3099
3100       PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that
3101       do not appear in the pattern to be treated as unset groups. This option
3102       should be used with care, because it means that a typo in a group  name
3103       or number no longer causes the PCRE2_ERROR_NOSUBSTRING error.
3104
3105       PCRE2_SUBSTITUTE_UNSET_EMPTY  causes  unset  capture  groups (including
3106       unknown  groups  when  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  is  set)  to  be
3107       treated  as  empty  strings  when  inserted as described above. If this
3108       option is not set, an attempt to  insert  an  unset  group  causes  the
3109       PCRE2_ERROR_UNSET  error.  This  option does not influence the extended
3110       substitution syntax described below.
3111
3112       PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to  the
3113       replacement  string.  Without this option, only the dollar character is
3114       special, and only the group insertion forms  listed  above  are  valid.
3115       When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
3116
3117       Firstly,  backslash in a replacement string is interpreted as an escape
3118       character. The usual forms such as \n or \x{ddd} can be used to specify
3119       particular  character codes, and backslash followed by any non-alphanu‐
3120       meric character quotes that character. Extended quoting  can  be  coded
3121       using \Q...\E, exactly as in pattern strings.
3122
3123       There  are  also four escape sequences for forcing the case of inserted
3124       letters.  The insertion mechanism has three states:  no  case  forcing,
3125       force upper case, and force lower case. The escape sequences change the
3126       current state: \U and \L change to upper or lower case forcing, respec‐
3127       tively,  and  \E (when not terminating a \Q quoted sequence) reverts to
3128       no case forcing. The sequences \u and \l force the next  character  (if
3129       it  is  a  letter)  to  upper or lower case, respectively, and then the
3130       state automatically reverts to no case forcing. Case forcing applies to
3131       all  inserted  characters, including those from capture groups and let‐
3132       ters within \Q...\E quoted sequences.
3133
3134       Note that case forcing sequences such as \U...\E do not nest. For exam‐
3135       ple,  the  result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final
3136       \E  has  no   effect.   Note   also   that   the   PCRE2_ALT_BSUX   and
3137       PCRE2_EXTRA_ALT_BSUX options do not apply to replacement strings.
3138
3139       The  second  effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
3140       flexibility to capture group substitution. The  syntax  is  similar  to
3141       that used by Bash:
3142
3143         ${<n>:-<string>}
3144         ${<n>:+<string1>:<string2>}
3145
3146       As  before,  <n> may be a group number or a name. The first form speci‐
3147       fies a default value. If group <n> is set, its value  is  inserted;  if
3148       not,  <string>  is  expanded  and  the result inserted. The second form
3149       specifies strings that are expanded and inserted when group <n> is  set
3150       or  unset,  respectively. The first form is just a convenient shorthand
3151       for
3152
3153         ${<n>:+${<n>}:<string>}
3154
3155       Backslash can be used to escape colons and closing  curly  brackets  in
3156       the  replacement  strings.  A change of the case forcing state within a
3157       replacement string remains  in  force  afterwards,  as  shown  in  this
3158       pcre2test example:
3159
3160         /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo
3161             body
3162          1: hello
3163             somebody
3164          1: HELLO
3165
3166       The  PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
3167       substitutions.  However,  PCRE2_SUBSTITUTE_UNKNOWN_UNSET   does   cause
3168       unknown groups in the extended syntax forms to be treated as unset.
3169
3170       If  successful,  pcre2_substitute()  returns  the  number of successful
3171       matches. This may be zero if  no  matches  were  found,  and  is  never
3172       greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
3173
3174       In the event of an error, a negative error code is returned. Except for
3175       PCRE2_ERROR_NOMATCH   (which   is   never   returned),   errors    from
3176       pcre2_match() are passed straight back.
3177
3178       PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser‐
3179       tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set.
3180
3181       PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ‐
3182       ing  an  unknown  substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set)
3183       when  the  simple  (non-extended)  syntax  is  used  and  PCRE2_SUBSTI‐
3184       TUTE_UNSET_EMPTY is not set.
3185
3186       PCRE2_ERROR_NOMEMORY  is  returned  if  the  output  buffer  is not big
3187       enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size
3188       of  buffer  that is needed is returned via outlengthptr. Note that this
3189       does not happen by default.
3190
3191       PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax  errors  in
3192       the   replacement   string,   with   more   particular   errors   being
3193       PCRE2_ERROR_BADREPESCAPE (invalid  escape  sequence),  PCRE2_ERROR_REP‐
3194       MISSINGBRACE  (closing curly bracket not found), PCRE2_ERROR_BADSUBSTI‐
3195       TUTION   (syntax   error   in   extended   group   substitution),   and
3196       PCRE2_ERROR_BADSUBSPATTERN  (the  pattern match ended before it started
3197       or the match started earlier than the current position in the  subject,
3198       which can happen if \K is used in an assertion).
3199
3200       As for all PCRE2 errors, a text message that describes the error can be
3201       obtained  by  calling  the  pcre2_get_error_message()   function   (see
3202       "Obtaining a textual error message" above).
3203
3204   Substitution callouts
3205
3206       int pcre2_set_substitute_callout(pcre2_match_context *mcontext,
3207         int (*callout_function)(pcre2_substitute_callout_block *, void *),
3208         void *callout_data);
3209
3210       The pcre2_set_substitute_callout() function can be used to  specify  a
3211       callout function for pcre2_substitute(). This information is passed  in
3212       a match context. The callout function is called after each substitution
3213       has been processed, but it can cause the replacement not to happen. The
3214       callout  function is not called for simulated substitutions that happen
3215       as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
3216
3217       The first argument of the callout function is a pointer to a substitute
3218       callout  block structure, which contains the following fields, not nec‐
3219       essarily in this order:
3220
3221         uint32_t    version;
3222         uint32_t    subscount;
3223         PCRE2_SPTR  input;
3224         PCRE2_SPTR  output;
3225         PCRE2_SIZE *ovector;
3226         uint32_t    oveccount;
3227         PCRE2_SIZE  output_offsets[2];
3228
3229       The version field contains the version number of the block format.  The
3230       current  version  is  0.  The version number will increase in future if
3231       more fields are added, but the intention is never to remove any of  the
3232       existing fields.
3233
3234       The subscount field is the number of the current match. It is 1 for the
3235       first callout, 2 for the second, and so on. The input and output point‐
3236       ers are copies of the values passed to pcre2_substitute().
3237
3238       The  ovector  field points to the ovector, which contains the result of
3239       the most recent match. The oveccount field contains the number of pairs
3240       that are set in the ovector, and is always greater than zero.
3241
3242       The  output_offsets  vector  contains the offsets of the replacement in
3243       the output string. This has already been processed for dollar  and  (if
3244       requested) backslash substitutions as described above.
3245
3246       The  second  argument  of  the  callout function is the value passed as
3247       callout_data when the function was registered. The  value  returned  by
3248       the callout function is interpreted as follows:
3249
3250       If  the  value is zero, the replacement is accepted, and, if PCRE2_SUB‐
3251       STITUTE_GLOBAL is set, processing continues with a search for the  next
3252       match.  If  the  value  is  not  zero,  the  current replacement is not
3253       accepted. If the value is greater than zero, processing continues  when
3254       PCRE2_SUBSTITUTE_GLOBAL  is set. Otherwise (the value is less than zero
3255       or  PCRE2_SUBSTITUTE_GLOBAL  is  not set), the rest of the input is
3256       copied  to the output and the call to pcre2_substitute() exits, return‐
3257       ing the number of matches so far.
3258

DUPLICATE CAPTURE GROUP NAMES

3260
3261       int pcre2_substring_nametable_scan(const pcre2_code *code,
3262         PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
3263
3264       When a pattern is compiled with the PCRE2_DUPNAMES  option,  names  for
3265       capture  groups  are  not  required  to  be unique. Duplicate names are
3266       always allowed for groups with the same number, created  by  using  the
3267       (?| feature. Indeed, if such groups are named, they are required to use
3268       the same names.
3269
3270       Normally, patterns that use duplicate names are such that  in  any  one
3271       match,  only  one of each set of identically-named groups participates.
3272       An example is shown in the pcre2pattern documentation.
3273
3274       When  duplicates   are   present,   pcre2_substring_copy_byname()   and
3275       pcre2_substring_get_byname()  return  the first substring corresponding
3276       to  the  given  name  that  is  set.  Only   if   none   are   set   is
3277       PCRE2_ERROR_UNSET  returned.  The  pcre2_substring_number_from_name()
3278       function returns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are
3279       duplicate names.
3280
3281       If  you want to get full details of all captured substrings for a given
3282       name, you must use the pcre2_substring_nametable_scan()  function.  The
3283       first  argument is the compiled pattern, and the second is the name. If
3284       the third and fourth arguments are NULL, the function returns  a  group
3285       number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise.
3286
3287       When the third and fourth arguments are not NULL, they must be pointers
3288       to variables that are updated by the function. After it has  run,  they
3289       point to the first and last entries in the name-to-number table for the
3290       given name, and the function returns the length of each entry  in  code
3291       units.  In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are
3292       no entries for the given name.
3293
3294       The format of the name table is described above in the section entitled
3295       Information  about  a  pattern.  Given all the relevant entries for the
3296       name, you can extract each of their numbers,  and  hence  the  captured
3297       data.
3298

FINDING ALL POSSIBLE MATCHES AT ONE POSITION

3300
3301       The  traditional  matching  function  uses a similar algorithm to Perl,
3302       which stops when it finds the first match at a given point in the  sub‐
3303       ject. If you want to find all possible matches, or the longest possible
3304       match at a given position,  consider  using  the  alternative  matching
3305       function  (see  below) instead. If you cannot use the alternative func‐
3306       tion, you can kludge it up by making use of the callout facility, which
3307       is described in the pcre2callout documentation.
3308
3309       What you have to do is to insert a callout right at the end of the pat‐
3310       tern.  When your callout function is called, extract and save the  cur‐
3311       rent  matched  substring.  Then return 1, which forces pcre2_match() to
3312       backtrack and try other alternatives. Ultimately, when it runs  out  of
3313       matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
3314

MATCHING A PATTERN: THE ALTERNATIVE FUNCTION

3316
3317       int pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject,
3318         PCRE2_SIZE length, PCRE2_SIZE startoffset,
3319         uint32_t options, pcre2_match_data *match_data,
3320         pcre2_match_context *mcontext,
3321         int *workspace, PCRE2_SIZE wscount);
3322
3323       The  function  pcre2_dfa_match()  is  called  to match a subject string
3324       against a compiled pattern, using a matching algorithm that  scans  the
3325       subject string just once (not counting lookaround assertions), and does
3326       not backtrack.  This has different characteristics to the normal  algo‐
3327       rithm,  and  is not compatible with Perl. Some of the features of PCRE2
3328       patterns are not supported.  Nevertheless, there are  times  when  this
3329       kind  of  matching  can be useful. For a discussion of the two matching
3330       algorithms, and a list of features that pcre2_dfa_match() does not sup‐
3331       port, see the pcre2matching documentation.
3332
3333       The  arguments  for  the pcre2_dfa_match() function are the same as for
3334       pcre2_match(), plus two extras. The ovector within the match data block
3335       is used in a different way, and this is described below. The other com‐
3336       mon arguments are used in the same way as for pcre2_match(),  so  their
3337       description is not repeated here.
3338
3339       The  two  additional  arguments provide workspace for the function. The
3340       workspace vector should contain at least 20 elements. It  is  used  for
3341       keeping  track  of  multiple  paths  through  the  pattern  tree.  More
3342       workspace is needed for patterns and subjects where there are a lot  of
3343       potential matches.
3344
3345       Here is an example of a simple call to pcre2_dfa_match():
3346
3347         int wspace[20];
3348         pcre2_match_data *md = pcre2_match_data_create(4, NULL);
3349         int rc = pcre2_dfa_match(
3350           re,             /* result of pcre2_compile() */
3351           "some string",  /* the subject string */
3352           11,             /* the length of the subject string */
3353           0,              /* start at offset 0 in the subject */
3354           0,              /* default options */
3355           md,             /* the match data block */
3356           NULL,           /* a match context; NULL means use defaults */
3357           wspace,         /* working space vector */
3358           20);            /* number of elements (NOT size in bytes) */
3359
3360   Option bits for pcre2_dfa_match()
3361
3362       The  unused  bits of the options argument for pcre2_dfa_match() must be
3363       zero.  The  only   bits   that   may   be   set   are   PCRE2_ANCHORED,
3364       PCRE2_COPY_MATCHED_SUBJECT,       PCRE2_ENDANCHORED,      PCRE2_NOTBOL,
3365       PCRE2_NOTEOL,          PCRE2_NOTEMPTY,          PCRE2_NOTEMPTY_ATSTART,
3366       PCRE2_NO_UTF_CHECK,       PCRE2_PARTIAL_HARD,       PCRE2_PARTIAL_SOFT,
3367       PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but  the  last  four  of
3368       these  are  exactly the same as for pcre2_match(), so their description
3369       is not repeated here.
3370
3371         PCRE2_PARTIAL_HARD
3372         PCRE2_PARTIAL_SOFT
3373
3374       These have the same general effect as they do  for  pcre2_match(),  but
3375       the  details are slightly different. When PCRE2_PARTIAL_HARD is set for
3376       pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if  the  end  of  the
3377       subject is reached and there is still at least one matching possibility
3378       that requires additional characters. This happens even if some complete
3379       matches  have  already  been found. When PCRE2_PARTIAL_SOFT is set, the
3380       return code PCRE2_ERROR_NOMATCH is converted  into  PCRE2_ERROR_PARTIAL
3381       if  the  end  of  the  subject  is reached, there have been no complete
3382       matches, but there is still at least one matching possibility. The por‐
3383       tion  of  the  string that was inspected when the longest partial match
3384       was found is set as the first matching string in both cases. There is a
3385       more  detailed  discussion  of partial and multi-segment matching, with
3386       examples, in the pcre2partial documentation.
3387
3388         PCRE2_DFA_SHORTEST
3389
3390       Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm  to
3391       stop as soon as it has found one match. Because of the way the alterna‐
3392       tive algorithm works, this is necessarily the shortest  possible  match
3393       at the first possible matching point in the subject string.
3394
3395         PCRE2_DFA_RESTART
3396
3397       When  pcre2_dfa_match() returns a partial match, it is possible to call
3398       it again, with additional subject characters, and have it continue with
3399       the same match. The PCRE2_DFA_RESTART option requests this action; when
3400       it is set, the workspace and wscount arguments must reference the  same
3401       vector  as  before  because data about the match so far is left in them
3402       after a partial match. There is more discussion of this facility in the
3403       pcre2partial documentation.
3404
3405   Successful returns from pcre2_dfa_match()
3406
3407       When pcre2_dfa_match() succeeds, it may have matched more than one sub‐
3408       string in the subject. Note, however, that all the matches from one run
3409       of  the  function  start  at the same point in the subject. The shorter
3410       matches are all initial substrings of the longer matches. For  example,
3411       if the pattern
3412
3413         <.*>
3414
3415       is matched against the string
3416
3417         This is <something> <something else> <something further> no more
3418
3419       the three matched strings are
3420
3421         <something> <something else> <something further>
3422         <something> <something else>
3423         <something>
3424
3425       On  success,  the  yield of the function is a number greater than zero,
3426       which is the number of matched substrings.  The  offsets  of  the  sub‐
3427       strings  are returned in the ovector, and can be extracted by number in
3428       the same way as for pcre2_match(), but the numbers bear no relation  to
3429       any  capture groups that may exist in the pattern, because DFA matching
3430       does not support capturing.
3431
3432       Calls to the convenience functions  that  extract  substrings  by  name
3433       return  the  error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used
3434       after a DFA match. The convenience functions that extract substrings by
3435       number never return PCRE2_ERROR_NOSUBSTRING.
3436
3437       The  matched  strings  are  stored  in  the ovector in reverse order of
3438       length; that is, the longest matching string is first.  If  there  were
3439       too  many matches to fit into the ovector, the yield of the function is
3440       zero, and the vector is filled with the longest matches.
3441
3442       NOTE: PCRE2's "auto-possessification" optimization usually  applies  to
3443       character  repeats at the end of a pattern (as well as internally). For
3444       example, the pattern "a\d+" is compiled as if it were "a\d++". For  DFA
3445       matching,  this  means  that  only  one possible match is found. If you
3446       really do want multiple matches in such cases, either use  an  ungreedy
3447       repeat  such  as  "a\d+?"  or set the PCRE2_NO_AUTO_POSSESS option when
3448       compiling.
3449
3450   Error returns from pcre2_dfa_match()
3451
3452       The pcre2_dfa_match() function returns a negative number when it fails.
3453       Many  of  the  errors  are  the same as for pcre2_match(), as described
3454       above.  There are in addition the following errors that are specific to
3455       pcre2_dfa_match():
3456
3457         PCRE2_ERROR_DFA_UITEM
3458
3459       This  return  is  given  if pcre2_dfa_match() encounters an item in the
3460       pattern that it does not support, for instance, the use of \C in a  UTF
3461       mode, or a backreference.
3462
3463         PCRE2_ERROR_DFA_UCOND
3464
3465       This  return  is given if pcre2_dfa_match() encounters a condition item
3466       that uses a backreference for the condition, or a test for recursion in
3467       a specific capture group. These are not supported.
3468
3469         PCRE2_ERROR_DFA_UINVALID_UTF
3470
3471       This  return is given if pcre2_dfa_match() is called for a pattern that
3472       was compiled with PCRE2_MATCH_INVALID_UTF. This is  not  supported  for
3473       DFA matching.
3474
3475         PCRE2_ERROR_DFA_WSSIZE
3476
3477       This  return  is  given  if  pcre2_dfa_match() runs out of space in the
3478       workspace vector.
3479
3480         PCRE2_ERROR_DFA_RECURSE
3481
3482       When a recursion or subroutine call is processed, the matching function
3483       calls  itself  recursively,  using  private  memory for the ovector and
3484       workspace.  This error is given if the internal ovector  is  not  large
3485       enough.  This  should  be  extremely  rare, as a vector of size 1000 is
3486       used.
3487
3488         PCRE2_ERROR_DFA_BADRESTART
3489
3490       When pcre2_dfa_match() is called  with  the  PCRE2_DFA_RESTART  option,
3491       some  plausibility  checks  are  made on the contents of the workspace,
3492       which should contain data about the previous partial match. If  any  of
3493       these checks fail, this error is given.
3494

SEE ALSO

3496
3497       pcre2build(3),    pcre2callout(3),    pcre2demo(3),   pcre2matching(3),
3498       pcre2partial(3), pcre2posix(3), pcre2sample(3), pcre2unicode(3).
3499

AUTHOR

3501
3502       Philip Hazel
3503       University Computing Service
3504       Cambridge, England.
3505

REVISION

3507
3508       Last updated: 02 September 2019
3509       Copyright (c) 1997-2019 University of Cambridge.
3510
3511
3512
3513PCRE2 10.34                    02 September 2019                   PCRE2API(3)
Impressum