1Str(3)                           OCaml library                          Str(3)
2
3
4

NAME

6       Str - Regular expressions and high-level string processing
7

Module

9       Module   Str
10

Documentation

12       Module Str
13        : sig end
14
15
16       Regular expressions and high-level string processing
17
18
19
20
21
22
23
24   Regular expressions
25       type regexp
26
27
28       The type of compiled regular expressions.
29
30
31
32       val regexp : string -> regexp
33
34       Compile a regular expression. The following constructs are recognized:
35
36       - .  Matches any character except newline.
37
38       -  *  (postfix)  Matches  the preceding expression zero, one or several
39       times
40
41       - + (postfix) Matches the preceding expression one or several times
42
43       - ?  (postfix) Matches the preceding expression once or not at all
44
45       - [..]  Character set. Ranges are denoted with - , as in  [a-z]  .   An
46       initial ^ , as in [^0-9] , complements the set.  To include a ] charac‐
47       ter in a set, make it the first character of the set. To  include  a  -
48       character in a set, make it the first or the last character of the set.
49
50       -  ^  Matches  at  beginning  of  line:  either at the beginning of the
51       matched string, or just after a '\n' character.
52
53       - $ Matches at end of line: either at the end of the matched string, or
54       just before a '\n' character.
55
56       - \| (infix) Alternative between two expressions.
57
58       - \(..\) Grouping and naming of the enclosed expression.
59
60       - \1 The text matched by the first \(...\) expression ( \2 for the sec‐
61       ond expression, and so on up to \9 ).
62
63       - \b Matches word boundaries.
64
65       - \ Quotes special characters.  The special characters are $^\.*+?[] .
66
67       Note: the argument to regexp is usually a string literal. In this case,
68       any  backslash  character  in the regular expression must be doubled to
69       make it past the OCaml string parser. For example,  the  following  ex‐
70       pression:
71       let r = Str.regexp "hello \\([A-Za-z]+\\)" in
72             Str.replace_first r "\\1" "hello world"
73       returns the string "world" .
74
75       In  particular,  if you want a regular expression that matches a single
76       backslash character, you need to quote it in  the  argument  to  regexp
77       (according to the last item of the list above) by adding a second back‐
78       slash. Then you need to quote both backslashes (according to the syntax
79       of  string  constants  in OCaml) by doubling them again, so you need to
80       write four backslash characters: Str.regexp "\\\\" .
81
82
83
84       val regexp_case_fold : string -> regexp
85
86       Same as regexp , but the compiled  expression  will  match  text  in  a
87       case-insensitive  way:  uppercase and lowercase letters will be consid‐
88       ered equivalent.
89
90
91
92       val quote : string -> string
93
94
95       Str.quote s returns a regexp string that matches exactly s and  nothing
96       else.
97
98
99
100       val regexp_string : string -> regexp
101
102
103       Str.regexp_string s returns a regular expression that matches exactly s
104       and nothing else.
105
106
107
108       val regexp_string_case_fold : string -> regexp
109
110
111       Str.regexp_string_case_fold is similar to Str.regexp_string ,  but  the
112       regexp matches in a case-insensitive way.
113
114
115
116
117   String matching and searching
118       val string_match : regexp -> string -> int -> bool
119
120
121       string_match  r  s  start tests whether a substring of s that starts at
122       position start matches the regular expression r .  The first  character
123       of a string has position 0 , as usual.
124
125
126
127       val search_forward : regexp -> string -> int -> int
128
129
130       search_forward r s start searches the string s for a substring matching
131       the regular expression r . The search starts at position start and pro‐
132       ceeds  towards the end of the string.  Return the position of the first
133       character of the matched substring.
134
135
136       Raises Not_found if no substring matches.
137
138
139
140       val search_backward : regexp -> string -> int -> int
141
142
143       search_backward r s last searches the string s for a substring matching
144       the  regular  expression r . The search first considers substrings that
145       start at position last and proceeds towards the beginning of the string.
146       Return the position of the first character of the matched substring.
147
148
149       Raises Not_found if no substring matches.
150
151
152
153       val string_partial_match : regexp -> string -> int -> bool
154
155       Similar  to  Str.string_match  ,  but also returns true if the argument
156       string is a prefix of a string that matches.  This includes the case of
157       a true complete match.
158
159
160
161       val matched_string : string -> string
162
163
164       matched_string  s  returns  the  substring of s that was matched by the
165       last call to one of the following matching or searching functions:
166
167       - Str.string_match
168
169
170       - Str.search_forward
171
172
173       - Str.search_backward
174
175
176       - Str.string_partial_match
177
178
179       - Str.global_substitute
180
181
182       - Str.substitute_first
183
184       provided that none of the following functions was called in between:
185
186       - Str.global_replace
187
188
189       - Str.replace_first
190
191
192       - Str.split
193
194
195       - Str.bounded_split
196
197
198       - Str.split_delim
199
200
201       - Str.bounded_split_delim
202
203
204       - Str.full_split
205
206
207       - Str.bounded_full_split
208
209       Note: in the case of global_substitute and substitute_first , a call to
210       matched_string  is  only  valid  within  the  subst argument, not after
211       global_substitute or substitute_first returns.
212
213       The user must make sure that the parameter s is the  same  string  that
214       was passed to the matching or searching function.
215
216
217
218       val match_beginning : unit -> int
219
220
221       match_beginning()  returns  the  position of the first character of the
222       substring that was matched by the last call to a matching or  searching
223       function (see Str.matched_string for details).
224
225
226
227       val match_end : unit -> int
228
229
230       match_end()  returns  the  position of the character following the last
231       character of the substring that was matched  by  the  last  call  to  a
232       matching or searching function (see Str.matched_string for details).
233
234
235
236       val matched_group : int -> string -> string
237
238
239       matched_group  n s returns the substring of s that was matched by the n
240       th group \(...\) of the regular expression that was matched by the last
241       call  to  a  matching or searching function (see Str.matched_string for
242       details). When n is 0 , it returns the substring matched by  the  whole
243       regular  expression.   The  user must make sure that the parameter s is
244       the same string that was passed to the matching or searching function.
245
246
247       Raises Not_found if the n th group of the regular  expression  was  not
248       matched.   This can happen with groups inside alternatives \| , options
249       ?  or repetitions * .  For instance, the empty string will match \(a\)*
250       ,  but  matched_group 1 "" will raise Not_found because the first group
251       itself was not matched.
252
253
254
255       val group_beginning : int -> int
256
257
258       group_beginning n returns the position of the first  character  of  the
259       substring  that was matched by the n th group of the regular expression
260       that was matched by the last call to a matching or  searching  function
261       (see Str.matched_string for details).
262
263
264       Raises  Not_found  if  the n th group of the regular expression was not
265       matched.
266
267
268       Raises Invalid_argument if there are fewer than n groups in the regular
269       expression.
270
271
272
273       val group_end : int -> int
274
275
276       group_end  n  returns  the position of the character following the last
277       character of the substring that was matched by the n th group of the regu‐
278       lar  expression  that  was  matched  by  the last call to a matching or
279       searching function (see Str.matched_string for details).
280
281
282       Raises Not_found if the n th group of the regular  expression  was  not
283       matched.
284
285
286       Raises Invalid_argument if there are fewer than n groups in the regular
287       expression.
288
289
290
291
292   Replacement
293       val global_replace : regexp -> string -> string -> string
294
295
296       global_replace regexp templ s returns a string identical to s ,  except
297       that all substrings of s that match regexp have been replaced by templ.
298       The replacement template templ can contain \1 , \2 , etc.; these se‐
299       quences will be replaced by the text matched by the corresponding group
300       in the regular expression.  \0 stands for the text matched by the whole
301       regular expression.
302
303
304
305       val replace_first : regexp -> string -> string -> string
306
307       Same  as  Str.global_replace  ,  except  that  only the first substring
308       matching the regular expression is replaced.
309
310
311
312       val global_substitute : regexp ->  (string  ->  string)  ->  string  ->
313       string
314
315
316       global_substitute  regexp subst s returns a string identical to s , ex‐
317       cept that all substrings of s that match regexp have been  replaced  by
318       the  result  of  function subst . The function subst is called once for
319       each matching substring, and receives s (the whole text) as argument.
320
321
322
323       val substitute_first : regexp -> (string -> string) -> string -> string
324
325       Same as Str.global_substitute , except that only  the  first  substring
326       matching the regular expression is replaced.
327
328
329
330       val replace_matched : string -> string -> string
331
332
333       replace_matched  repl s returns the replacement text repl in which \1 ,
334       \2 , etc. have been replaced by the text matched by  the  corresponding
335       groups in the regular expression that was matched by the last call to a
336       matching or searching function (see Str.matched_string for details).  s
337       must  be  the  same string that was passed to the matching or searching
338       function.
339
340
341
342
343   Splitting
344       val split : regexp -> string -> string list
345
346
347       split r s splits s into substrings, taking as delimiters the substrings
348       that match r , and returns the list of substrings.  For instance, split
349       (regexp "[ \t]+") s splits s into blank-separated words.  An occurrence
350       of  the  delimiter  at the beginning or at the end of the string is ig‐
351       nored.
352
353
354
355       val bounded_split : regexp -> string -> int -> string list
356
357       Same as Str.split , but splits into at most n substrings,  where  n  is
358       the extra integer parameter.
359
360
361
362       val split_delim : regexp -> string -> string list
363
364       Same as Str.split but occurrences of the delimiter at the beginning and
365       at the end of the string are recognized and returned as  empty  strings
366       in  the result.  For instance, split_delim (regexp " ") " abc " returns
367       [""; "abc"; ""] , while split with the same arguments returns ["abc"] .
368
369
370
371       val bounded_split_delim : regexp -> string -> int -> string list
372
373       Same as Str.bounded_split , but occurrences of the delimiter at the be‐
374       ginning  and  at  the  end of the string are recognized and returned as
375       empty strings in the result.
376
377
378       type split_result =
379        | Text of string
380        | Delim of string
381
382
383
384
385
386       val full_split : regexp -> string -> split_result list
387
388       Same as Str.split_delim , but returns the delimiters  as  well  as  the
389       substrings  contained  between delimiters.  The former are tagged Delim
390       in the result list;  the  latter  are  tagged  Text  .   For  instance,
391       full_split  (regexp "[{}]") "{ab}" returns [Delim "{"; Text "ab"; Delim
392       "}"] .
393
394
395
396       val bounded_full_split : regexp -> string -> int -> split_result list
397
398       Same as Str.bounded_split_delim , but returns the delimiters as well as
399       the substrings contained between delimiters.  The former are tagged
400       Delim in the result list; the latter are tagged Text .
401
402
403
404
405   Extracting substrings
406       val string_before : string -> int -> string
407
408
409       string_before s n returns the substring of all  characters  of  s  that
410       precede position n (excluding the character at position n ).
411
412
413
414       val string_after : string -> int -> string
415
416
417       string_after s n returns the substring of all characters of s that fol‐
418       low position n (including the character at position n ).
419
420
421
422       val first_chars : string -> int -> string
423
424
425       first_chars s n returns the first n characters of s .  This is the same
426       function as Str.string_before .
427
428
429
430       val last_chars : string -> int -> string
431
432
433       last_chars s n returns the last n characters of s .
434
435
436
437
438
439OCamldoc                          2022-02-04                            Str(3)
Impressum