1Str(3)                           OCaml library                          Str(3)
2
3
4

NAME

6       Str - Regular expressions and high-level string processing
7

Module

9       Module   Str
10

Documentation

12       Module Str
13        : sig end
14
15
16       Regular expressions and high-level string processing
17
18
19
20
21
22
23
24   Regular expressions
25       type regexp
26
27
28       The type of compiled regular expressions.
29
30
31
32       val regexp : string -> regexp
33
34       Compile a regular expression. The following constructs are recognized:
35
36       - .  Matches any character except newline.
37
38       -  *  (postfix)  Matches  the preceding expression zero, one or several
39       times.
40
41       - + (postfix) Matches the preceding expression one or several times.
42
43       - ?  (postfix) Matches the preceding expression once or not at all.
44
45       - [..]  Character set. Ranges are denoted with - , as in  [a-z]  .   An
46       initial ^ , as in [^0-9] , complements the set.  To include a ] charac‐
47       ter in a set, make it the first character of the set. To  include  a  -
48       character in a set, make it the first or the last character of the set.
49
50       -  ^  Matches  at  beginning  of  line:  either at the beginning of the
51       matched string, or just after a '\n' character.
52
53       - $ Matches at end of line: either at the end of the matched string, or
54       just before a '\n' character.
55
56       - \| (infix) Alternative between two expressions.
57
58       - \(..\) Grouping and naming of the enclosed expression.
59
60       - \1 The text matched by the first \(...\) expression ( \2 for the sec‐
61       ond expression, and so on up to \9 ).
62
63       - \b Matches word boundaries.
64
65       - \ Quotes special characters.  The special characters are $^\.*+?[] .
66
67       In regular expressions you will often use  backslash  characters;  it's
68       easier to use a quoted string literal {|...|} to avoid having to escape
69       backslashes.
70
71       For example, the following expression:
72       let r = Str.regexp {|hello \([A-Za-z]+\)|} in
73             Str.replace_first r {|\1|} "hello world"
74       returns the string "world" .
75
76       If you want a regular expression that matches a literal backslash char‐
77       acter, you need to double it: Str.regexp {|\\|} .
78
79       You  can  use regular string literals "..."  too, however you will have
80       to escape backslashes. The example above can be rewritten with a  regu‐
81       lar string literal as:
82       let r = Str.regexp "hello \\([A-Za-z]+\\)" in
83             Str.replace_first r "\\1" "hello world"
84
85       And the regular expression for matching a backslash becomes a quadruple
86       backslash: Str.regexp "\\\\" .
87
88
89
90       val regexp_case_fold : string -> regexp
91
92       Same as regexp , but the compiled  expression  will  match  text  in  a
93       case-insensitive  way:  uppercase and lowercase letters will be consid‐
94       ered equivalent.
95
96
97
98       val quote : string -> string
99
100
101       Str.quote s returns a regexp string that matches exactly s and  nothing
102       else.
103
104
105
106       val regexp_string : string -> regexp
107
108
109       Str.regexp_string s returns a regular expression that matches exactly s
110       and nothing else.
111
112
113
114       val regexp_string_case_fold : string -> regexp
115
116
117       Str.regexp_string_case_fold is similar to Str.regexp_string ,  but  the
118       regexp matches in a case-insensitive way.
119
120
121
122
123   String matching and searching
124       val string_match : regexp -> string -> int -> bool
125
126
127       string_match  r  s  start tests whether a substring of s that starts at
128       position start matches the regular expression r .  The first  character
129       of a string has position 0 , as usual.
130
131
132
133       val search_forward : regexp -> string -> int -> int
134
135
136       search_forward r s start searches the string s for a substring matching
137       the regular expression r . The search starts at position start and pro‐
138       ceeds  towards the end of the string.  Return the position of the first
139       character of the matched substring.
140
141
142       Raises Not_found if no substring matches.
143
144
145
146       val search_backward : regexp -> string -> int -> int
147
148
149       search_backward r s last searches the string s for a substring matching
150       the  regular  expression r . The search first considers substrings that
151       start at position last and proceeds towards the beginning of the string.
152       Return the position of the first character of the matched substring.
153
154
155       Raises Not_found if no substring matches.
156
157
158
159       val string_partial_match : regexp -> string -> int -> bool
160
161       Similar  to  Str.string_match  ,  but also returns true if the argument
162       string is a prefix of a string that matches.  This includes the case of
163       a true complete match.
164
165
166
167       val matched_string : string -> string
168
169
170       matched_string  s  returns  the  substring of s that was matched by the
171       last call to one of the following matching or searching functions:
172
173       - Str.string_match
174
175
176       - Str.search_forward
177
178
179       - Str.search_backward
180
181
182       - Str.string_partial_match
183
184
185       - Str.global_substitute
186
187
188       - Str.substitute_first
189
190       provided that none of the following functions was called in between:
191
192       - Str.global_replace
193
194
195       - Str.replace_first
196
197
198       - Str.split
199
200
201       - Str.bounded_split
202
203
204       - Str.split_delim
205
206
207       - Str.bounded_split_delim
208
209
210       - Str.full_split
211
212
213       - Str.bounded_full_split
214
215       Note: in the case of global_substitute and substitute_first , a call to
216       matched_string  is  only  valid  within  the  subst argument, not after
217       global_substitute or substitute_first returns.
218
219       The user must make sure that the parameter s is the  same  string  that
220       was passed to the matching or searching function.
221
222
223
224       val match_beginning : unit -> int
225
226
227       match_beginning()  returns  the  position of the first character of the
228       substring that was matched by the last call to a matching or  searching
229       function (see Str.matched_string for details).
230
231
232
233       val match_end : unit -> int
234
235
236       match_end()  returns  the  position of the character following the last
237       character of the substring that was matched  by  the  last  call  to  a
238       matching or searching function (see Str.matched_string for details).
239
240
241
242       val matched_group : int -> string -> string
243
244
245       matched_group  n s returns the substring of s that was matched by the
246       n-th group \(...\) of the regular expression that was matched by the last
247       call  to  a  matching or searching function (see Str.matched_string for
248       details). When n is 0 , it returns the substring matched by  the  whole
249       regular  expression.   The  user must make sure that the parameter s is
250       the same string that was passed to the matching or searching function.
251
252
253       Raises Not_found if the n-th group of the regular  expression  was  not
254       matched.   This can happen with groups inside alternatives \| , options
255       ?  or repetitions * .  For instance, the empty string will match \(a\)*
256       ,  but  matched_group 1 "" will raise Not_found because the first group
257       itself was not matched.
258
259
260
261       val group_beginning : int -> int
262
263
264       group_beginning n returns the position of the first  character  of  the
265       substring  that was matched by the n-th group of the regular expression
266       that was matched by the last call to a matching or  searching  function
267       (see Str.matched_string for details).
268
269
270       Raises  Not_found  if  the n-th group of the regular expression was not
271       matched.
272
273
274       Raises Invalid_argument if there are fewer than n groups in the regular
275       expression.
276
277
278
279       val group_end : int -> int
280
281
282       group_end  n  returns  the position of the character following the last
283       character of the substring that was matched by the n-th group of the regu‐
284       lar  expression  that  was  matched  by  the last call to a matching or
285       searching function (see Str.matched_string for details).
286
287
288       Raises Not_found if the n-th group of the regular  expression  was  not
289       matched.
290
291
292       Raises Invalid_argument if there are fewer than n groups in the regular
293       expression.
294
295
296
297
298   Replacement
299       val global_replace : regexp -> string -> string -> string
300
301
302       global_replace regexp templ s returns a string identical to s ,  except
303       that  all substrings of s that match regexp have been replaced by templ
304       . The replacement template templ can contain \1 , \2 , etc;  these  se‐
305       quences will be replaced by the text matched by the corresponding group
306       in the regular expression.  \0 stands for the text matched by the whole
307       regular expression.
308
309
310
311       val replace_first : regexp -> string -> string -> string
312
313       Same  as  Str.global_replace  ,  except  that  only the first substring
314       matching the regular expression is replaced.
315
316
317
318       val global_substitute : regexp ->  (string  ->  string)  ->  string  ->
319       string
320
321
322       global_substitute  regexp subst s returns a string identical to s , ex‐
323       cept that all substrings of s that match regexp have been  replaced  by
324       the  result  of  function subst . The function subst is called once for
325       each matching substring, and receives s (the whole text) as argument.
326
327
328
329       val substitute_first : regexp -> (string -> string) -> string -> string
330
331       Same as Str.global_substitute , except that only  the  first  substring
332       matching the regular expression is replaced.
333
334
335
336       val replace_matched : string -> string -> string
337
338
339       replace_matched  repl s returns the replacement text repl in which \1 ,
340       \2 , etc. have been replaced by the text matched by  the  corresponding
341       groups in the regular expression that was matched by the last call to a
342       matching or searching function (see Str.matched_string for details).  s
343       must  be  the  same string that was passed to the matching or searching
344       function.
345
346
347
348
349   Splitting
350       val split : regexp -> string -> string list
351
352
353       split r s splits s into substrings, taking as delimiters the substrings
354       that match r , and returns the list of substrings.  For instance, split
355       (regexp "[ \t]+") s splits s into blank-separated words.  An occurrence
356       of  the  delimiter  at the beginning or at the end of the string is ig‐
357       nored.
358
359
360
361       val bounded_split : regexp -> string -> int -> string list
362
363       Same as Str.split , but splits into at most n substrings,  where  n  is
364       the extra integer parameter.
365
366
367
368       val split_delim : regexp -> string -> string list
369
370       Same as Str.split but occurrences of the delimiter at the beginning and
371       at the end of the string are recognized and returned as  empty  strings
372       in  the result.  For instance, split_delim (regexp " ") " abc " returns
373       [""; "abc"; ""] , while split with the same arguments returns ["abc"] .
374
375
376
377       val bounded_split_delim : regexp -> string -> int -> string list
378
379       Same as Str.bounded_split , but occurrences of the delimiter at the be‐
380       ginning  and  at  the  end of the string are recognized and returned as
381       empty strings in the result.
382
383
384       type split_result =
385        | Text of string
386        | Delim of string
387
388
389
390
391
392       val full_split : regexp -> string -> split_result list
393
394       Same as Str.split_delim , but returns the delimiters  as  well  as  the
395       substrings  contained  between delimiters.  The former are tagged Delim
396       in the result list;  the  latter  are  tagged  Text  .   For  instance,
397       full_split  (regexp "[{}]") "{ab}" returns [Delim "{"; Text "ab"; Delim
398       "}"] .
399
400
401
402       val bounded_full_split : regexp -> string -> int -> split_result list
403
404       Same as Str.bounded_split_delim , but returns the delimiters as well as
405       the substrings contained between delimiters.  The former are tagged De‐
406       lim in the result list; the latter are tagged Text .
407
408
409
410
411   Extracting substrings
412       val string_before : string -> int -> string
413
414
415       string_before s n returns the substring of all  characters  of  s  that
416       precede position n (excluding the character at position n ).
417
418
419
420       val string_after : string -> int -> string
421
422
423       string_after s n returns the substring of all characters of s starting
424       at position n (including the character at position n ).
425
426
427
428       val first_chars : string -> int -> string
429
430
431       first_chars s n returns the first n characters of s .  This is the same
432       function as Str.string_before .
433
434
435
436       val last_chars : string -> int -> string
437
438
439       last_chars s n returns the last n characters of s .
440
441
442
443
444
445OCamldoc                          2022-07-22                            Str(3)
Impressum