1leex(3)                    Erlang Module Definition                    leex(3)
2
3
4

NAME

6       leex - Lexical analyzer generator for Erlang
7

DESCRIPTION

9       A regular expression based lexical analyzer generator for Erlang, simi‐
10       lar to lex or flex.
11
12   Note:
13       The Leex module should be considered experimental as it will be subject
14       to changes in future releases.
15
16

DATA TYPES

18       error_info() =
19           {erl_anno:line() | none, module(), ErrorDescriptor :: term()}
20
21              The  standard  error_info()  structure that is returned from all
22              I/O modules. ErrorDescriptor is formattable by format_error/1.
23

EXPORTS

25       file(FileName) -> leex_ret()
26
27       file(FileName, Options) -> leex_ret()
28
29              Types:
30
31                 FileName = file:filename()
32                 Options = Option | [Option]
33                 Option =
34                     {dfa_graph, boolean()} |
35                     {includefile, Includefile :: file:filename()} |
36                     {report_errors, boolean()} |
37                     {report_warnings, boolean()} |
38                     {report, boolean()} |
39                     {return_errors, boolean()} |
40                     {return_warnings, boolean()} |
41                     {return, boolean()} |
42                     {scannerfile, Scannerfile :: file:filename()} |
43                     {verbose, boolean()} |
44                     {warnings_as_errors, boolean()} |
45                     dfa_graph | report_errors | report_warnings | report |
46                     return_errors | return_warnings | return | verbose |
47                     warnings_as_errors
48                 leex_ret() = ok_ret() | error_ret()
49                 ok_ret() =
50                     {ok, Scannerfile :: file:filename()} |
51                     {ok, Scannerfile :: file:filename(), warnings()}
52                 error_ret() =
53                     error | {error, Errors :: errors(), Warnings :: warnings()}
54                 errors() = [{file:filename(), [error_info()]}]
55                 warnings() = [{file:filename(), [error_info()]}]
56
57              Generates a lexical analyzer from the definition  in  the  input
58              file.  The  input  file has the extension .xrl. This is added to
59              the filename if it is not given. The resulting module is the Xrl
60              filename without the .xrl extension.
61
62              The current options are:
63
64                dfa_graph:
65                  Generates  a  .dot  file which contains a description of the
66                  DFA  in  a  format  which  can  be  viewed  with   Graphviz,
67                  www.graphviz.org.
68
69                {includefile,Includefile}:
70                  Uses a specific or customised prologue file instead  of  the
71                  default  lib/parsetools/include/leexinc.hrl, which is other‐
72                  wise included.
73
74                {report_errors, boolean()}:
75                  Causes errors to be printed as they occur. Default is true.
76
77                {report_warnings, boolean()}:
78                  Causes  warnings  to  be  printed  as they occur. Default is
79                  true.
80
81                {report, boolean()}:
82                  This is a short form for both report_errors and report_warn‐
83                  ings.
84
85                {return_errors, boolean()}:
86                  If  this  flag is set, {error, Errors, Warnings} is returned
87                  when there are errors. Default is false.
88
89                {return_warnings, boolean()}:
90                  If this flag is set, an extra field containing  Warnings  is
91                  added to the tuple returned upon success. Default is false.
92
93                {return, boolean()}:
94                  This is a short form for both return_errors and return_warn‐
95                  ings.
96
97                {scannerfile, Scannerfile}:
98                  Scannerfile is the name of the file that  will  contain  the
99                  Erlang  scanner  code that is generated. The default ("") is
100                  to add the extension .erl to FileName stripped of  the  .xrl
101                  extension.
102
103                {verbose, boolean()}:
104                  Outputs information from parsing the input file and generat‐
105                  ing the internal tables.
106
107                {warnings_as_errors, boolean()}:
108                  Causes warnings to be treated as errors.
109
110              Any of the Boolean options can be set to  true  by  stating  the
111              name  of the option. For example, verbose is equivalent to {ver‐
112              bose, true}.
113
114              Leex will add the extension .hrl to the Includefile name and the
115              extension  .erl to the Scannerfile name, unless the extension is
116              already there.
117
118       format_error(ErrorDescriptor) -> io_lib:chars()
119
120              Types:
121
122                 ErrorDescriptor = term()
123
124              Returns a descriptive string in English of an error  reason  Er‐
125              rorDescriptor  returned  by leex:file/1,2 when there is an error
126              in a regular expression.
127

GENERATED SCANNER EXPORTS

129       The following functions are exported by the generated scanner.
130

EXPORTS

132       Module:string(String) -> StringRet
133       Module:string(String, StartLine) -> StringRet
134
135              Types:
136
137                 String = string()
138                 StringRet = {ok,Tokens,EndLine} | ErrorInfo
139                 Tokens = [Token]
140                 EndLine = StartLine = erl_anno:line()
141
142              Scans String and returns all the tokens in it, or an error.
143
144          Note:
145              It is an error if not all of the characters in String  are  con‐
146              sumed.
147
148
149       Module:token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars}
150       Module:token(Cont,  Chars,  StartLine)  ->  {more,Cont1} | {done,Token‐
151       Ret,RestChars}
152
153              Types:
154
155                 Cont = [] | Cont1
156                 Cont1 = tuple()
157                 Chars = RestChars = string() | eof
158                 TokenRet = {ok, Token, EndLine} | {eof, EndLine} | ErrorInfo
159                 StartLine = EndLine = erl_anno:line()
160
161              This is a re-entrant call to try and scan one token from  Chars.
162              If  there  are enough characters in Chars to either scan a token
163              or detect an error then this will be returned  with  {done,...}.
164              Otherwise {more,Cont1} will be returned where Cont1  is  used  as
165              Cont  in the next call to token() with more characters to try and
166              scan the token. This is continued until a token has been scanned.
167              Cont is initially [].
168
169              It is not designed to be called directly by an  application  but
170              used  through the i/o system where it can typically be called in
171              an application by:
172
173              io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
174                -> TokenRet
175
176       Module:tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars}
177       Module:tokens(Cont, Chars, StartLine) ->  {more,Cont1}  |  {done,Token‐
178       sRet,RestChars}
179
180              Types:
181
182                 Cont = [] | Cont1
183                 Cont1 = tuple()
184                 Chars = RestChars = string() | eof
185                 TokensRet  =  {ok, Tokens, EndLine} | {eof, EndLine} | Error‐
186                 Info
187                 Tokens = [Token]
188                 StartLine = EndLine = erl_anno:line()
189
190              This is a re-entrant call to try and scan tokens from Chars.  If
191              there  are  enough  characters in Chars to either scan tokens or
192              detect an error then this will be returned with {done,...}. Oth‐
193              erwise {more,Cont1} will be returned where Cont1 is used as  Cont
194              in the next call to tokens() with more characters to try and scan
195              the tokens. This is continued until all tokens have been scanned.
196              Cont is initially [].
197
198              This function differs from token in that it  will  continue  to
199              scan  tokens  until  an  {end_token,Token} has been scanned (see
200              section INPUT FILE FORMAT). It will then return all the  tokens.
201              This  is  typically used for scanning grammars like Erlang where
202              there is an explicit end token, '.'. If no end  token  is  found
203              then  the  whole  file will be scanned and returned. If an error
204              occurs then all tokens up to and including the next  end  token
205              will be skipped.
206
207              It  is  not designed to be called directly by an application but
208              used through the i/o system where it can typically be called  in
209              an application by:
210
211              io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Line]})
212                -> TokensRet
213

DEFAULT LEEX OPTIONS

215       The  (host  operating system) environment variable ERL_COMPILER_OPTIONS
216       can be used to give default Leex options. Its value must be a valid Er‐
217       lang  term.  If  the  value is a list, it is used as is. If it is not a
218       list, it is put into a list.
219
220       The list is appended to any options given to file/2.
221
222       The list can be retrieved with  compile:env_compiler_options/0.
223

INPUT FILE FORMAT

225       Erlang style comments starting with a % are allowed in scanner files. A
226       definition file has the following format:
227
228       <Header>
229
230       Definitions.
231
232       <Macro Definitions>
233
234       Rules.
235
236       <Token Rules>
237
238       Erlang code.
239
240       <Erlang code>
241
242       The  "Definitions.", "Rules." and "Erlang code." headings are mandatory
243       and must occur at the beginning of a source line. The <Header>,  <Macro
244       Definitions>  and <Erlang code> sections may be empty but there must be
245       at least one rule.
246
247       Macro definitions have the following format:
248
249       NAME = VALUE
250
251       and there must be spaces around =. Macros can be used  in  the  regular
252       expressions of rules by writing {NAME}.
253
254   Note:
255       When macros are expanded in expressions the macro calls are replaced by
256       the macro value without any form of quoting or enclosing  in  parenthe‐
257       ses.
258
259
260       Rules have the following format:
261
262       <Regexp> : <Erlang code>.
263
264       The  <Regexp>  must  occur  at  the start of a line and not include any
265       blanks; use \t and \s to include TAB and SPACE characters in the  regu‐
266       lar  expression.  If  <Regexp>  matches  then the corresponding <Erlang
267       code> is evaluated to generate a token. Within the Erlang code the fol‐
268       lowing predefined variables are available:
269
270         TokenChars:
271           A list of the characters in the matched token.
272
273         TokenLen:
274           The number of characters in the matched token.
275
276         TokenLine:
277           The line number where the token occurred.
278
279       The code must return:
280
281         {token,Token}:
282           Return Token to the caller.
283
284         {end_token,Token}:
285           Return Token, which is the last token in a tokens call.
286
287         skip_token:
288           Skip this token completely.
289
290         {error,ErrString}:
291           An error in the token, ErrString is a string describing the error.
292
293       It  is  also possible to push back characters into the input characters
294       with the following returns:
295
296         * {token,Token,PushBackList}
297
298         * {end_token,Token,PushBackList}
299
300         * {skip_token,PushBackList}
301
302       These have the same meanings as the normal returns but  the  characters
303       in  PushBackList  will be prepended to the input characters and scanned
304       for the next token. Note that pushing back a newline will mean the line
305       numbering will no longer be correct.
306
307   Note:
308       Pushing back characters gives you unexpected possibilities to cause the
309       scanner to loop!
310
311
312       The following example would match a simple Erlang integer or float  and
313       return a token which could be sent to the Erlang parser:
314
315       D = [0-9]
316
317       {D}+ :
318         {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
319
320       {D}+\.{D}+((E|e)(\+|\-)?{D}+)? :
321         {token,{float,TokenLine,list_to_float(TokenChars)}}.
322
323       The  Erlang code in the "Erlang code." section is written into the out‐
324       put file directly after the module declaration and  predefined  exports
325       declaration  so it is possible to add extra exports, define imports and
326       other attributes which are then visible in the whole file.
327

REGULAR EXPRESSIONS

329       The regular expressions allowed here are a subset of the set found  in
330       egrep  and in the AWK programming language, as defined in the book, The
331       AWK Programming Language, by A. V. Aho, B. W. Kernighan,  P.  J.  Wein‐
332       berger. They are composed of the following characters:
333
334         c:
335           Matches the non-metacharacter c.
336
337         \c:
338           Matches the escape sequence or literal character c.
339
340         .:
341           Matches any character.
342
343         ^:
344           Matches the beginning of a string.
345
346         $:
347           Matches the end of a string.
348
349         [abc...]:
350           Character  class, which matches any of the characters abc.... Char‐
351           acter ranges are specified by a pair of characters separated  by  a
352           -.
353
354         [^abc...]:
355           Negated character class, which matches any character except abc....
356
357         r1 | r2:
358           Alternation. It matches either r1 or r2.
359
360         r1r2:
361           Concatenation. It matches r1 and then r2.
362
363         r+:
364           Matches one or more rs.
365
366         r*:
367           Matches zero or more rs.
368
369         r?:
370           Matches zero or one rs.
371
372         (r):
373           Grouping. It matches r.
374
375       The escape sequences allowed are the same as for Erlang strings:
376
377         \b:
378           Backspace.
379
380         \f:
381           Form feed.
382
383         \n:
384           Newline (line feed).
385
386         \r:
387           Carriage return.
388
389         \t:
390           Tab.
391
392         \e:
393           Escape.
394
395         \v:
396           Vertical tab.
397
398         \s:
399           Space.
400
401         \d:
402           Delete.
403
404         \ddd:
405           The octal value ddd.
406
407         \xhh:
408           The hexadecimal value hh.
409
410         \x{h...}:
411           The hexadecimal value h....
412
413         \c:
414           Any other character literally, for example \\ for backslash, \" for
415           ".
416
417       The following examples define simplified versions of a few Erlang  data
418       types:
419
420       Atoms [a-z][0-9a-zA-Z_]*
421
422       Variables [A-Z_][0-9a-zA-Z_]*
423
424       Floats (\+|-)?[0-9]+\.[0-9]+((E|e)(\+|-)?[0-9]+)?
425
426   Note:
427       Anchoring  a  regular expression with ^ and $ is not implemented in the
428       current version of Leex and just generates a parse error.
429
430
431
432Ericsson AB                    parsetools 2.3.2                        leex(3)
Impressum