1leex(3)                    Erlang Module Definition                    leex(3)
2
3
4

NAME

6       leex - Lexical analyzer generator for Erlang
7

DESCRIPTION

9       A regular expression based lexical analyzer generator for Erlang, simi‐
10       lar to lex or flex.
11
12   Note:
13       The Leex module should be considered experimental as it will be subject
14       to changes in future releases.
15
16

DATA TYPES

18       error_info() =
19           {erl_anno:line() | none, module(), ErrorDescriptor :: term()}
20
21              The  standard  error_info()  structure that is returned from all
22              I/O modules. ErrorDescriptor is formattable by format_error/1.
23

EXPORTS

25       file(FileName) -> leex_ret()
26
27       file(FileName, Options) -> leex_ret()
28
29              Types:
30
31                 FileName = file:filename()
32                 Options = Option | [Option]
33                 Option =
34                     {dfa_graph, boolean()} |
35                     {includefile, Includefile :: file:filename()} |
36                     {report_errors, boolean()} |
37                     {report_warnings, boolean()} |
38                     {report, boolean()} |
39                     {return_errors, boolean()} |
40                     {return_warnings, boolean()} |
41                     {return, boolean()} |
42                     {scannerfile, Scannerfile :: file:filename()} |
43                     {verbose, boolean()} |
44                     {warnings_as_errors, boolean()} |
45                     {deterministic, boolean()} |
46                     dfa_graph | report_errors | report_warnings | report |
47                     return_errors | return_warnings | return | verbose |
48                     warnings_as_errors
49                 leex_ret() = ok_ret() | error_ret()
50                 ok_ret() =
51                     {ok, Scannerfile :: file:filename()} |
52                     {ok, Scannerfile :: file:filename(), warnings()}
53                 error_ret() =
54                     error | {error, Errors :: errors(), Warnings :: warnings()}
55                 errors() = [{file:filename(), [error_info()]}]
56                 warnings() = [{file:filename(), [error_info()]}]
57
58              Generates a lexical analyzer from the definition  in  the  input
59              file.  The  input  file has the extension .xrl. This is added to
60              the filename if it is not given. The resulting module is the Xrl
61              filename without the .xrl extension.
62
63              The current options are:
64
65                dfa_graph:
66                  Generates  a  .dot  file which contains a description of the
67                  DFA  in  a  format  which  can  be  viewed  with   Graphviz,
68                  www.graphviz.org.
69
70                {includefile,Includefile}:
71                  Uses a specific or customised prologue file instead  of  the
72                  default  lib/parsetools/include/leexinc.hrl  which is otherwise
73                  included.
74
75                {report_errors, boolean()}:
76                  Causes errors to be printed as they occur. Default is true.
77
78                {report_warnings, boolean()}:
79                  Causes  warnings  to  be  printed  as they occur. Default is
80                  true.
81
82                {report, boolean()}:
83                  This is a short form for both report_errors and report_warn‐
84                  ings.
85
86                {return_errors, boolean()}:
87                  If  this  flag is set, {error, Errors, Warnings} is returned
88                  when there are errors. Default is false.
89
90                {return_warnings, boolean()}:
91                  If this flag is set, an extra field containing  Warnings  is
92                  added to the tuple returned upon success. Default is false.
93
94                {return, boolean()}:
95                  This is a short form for both return_errors and return_warn‐
96                  ings.
97
98                {scannerfile, Scannerfile}:
99                  Scannerfile is the name of the file that  will  contain  the
100                  Erlang  scanner  code that is generated. The default ("") is
101                  to add the extension .erl to FileName stripped of  the  .xrl
102                  extension.
103
104                {verbose, boolean()}:
105                  Outputs information from parsing the input file and generat‐
106                  ing the internal tables.
107
108                {warnings_as_errors, boolean()}:
109                  Causes warnings to be treated as errors.
110
111                {deterministic, boolean()}:
112                  Causes generated -file()  attributes  to  only  include  the
113                  basename of the file path.
114
115              Any  of  the  Boolean  options can be set to true by stating the
116              name of the option. For example, verbose is equivalent to  {ver‐
117              bose, true}.
118
119              Leex will add the extension .hrl to the Includefile name and the
120              extension .erl to the Scannerfile name, unless the extension  is
121              already there.
122
123       format_error(ErrorDescriptor) -> io_lib:chars()
124
125              Types:
126
127                 ErrorDescriptor = term()
128
129              Returns  a  descriptive string in English of an error reason Er‐
130              rorDescriptor returned by leex:file/1,2 when there is  an  error
131              in a regular expression.
132

GENERATED SCANNER EXPORTS

134       The following functions are exported by the generated scanner.
135

EXPORTS

137       Module:string(String) -> StringRet
138       Module:string(String, StartLine) -> StringRet
139
140              Types:
141
142                 String = string()
143                 StringRet = {ok,Tokens,EndLine} | ErrorInfo
144                 Tokens = [Token]
145                 EndLine = StartLine = erl_anno:line()
146
147              Scans String and returns all the tokens in it, or an error.
148
149          Note:
150              It  is  an error if not all of the characters in String are con‐
151              sumed.
152
153
154       Module:token(Cont, Chars) -> {more,Cont1} | {done,TokenRet,RestChars}
155       Module:token(Cont, Chars, StartLine)  ->  {more,Cont1}  |  {done,Token‐
156       Ret,RestChars}
157
158              Types:
159
160                 Cont = [] | Cont1
161                 Cont1 = tuple()
162                 Chars = RestChars = string() | eof
163                 TokenRet = {ok, Token, EndLine} | {eof, EndLine} | ErrorInfo
164                 StartLine = EndLine = erl_anno:line()
165
166              This  is a re-entrant call to try and scan one token from Chars.
167              If there are enough characters in Chars to either scan  a  token
168              or  detect  an error then this will be returned with {done,...}.
169              Otherwise  {more,Cont1} will be returned, where Cont1 is used in
170              the next call to token() with more characters to  try  and  scan
171              the  token.  This  is continued until a token has been scanned.
172              Cont is initially [].
173
174              It  is  not designed to be called directly by an application but
175              used through the i/o system where it can typically be called  in
176              an application by:
177
178              io:request(InFile, {get_until,unicode,Prompt,Module,token,[Line]})
179                -> TokenRet
180
181       Module:tokens(Cont, Chars) -> {more,Cont1} | {done,TokensRet,RestChars}
182       Module:tokens(Cont,  Chars,  StartLine)  -> {more,Cont1} | {done,Token‐
183       sRet,RestChars}
184
185              Types:
186
187                 Cont = [] | Cont1
188                 Cont1 = tuple()
189                 Chars = RestChars = string() | eof
190                 TokensRet = {ok, Tokens, EndLine} | {eof, EndLine}  |  Error‐
191                 Info
192                 Tokens = [Token]
193                 StartLine = EndLine = erl_anno:line()
194
195              This  is a re-entrant call to try and scan tokens from Chars. If
196              there are enough characters in Chars to either  scan  tokens  or
197              detect an error then this will be returned with {done,...}. Oth‐
198              erwise {more,Cont1} will be returned, where Cont1 is used in the
199              next call to tokens() with more characters to try and  scan  the
200              tokens. This is continued until all tokens  have  been  scanned.
201              Cont is initially [].
202
203              This function differs from token in that it  will  continue  to
204              scan    tokens    until   an   {end_token,Token}   has   been
205              scanned  (see next section). It will then return all the tokens.
206              This is typically used for scanning grammars like  Erlang  where
207              there  is  an  explicit end token, '.'. If no end token is found
208              then the whole file will be scanned and returned.  If  an  error
209              occurs  then  all  tokens up to and including the next end token
210              will be skipped.
211
212              It is not designed to be called directly by an  application  but
213              used  through the i/o system where it can typically be called in
214              an application by:
215
216              io:request(InFile, {get_until,unicode,Prompt,Module,tokens,[Line]})
217                -> TokensRet
218

DEFAULT LEEX OPTIONS

220       The (host operating system) environment  variable  ERL_COMPILER_OPTIONS
221       can be used to give default Leex options. Its value must be a valid Er‐
222       lang term. If the value is a list, it is used as is. If  it  is  not  a
223       list, it is put into a list.
224
225       The list is appended to any options given to file/2.
226
227       The list can be retrieved with  compile:env_compiler_options/0.
228

INPUT FILE FORMAT

230       Erlang style comments starting with a % are allowed in scanner files. A
231       definition file has the following format:
232
233       <Header>
234
235       Definitions.
236
237       <Macro Definitions>
238
239       Rules.
240
241       <Token Rules>
242
243       Erlang code.
244
245       <Erlang code>
246
247       The "Definitions.", "Rules." and "Erlang code." headings are  mandatory
248       and  must occur at the beginning of a source line. The <Header>, <Macro
249       Definitions> and <Erlang code> sections may be empty but there must  be
250       at least one rule.
251
252       Macro definitions have the following format:
253
254       NAME = VALUE
255
256       and  there  must  be spaces around =. Macros can be used in the regular
257       expressions of rules by writing {NAME}.
258
259   Note:
260       When macros are expanded in expressions the macro calls are replaced by
261       the  macro  value without any form of quoting or enclosing in parenthe‐
262       ses.
263
264
265       Rules have the following format:
266
267       <Regexp> : <Erlang code>.
268
269       The <Regexp> must occur at the start of a  line  and  not  include  any
270       blanks;  use \t and \s to include TAB and SPACE characters in the regu‐
271       lar expression. If <Regexp>  matches  then  the  corresponding  <Erlang
272       code> is evaluated to generate a token. Within the Erlang code the fol‐
273       lowing predefined variables are available:
274
275         TokenChars:
276           A list of the characters in the matched token.
277
278         TokenLen:
279           The number of characters in the matched token.
280
281         TokenLine:
282           The line number where the token occurred.
283
284       The code must return:
285
286         {token,Token}:
287           Return Token to the caller.
288
289         {end_token,Token}:
290           Return Token, which is the last token in a tokens call.
291
292         skip_token:
293           Skip this token completely.
294
295         {error,ErrString}:
296           An error in the token, ErrString is a string describing the error.
297
298       It is also possible to push back characters into the  input  characters
299       with the following returns:
300
301         * {token,Token,PushBackList}
302
303         * {end_token,Token,PushBackList}
304
305         * {skip_token,PushBackList}
306
307       These  have  the same meanings as the normal returns but the characters
308       in PushBackList will be prepended to the input characters  and  scanned
309       for the next token. Note that pushing back a newline will mean the line
310       numbering will no longer be correct.
311
312   Note:
313       Pushing back characters gives you unexpected possibilities to cause the
314       scanner to loop!
315
316
317       The  following example would match a simple Erlang integer or float and
318       return a token which could be sent to the Erlang parser:
319
320       D = [0-9]
321
322       {D}+ :
323         {token,{integer,TokenLine,list_to_integer(TokenChars)}}.
324
325       {D}+\.{D}+((E|e)(\+|\-)?{D}+)? :
326         {token,{float,TokenLine,list_to_float(TokenChars)}}.
327
328       The Erlang code in the "Erlang code." section is written into the  out‐
329       put  file  directly after the module declaration and predefined exports
330       declaration so it is possible to add extra exports, define imports  and
331       other attributes which are then visible in the whole file.
332

REGULAR EXPRESSIONS

334       The regular expressions allowed here are a subset of the set  found  in
335       egrep and in the AWK programming language, as defined in the book,  The
336       AWK  Programming  Language,  by A. V. Aho, B. W. Kernighan, P. J. Wein‐
337       berger. They are composed of the following characters:
338
339         c:
340           Matches the non-metacharacter c.
341
342         \c:
343           Matches the escape sequence or literal character c.
344
345         .:
346           Matches any character.
347
348         ^:
349           Matches the beginning of a string.
350
351         $:
352           Matches the end of a string.
353
354         [abc...]:
355           Character class, which matches any of the characters abc....  Char‐
356           acter  ranges  are specified by a pair of characters separated by a
357           -.
358
359         [^abc...]:
360           Negated character class, which matches any character except abc....
361
362         r1 | r2:
363           Alternation. It matches either r1 or r2.
364
365         r1r2:
366           Concatenation. It matches r1 and then r2.
367
368         r+:
369           Matches one or more rs.
370
371         r*:
372           Matches zero or more rs.
373
374         r?:
375           Matches zero or one rs.
376
377         (r):
378           Grouping. It matches r.
379
380       The escape sequences allowed are the same as for Erlang strings:
381
382         \b:
383           Backspace.
384
385         \f:
386           Form feed.
387
388         \n:
389           Newline (line feed).
390
391         \r:
392           Carriage return.
393
394         \t:
395           Tab.
396
397         \e:
398           Escape.
399
400         \v:
401           Vertical tab.
402
403         \s:
404           Space.
405
406         \d:
407           Delete.
408
409         \ddd:
410           The octal value ddd.
411
412         \xhh:
413           The hexadecimal value hh.
414
415         \x{h...}:
416           The hexadecimal value h....
417
418         \c:
419           Any other character literally, for example \\ for backslash, \" for
420           ".
421
422       The  following examples define simplified versions of a few Erlang data
423       types:
424
425       Atoms [a-z][0-9a-zA-Z_]*
426
427       Variables [A-Z_][0-9a-zA-Z_]*
428
429       Floats (\+|-)?[0-9]+\.[0-9]+((E|e)(\+|-)?[0-9]+)?
430
431   Note:
432       Anchoring a regular expression with ^ and $ is not implemented  in  the
433       current version of Leex and just generates a parse error.
434
435
436
437Ericsson AB                    parsetools 2.4.1                        leex(3)
Impressum