1Genlex(3)                        OCaml library                       Genlex(3)
2
3
4

NAME

6       Genlex - A generic lexical analyzer.
7

Module

9       Module   Genlex
10

Documentation

12       Module Genlex
13        : sig end
14
15
16       A generic lexical analyzer.
17
18       This  module implements a simple 'standard' lexical analyzer, presented
19       as a function from character streams to token  streams.  It  implements
20       roughly  the  lexical conventions of OCaml, but is parameterized by the
21       set of keywords of your language.
22
23       Example: a lexer suitable for a desk calculator is obtained by:
24       let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"]
25
26       The associated parser would be a function from token streams  to,  for
27       instance, int, and would have rules such as:
28
29
30                  let rec parse_expr = parser
31                    | [< n1 = parse_atom; n2 = parse_remainder n1 >] -> n2
32                  and parse_atom = parser
33                    | [< 'Int n >] -> n
34                    | [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n
35                  and parse_remainder n1 = parser
36                    | [< 'Kwd "+"; n2 = parse_expr >] -> n1+n2
37                    | [< >] -> n1
38
39
40       One  should  note  that the parser keyword and the associated notation
41       for streams are only available through camlp4 extensions.  This  means
42       that  one  has  to  preprocess one's sources, e.g. by using the "-pp"
43       command-line switch of the compilers.
44
45
46
47
48
49       type token =
50        | Kwd of string
51        | Ident of string
52        | Int of int
53        | Float of float
54        | String of string
55        | Char of char
56
57
58       The type of tokens. The lexical classes are: Int and Float for  integer
59       and  floating-point  numbers;  String  for string literals, enclosed in
60       double quotes; Char for character literals, enclosed in single  quotes;
61       Ident for identifiers (either sequences of letters, digits, underscores
62       and quotes, or sequences of 'operator characters' such as +, *,  etc.);
63       and Kwd for keywords (either identifiers or single 'special characters'
64       such as (, }, etc.).
65
66
67
68       val make_lexer : string list -> char Stream.t -> token Stream.t
69
70       Construct the lexer function. The first argument is the  list  of  key‐
71       words.  An identifier s is returned as Kwd s if s belongs to this list,
72       and as Ident s otherwise.  A special character s is returned as  Kwd  s
73       if  s  belongs  to  this  list,  and  causes a lexical error (exception
74       Stream.Error with the offending lexeme  as  its  parameter)  otherwise.
75       Blanks  and  newlines  are skipped. Comments delimited by (* and *) are
76       skipped as well, and can  be  nested.  A  Stream.Failure  exception  is
77       raised if end of stream is unexpectedly reached.
78
79
80
81
82
83OCamldoc                          2020-02-27                         Genlex(3)
Impressum