1Uchar(3)                         OCaml library                        Uchar(3)
2
3
4

NAME

6       Uchar - Unicode characters.
7

Module

9       Module   Uchar
10

Documentation

12       Module Uchar
13        : sig end
14
15
16       Unicode characters.
17
18
19       Since 4.03
20
21
22
23
24
25       type t
26
27
28       The type for Unicode characters.
29
30       A value of this type represents a Unicode scalar value, which is an
31       integer in the ranges 0x0000 ... 0xD7FF or 0xE000 ... 0x10FFFF.
32
33
34
35       val min : t
36
37
38       min is U+0000.
39
40
41
42       val max : t
43
44
45       max is U+10FFFF.
46
47
48
49       val bom : t
50
51
52       bom is U+FEFF, the byte order mark (BOM) character.
53
54
55       Since 4.06.0
56
57
58
59       val rep : t
60
61
62       rep is U+FFFD, the replacement character.
63
64
65       Since 4.06.0
66
67
68
69       val succ : t -> t
70
71
72       succ u is the scalar value after u in the set of Unicode scalar values.
73
74
75       Raises Invalid_argument if u is Uchar.max .
76
77
78
79       val pred : t -> t
80
81
82       pred u is the scalar value before u in the set of Unicode scalar
83       values.
84
85
86       Raises Invalid_argument if u is Uchar.min .
87
88
89
90       val is_valid : int -> bool
91
92
93       is_valid n is true if and only if n is a Unicode scalar value (i.e. in
94       the ranges 0x0000 ... 0xD7FF or 0xE000 ... 0x10FFFF).
95
96
97
98       val of_int : int -> t
99
100
101       of_int i is i as a Unicode character.
102
103
104       Raises Invalid_argument if i does not satisfy Uchar.is_valid .
105
106
107
108       val to_int : t -> int
109
110
111       to_int u is u as an integer.
112
113
114
115       val is_char : t -> bool
116
117
118       is_char u is true if and only if u is a latin1 OCaml character.
119
120
121
122       val of_char : char -> t
123
124
125       of_char c is c as a Unicode character.
126
127
128
129       val to_char : t -> char
130
131
132       to_char u is u as an OCaml latin1 character.
133
134
135       Raises Invalid_argument if u does not satisfy Uchar.is_char .
136
137
138
139       val equal : t -> t -> bool
140
141
142       equal u u' is u = u' .
143
144
145
146       val compare : t -> t -> int
147
148
149       compare u u' is Stdlib.compare u u' .
150
151
152
153       val hash : t -> int
154
155
156       hash u associates a non-negative integer with u.
157
158
159
160
161   UTF codec tools
162       type utf_decode
163
164
165       The type for UTF decode results. Values of this type represent the
166       result of a Unicode Transformation Format decoding attempt.
167
168
169
170       val utf_decode_is_valid : utf_decode -> bool
171
172
173       utf_decode_is_valid d is true if and only if d holds a valid decode.
174
175
176
177       val utf_decode_uchar : utf_decode -> t
178
179
180       utf_decode_uchar d is the Unicode character decoded by d if
181       utf_decode_is_valid d is true and Uchar.rep otherwise.
182
183
184
185       val utf_decode_length : utf_decode -> int
186
187
188       utf_decode_length d is the number of elements from the source that were
189       consumed by the decode d. This is always strictly positive and less
190       than or equal to 4. The kind of source elements depends on the actual
191       decoder; for the decoders of the standard library this function always
192       returns a length in bytes.
193
194
195
196       val utf_decode : int -> t -> utf_decode
197
198
199       utf_decode n u is a valid UTF decode for u that consumed n elements
200       from the source for decoding. n must be positive and less than or equal
201       to 4 (this is not checked by the module).
202
203
204
205       val utf_decode_invalid : int -> utf_decode
206
207
208       utf_decode_invalid n is an invalid UTF decode that consumed n elements
209       from the source before erroring. n must be positive and less than or
210       equal to 4 (this is not checked by the module). The resulting decode has
211       Uchar.rep as the decoded Unicode character.
212
213
214
215       val utf_8_byte_length : t -> int
216
217
218       utf_8_byte_length u is the number of bytes needed to encode u in UTF-8.
219
220
221
222       val utf_16_byte_length : t -> int
223
224
225       utf_16_byte_length  u  is  the  number  of  bytes needed to encode u in
226       UTF-16.
227
228
229
230
231
232OCamldoc                          2023-01-23                          Uchar(3)
Impressum