1uconv_u16tou32(9F) Kernel Functions for Drivers uconv_u16tou32(9F)
2
3
4
6 uconv_u16tou32, uconv_u16tou8, uconv_u32tou16, uconv_u32tou8,
7 uconv_u8tou16, uconv_u8tou32 - Unicode encoding conversion functions
8
10 #include <sys/types.h>
11 #include <sys/errno.h>
12 #include <sys/sunddi.h>
13
14 int uconv_u16tou32(const uint16_t *utf16str, size_t *utf16len,
15 uint32_t *utf32str, size_t *utf32len, int flag);
16
17
18 int uconv_u16tou8(const uint16_t *utf16str, size_t *utf16len,
19 uchar_t *utf8str, size_t *utf8len, int flag);
20
21
22 int uconv_u32tou16(const uint32_t *utf32str, size_t *utf32len,
23 uint16_t *utf16str, size_t *utf16len, int flag);
24
25
26 int uconv_u32tou8(const uint32_t *utf32str, size_t *utf32len,
27 uchar_t *utf8str, size_t *utf8len, int flag);
28
29
30 int uconv_u8tou16(const uchar_t *utf8str, size_t *utf8len,
31 uint16_t *utf16str, size_t *utf16len, int flag);
32
33
34 int uconv_u8tou32(const uchar_t *utf8str, size_t *utf8len,
35 uint32_t *utf32str, size_t *utf32len, int flag);
36
37
39 Solaris DDI specific (Solaris DDI).
40
42 utf16str A pointer to a UTF-16 character string.
43
44
45 utf16len As an input parameter, the number of 16-bit unsigned inte‐
46 gers in utf16str as UTF-16 characters to be converted or
47 saved.
48
49 As an output parameter, the number of 16-bit unsigned inte‐
50 gers in utf16str consumed or saved during conversion.
51
52
53 utf32str A pointer to a UTF-32 character string.
54
55
56 utf32len As an input parameter, the number of 32-bit unsigned inte‐
57 gers in utf32str as UTF-32 characters to be converted or
58 saved.
59
60 As an output parameter, the number of 32-bit unsigned inte‐
61 gers in utf32str consumed or saved during conversion.
62
63
64 utf8str A pointer to a UTF-8 character string.
65
66
67 utf8len As an input parameter, the number of bytes in utf8str as
68 UTF-8 characters to be converted or saved.
69
70 As an output parameter, the number of bytes in utf8str con‐
71 sumed or saved during conversion.
72
73
74 flag The possible conversion options that are constructed by a
75 bitwise-inclusive-OR of the following values:
76
77 UCONV_IN_BIG_ENDIAN
78
79 The input parameter is in big endian byte ordering.
80
81
82 UCONV_OUT_BIG_ENDIAN
83
84 The output parameter should be in big endian byte
85 ordering.
86
87
88 UCONV_IN_SYSTEM_ENDIAN
89
90 The input parameter is in the default byte ordering of
91 the current system.
92
93
94 UCONV_OUT_SYSTEM_ENDIAN
95
96 The output parameter should be in the default byte
97 ordering of the current system.
98
99
100 UCONV_IN_LITTLE_ENDIAN
101
102 The input parameter is in little endian byte ordering.
103
104
105 UCONV_OUT_LITTLE_ENDIAN
106
107 The output parameter should be in little endian byte
108 ordering.
109
110
111 UCONV_IGNORE_NULL
112
113 The null or U+0000 character should not stop the con‐
114 version.
115
116
117 UCONV_IN_ACCEPT_BOM
118
119 If the Byte Order Mark (BOM, U+FEFF) character exists
120 as the first character of the input parameter, inter‐
121 pret it as the BOM character.
122
123
124 UCONV_OUT_EMIT_BOM
125
126 Start the output parameter with the Byte Order Mark
127 (BOM, U+FEFF) character to indicate the byte ordering if
128 the output parameter is in UTF-16 or UTF-32.
129
130
131
133 The uconv_u16tou32() function reads the given utf16str in UTF-16 until
134 U+0000 (zero) in utf16str is encountered as a character or until the
135 number of 16-bit unsigned integers specified in utf16len is read. The
136 UTF-16 characters that are read are converted into UTF-32 and the
137 result is saved at utf32str. After the successful conversion, utf32len
138 contains the number of 32-bit unsigned integers saved at utf32str as
139 UTF-32 characters.
140
141
142 The uconv_u16tou8() function reads the given utf16str in UTF-16 until
143 U+0000 (zero) in utf16str is encountered as a character or until the
144 number of 16-bit unsigned integers specified in utf16len is read. The
145 UTF-16 characters that are read are converted into UTF-8 and the result
146 is saved at utf8str. After the successful conversion, utf8len contains
147 the number of bytes saved at utf8str as UTF-8 characters.
148
149
150 The uconv_u32tou16() function reads the given utf32str in UTF-32 until
151 U+0000 (zero) in utf32str is encountered as a character or until the
152 number of 32-bit unsigned integers specified in utf32len is read. The
153 UTF-32 characters that are read are converted into UTF-16 and the
154 result is saved at utf16str. After the successful conversion, utf16len
155 contains the number of 16-bit unsigned integers saved at utf16str as
156 UTF-16 characters.
157
158
159 The uconv_u32tou8() function reads the given utf32str in UTF-32 until
160 U+0000 (zero) in utf32str is encountered as a character or until the
161 number of 32-bit unsigned integers specified in utf32len is read. The
162 UTF-32 characters that are read are converted into UTF-8 and the result
163 is saved at utf8str. After the successful conversion, utf8len contains
164 the number of bytes saved at utf8str as UTF-8 characters.
165
166
167 The uconv_u8tou16() function reads the given utf8str in UTF-8 until the
168 null ('\0') byte in utf8str is encountered or until the number of bytes
169 specified in utf8len is read. The UTF-8 characters that are read are
170 converted into UTF-16 and the result is saved at utf16str. After the
171 successful conversion, utf16len contains the number of 16-bit unsigned
172 integers saved at utf16str as UTF-16 characters.
173
174
175 The uconv_u8tou32() function reads the given utf8str in UTF-8 until the
176 null ('\0') byte in utf8str is encountered or until the number of bytes
177 specified in utf8len is read. The UTF-8 characters that are read are
178 converted into UTF-32 and the result is saved at utf32str. After the
179 successful conversion, utf32len contains the number of 32-bit unsigned
180 integers saved at utf32str as UTF-32 characters.
181
182
183 During the conversion, the input and the output parameters are treated
184 with byte orderings specified in the flag parameter. When not speci‐
185 fied, the default byte ordering of the system is used. The byte order‐
186 ing flag value that is specified for UTF-8 is ignored.
187
188
189 When UCONV_IN_ACCEPT_BOM is specified as the flag and the first charac‐
190 ter of the string pointed to by the input parameter is the BOM charac‐
191 ter, the value of the BOM character dictates the byte ordering of the
192 subsequent characters in the string pointed to by the input parameter,
193 regardless of the supplied input parameter byte ordering option flag
194 values. If UCONV_IN_ACCEPT_BOM is not specified, the BOM as the
195 first character is treated as a regular Unicode character: the Zero
196 Width No-Break Space (ZWNBSP) character.
197
198
199 When UCONV_IGNORE_NULL is specified, regardless of whether the input
200 parameter contains U+0000 or null byte, the conversion continues until
201 the number of input parameter elements specified in utf16len, utf32len,
202 or utf8len is entirely consumed during the conversion.
203
204
205 As output parameters, utf16len, utf32len, and utf8len are not changed
206 if conversion fails for any reason.
207
209 The uconv_u16tou32(), uconv_u16tou8(), uconv_u32tou16(),
210 uconv_u32tou8(), uconv_u8tou16(), and uconv_u8tou32() functions can be
211 called from user or interrupt context.
212
214 Upon successful conversion, the functions return 0. Upon failure, the
215 functions return one of the following errno values:
216
217 EILSEQ The conversion detected an illegal or out of bound character
218 value in the input parameter.
219
220
221 E2BIG The conversion cannot finish because the size specified in
222 the output parameter is too small.
223
224
225 EINVAL The conversion stops due to an incomplete character at the
226 end of the input string.
227
228
229 EBADF Conflicting byte-ordering option flag values are detected.
230
231
233 Example 1 Convert a UTF-16 string in little-endian byte ordering into
234 a UTF-8 string.
235
236 #include <sys/types.h>
237 #include <sys/errno.h>
238 #include <sys/sunddi.h>
239 .
240 .
241 .
242 uint16_t u16s[MAXNAMELEN + 1];
243 uchar_t u8s[MAXNAMELEN + 1];
244 size_t u16len, u8len;
245 int ret;
246 .
247 .
248 .
249 u16len = u8len = MAXNAMELEN;
250 ret = uconv_u16tou8(u16s, &u16len, u8s, &u8len,
251 UCONV_IN_LITTLE_ENDIAN);
252 if (ret != 0) {
253 /* Conversion error occurred. */
254 return (ret);
255 }
256 .
257 .
258 .
259
260
261 Example 2 Convert a UTF-32 string in big endian byte ordering into lit‐
262 tle endian UTF-16.
263
264 #include <sys/types.h>
265 #include <sys/errno.h>
266 #include <sys/sunddi.h>
267 .
268 .
269 .
270 /*
271 * A UTF-32 character can be mapped to a UTF-16 character with
272 * two 16-bit integer entities as a "surrogate pair."
273 */
274 uint32_t u32s[101];
275 uint16_t u16s[101];
276 int ret;
277 size_t u32len, u16len;
278 .
279 .
280 .
281 u32len = u16len = 100;
282 ret = uconv_u32tou16(u32s, &u32len, u16s, &u16len,
283 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
284 if (ret == 0) {
285 return (0);
286 } else if (ret == E2BIG) {
287 /* Use bigger output parameter and try just one more time. */
288 uint16_t u16s2[201];
289
290 u16len = 200;
291 ret = uconv_u32tou16(u32s, &u32len, u16s2, &u16len,
292 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
293 if (ret == 0)
294 return (0);
295 }
296
297 /* Otherwise, return -1 to indicate an error condition. */
298 return (-1);
299
300
301 Example 3 Convert a UTF-8 string into UTF-16 in little-endian byte
302 ordering.
303
304
305 Convert a UTF-8 string into UTF-16 in little-endian byte ordering with
306 a Byte Order Mark (BOM) character at the beginning of the output param‐
307 eter.
308
309
310 #include <sys/types.h>
311 #include <sys/errno.h>
312 #include <sys/sunddi.h>
313 .
314 .
315 .
316 uchar_t u8s[MAXNAMELEN + 1];
317 uint16_t u16s[MAXNAMELEN + 1];
318 size_t u8len, u16len;
319 int ret;
320 .
321 .
322 .
323 u8len = u16len = MAXNAMELEN;
324 ret = uconv_u8tou16(u8s, &u8len, u16s, &u16len,
325 UCONV_OUT_LITTLE_ENDIAN | UCONV_OUT_EMIT_BOM);
326 if (ret != 0) {
327 /* Conversion error occurred. */
328 return (ret);
329 }
330 .
331 .
332 .
333
334
336 See attributes(5) for descriptions of the following attributes:
337
338
339
340
341 ┌─────────────────────────────┬─────────────────────────────┐
342 │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │
343 ├─────────────────────────────┼─────────────────────────────┤
344 │Interface Stability │Committed │
345 └─────────────────────────────┴─────────────────────────────┘
346
348 uconv_u16tou32(3C), attributes(5)
349
350
351 The Unicode Standard (http://www.unicode.org)
352
354 Each UTF-16 or UTF-32 character maps to a UTF-8 character that might
355 need from one to a maximum of four bytes.
356
357
358 One UTF-32 or UTF-8 character can yield two 16-bit unsigned integers as
359 a UTF-16 character, which is a surrogate pair if the Unicode scalar
360 value is bigger than U+FFFF.
361
362
363 Ill-formed UTF-16 surrogate pairs are seen as illegal characters during
364 the conversion.
365
366
367
368SunOS 5.11 18 Sep 2007 uconv_u16tou32(9F)