1uconv_u16tou32(9F)       Kernel Functions for Drivers       uconv_u16tou32(9F)
2
3
4

NAME

6       uconv_u16tou32,     uconv_u16tou8,    uconv_u32tou16,    uconv_u32tou8,
7       uconv_u8tou16, uconv_u8tou32 - Unicode encoding conversion functions
8

SYNOPSIS

10       #include <sys/types.h>
11       #include <sys/errno.h>
12       #include <sys/sunddi.h>
13
14       int uconv_u16tou32(const uint16_t *utf16str, size_t *utf16len,
15            uint32_t *utf32str, size_t *utf32len, int flag);
16
17
18       int uconv_u16tou8(const uint16_t *utf16str, size_t *utf16len,
19            uchar_t *utf8str, size_t *utf8len, int flag);
20
21
22       int uconv_u32tou16(const uint32_t *utf32str, size_t *utf32len,
23            uint16_t *utf16str, size_t *utf16len, int flag);
24
25
26       int uconv_u32tou8(const uint32_t *utf32str, size_t *utf32len,
27            uchar_t *utf8str, size_t *utf8len, int flag);
28
29
30       int uconv_u8tou16(const uchar_t *utf8str, size_t *utf8len,
31            uint16_t *utf16str, size_t *utf16len, int flag);
32
33
34       int uconv_u8tou32(const uchar_t *utf8str, size_t *utf8len,
35            uint32_t *utf32str, size_t *utf32len, int flag);
36
37

INTERFACE LEVEL

39       Solaris DDI specific (Solaris DDI).
40

PARAMETERS

42       utf16str    A pointer to a UTF-16 character string.
43
44
45       utf16len    As an input parameter, the number of 16-bit unsigned  inte‐
46                   gers  in  utf16str  as UTF-16 characters to be converted or
47                   saved.
48
49                   As an output parameter, the number of 16-bit unsigned inte‐
50                   gers in utf16str consumed or saved during conversion.
51
52
53       utf32str    A pointer to a UTF-32 character string.
54
55
56       utf32len    As  an input parameter, the number of 32-bit unsigned inte‐
57                   gers in utf32str as UTF-32 characters to  be  converted  or
58                   saved.
59
60                   As an output parameter, the number of 32-bit unsigned inte‐
61                   gers in utf32str consumed or saved during conversion.
62
63
64       utf8str     A pointer to a UTF-8 character string.
65
66
67       utf8len     As an input parameter, the number of bytes  in  utf8str  as
68                   UTF-8 characters to be converted or saved.
69
70                   As an output parameter, the number of bytes in utf8str con‐
71                   sumed or saved during conversion.
72
73
74       flag        The possible conversion options that are constructed  by  a
75                   bitwise-inclusive-OR of the following values:
76
77                   UCONV_IN_BIG_ENDIAN
78
79                       The input parameter is in big endian byte ordering.
80
81
82                   UCONV_OUT_BIG_ENDIAN
83
84                       The  output  parameter  should  be  in  big endian byte
85                       ordering.
86
87
88                   UCONV_IN_SYSTEM_ENDIAN
89
90                       The input parameter is in the default byte ordering  of
91                       the current system.
92
93
94                   UCONV_OUT_SYSTEM_ENDIAN
95
96                       The  output  parameter  should  be  in the default byte
97                       ordering of the current system.
98
99
100                   UCONV_IN_LITTLE_ENDIAN
101
102                       The input parameter is in little endian byte ordering.
103
104
105                   UCONV_OUT_LITTLE_ENDIAN
106
107                       The output parameter should be in  little  endian  byte
108                       ordering.
109
110
111                   UCONV_IGNORE_NULL
112
113                       The  null  or U+0000 character should not stop the con‐
114                       version.
115
116
117                   UCONV_IN_ACCEPT_BOM
118
119                       If the Byte Order Mark (BOM, U+FEFF)  character  exists
120                       as  the  first character of the input parameter, inter‐
121                       pret it as the BOM character.
122
123
124                   UCONV_OUT_EMIT_BOM
125
126                       Start the output parameter with the Byte Order Mark (BOM,
127                       U+FEFF)  character to indicate the byte ordering if the
128                       output parameter is in UTF-16 or UTF-32.
129
130
131

DESCRIPTION

133       The uconv_u16tou32() function reads the given utf16str in UTF-16  until
134       U+0000  (zero)  in  utf16str is encountered as a character or until the
135       number of 16-bit unsigned integers specified in utf16len is  read.  The
136       UTF-16  characters  that  are  read  are  converted into UTF-32 and the
137       result is saved at utf32str. After the successful conversion,  utf32len
138       contains  the  number  of 32-bit unsigned integers saved at utf32str as
139       UTF-32 characters.
140
141
142       The uconv_u16tou8() function reads the given utf16str in  UTF-16  until
143       U+0000  (zero)  in  utf16str is encountered as a character or until the
144       number of 16-bit unsigned integers specified in utf16len is  read.  The
145       UTF-16 characters that are read are converted into UTF-8 and the result
146       is saved at utf8str. After the successful conversion, utf8len  contains
147       the number of bytes saved at utf8str as UTF-8 characters.
148
149
150       The  uconv_u32tou16() function reads the given utf32str in UTF-32 until
151       U+0000 (zero) in utf32str is encountered as a character  or  until  the
152       number  of  32-bit unsigned integers specified in utf32len is read. The
153       UTF-32 characters that are read  are  converted  into  UTF-16  and  the
154       result  is saved at utf16str. After the successful conversion, utf16len
155       contains the number of 16-bit unsigned integers saved  at  utf16str  as
156       UTF-16 characters.
157
158
159       The  uconv_u32tou8()  function reads the given utf32str in UTF-32 until
160       U+0000 (zero) in utf32str is encountered as a character  or  until  the
161       number  of  32-bit unsigned integers specified in utf32len is read. The
162       UTF-32 characters that are read are converted into UTF-8 and the result
163       is  saved at utf8str. After the successful conversion, utf8len contains
164       the number of bytes saved at utf8str as UTF-8 characters.
165
166
167       The uconv_u8tou16() function reads the given utf8str in UTF-8 until the
168       null ('\0') byte in utf8str is encountered or until the number of bytes
169       specified in utf8len is read. The UTF-8 characters that  are  read  are
170       converted  into  UTF-16  and the result is saved at utf16str. After the
171       successful conversion, utf16len contains the number of 16-bit  unsigned
172       integers saved at utf16str as UTF-16 characters.
173
174
175       The uconv_u8tou32() function reads the given utf8str in UTF-8 until the
176       null ('\0') byte in utf8str is encountered or until the number of bytes
177       specified  in  utf8len  is read. The UTF-8 characters that are read are
178       converted into UTF-32 and the result is saved at  utf32str.  After  the
179       successful  conversion, utf32len contains the number of 32-bit unsigned
180       integers saved at utf32str as UTF-32 characters.
181
182
183       During the conversion, the input and the output parameters are  treated
184       with  byte  orderings  specified in the flag parameter. When not speci‐
185       fied, the default byte ordering of the system is used. The byte  order‐
186       ing flag value that is specified for UTF-8 is ignored.
187
188
189       When UCONV_IN_ACCEPT_BOM is specified as the flag and the first charac‐
190       ter of the string pointed to by the input parameter is the BOM  charac‐
191       ter,  the  value of the BOM character dictates the byte ordering of the
192       subsequent characters in the string pointed to by the input  parameter,
193       regardless  of  the  supplied input parameter byte ordering option flag
194       values. If the UCONV_IN_ACCEPT_BOM is not specified,  the  BOM  as  the
195       first  character  is treated as a regular Unicode character: Zero Width
196       No Break Space (ZWNBSP) character.
197
198
199       When UCONV_IGNORE_NULL is specified, regardless of  whether  the  input
200       parameter  contains U+0000 or null byte, the conversion continues until
201       the specified number of input parameter elements at utf16len, utf32len,
202       or utf8len are entirely consumed during the conversion.
203
204
205       As  output  parameters, utf16len, utf32len, and utf8len are not changed
206       if conversion fails for any reason.
207

CONTEXT

209       The      uconv_u16tou32(),      uconv_u16tou8(),      uconv_u32tou16(),
210       uconv_u32tou8(),  uconv_u8tou16(), and uconv_u8tou32() functions can be
211       called from user or interrupt context.
212

RETURN VALUES

214       Upon successful conversion, the functions return 0. Upon  failure,  the
215       functions return one of the following errno values:
216
217       EILSEQ    The  conversion detected an illegal or out of bound character
218                 value in the input parameter.
219
220
221       E2BIG     The conversion cannot finish because the  size  specified  in
222                 the output parameter is too small.
223
224
225       EINVAL    The  conversion  stops  due to an incomplete character at the
226                 end of the input string.
227
228
229       EBADF     Conflicting byte-ordering option flag values are detected.
230
231

EXAMPLES

233       Example 1 Convert a UTF-16 string in little-endian byte  ordering  into
234       a UTF-8 string.
235
236         #include <sys/types.h>
237         #include <sys/errno.h>
238         #include <sys/sunddi.h>
239         .
240         .
241         .
242         uint16_t u16s[MAXNAMELEN + 1];
243         uchar_t u8s[MAXNAMELEN + 1];
244         size_t u16len, u8len;
245         int ret;
246         .
247         .
248         .
249         u16len = u8len = MAXNAMELEN;
250         ret = uconv_u16tou8(u16s, &u16len, u8s, &u8len,
251             UCONV_IN_LITTLE_ENDIAN);
252         if (ret != 0) {
253              /* Conversion error occurred. */
254              return (ret);
255         }
256         .
257         .
258         .
259
260
261       Example 2 Convert a UTF-32 string in big endian byte ordering into lit‐
262       tle endian UTF-16.
263
264         #include <sys/types.h>
265         #include <sys/errno.h>
266         #include <sys/sunddi.h>
267         .
268         .
269         .
270         /*
271           * A UTF-32 character can be mapped to a UTF-16 character with
272           * two 16-bit integer entities as a "surrogate pair."
273           */
274         uint32_t u32s[101];
275         uint16_t u16s[101];
276         int ret;
277         size_t u32len, u16len;
278         .
279         .
280         .
281         u32len = u16len = 100;
282         ret = uconv_u32tou16(u32s, &u32len, u16s, &u16len,
283             UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
284         if (ret == 0) {
285              return (0);
286         } else if (ret == E2BIG) {
287              /* Use bigger output parameter and try just one more time. */
288              uint16_t u16s2[201];
289
290              u16len = 200;
291              ret = uconv_u32tou16(u32s, &u32len, u16s2, &u16len,
292                 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
293              if (ret == 0)
294                   return (0);
295         }
296
297         /* Otherwise, return -1 to indicate an error condition. */
298         return (-1);
299
300
301       Example 3 Convert a UTF-8 string  into  UTF-16  in  little-endian  byte
302       ordering.
303
304
305       Convert  a UTF-8 string into UTF-16 in little-endian byte ordering with
306       a Byte Order Mark (BOM) character at the beginning of the output param‐
307       eter.
308
309
310         #include <sys/types.h>
311         #include <sys/errno.h>
312         #include <sys/sunddi.h>
313         .
314         .
315         .
316         uchar_t u8s[MAXNAMELEN + 1];
317         uint16_t u16s[MAXNAMELEN + 1];
318         size_t u8len, u16len;
319         int ret;
320         .
321         .
322         .
323         u8len = u16len = MAXNAMELEN;
324         ret = uconv_u8tou16(u8s, &u8len, u16s, &u16len,
325             UCONV_OUT_LITTLE_ENDIAN | UCONV_OUT_EMIT_BOM);
326         if (ret != 0) {
327              /* Conversion error occurred. */
328              return (ret);
329         }
330         .
331         .
332         .
333
334

ATTRIBUTES

336       See attributes(5) for descriptions of the following attributes:
337
338
339
340
341       ┌─────────────────────────────┬─────────────────────────────┐
342       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
343       ├─────────────────────────────┼─────────────────────────────┤
344       │Interface Stability          │Committed                    │
345       └─────────────────────────────┴─────────────────────────────┘
346

SEE ALSO

348       uconv_u16tou32(3C), attributes(5)
349
350
351       The Unicode Standard (http://www.unicode.org)
352

NOTES

354       Each  UTF-16  or UTF-32 character maps to a UTF-8 character that might
355       need from one to a maximum of four bytes.
356
357
358       One UTF-32 or UTF-8 character can yield two 16-bit unsigned integers as
359       a  UTF-16  character,  which  is a surrogate pair if the Unicode scalar
360       value is bigger than U+FFFF.
361
362
363       Ill-formed UTF-16 surrogate pairs are seen as illegal characters during
364       the conversion.
365
366
367
368SunOS 5.11                        18 Sep 2007               uconv_u16tou32(9F)
Impressum