1uconv_u16tou32(3C)       Standard C Library Functions       uconv_u16tou32(3C)
2
3
4

NAME

6       uconv_u16tou32,     uconv_u16tou8,    uconv_u32tou16,    uconv_u32tou8,
7       uconv_u8tou16, uconv_u8tou32 - Unicode encoding conversion functions
8

SYNOPSIS

10       #include <sys/types.h>
11       #include <sys/errno.h>
12       #include <sys/u8_textprep.h>
13
14       int uconv_u16tou32(const uint16_t *utf16str, size_t *utf16len,
15            uint32_t *utf32str, size_t *utf32len, int flag);
16
17
18       int uconv_u16tou8(const uint16_t *utf16str, size_t *utf16len,
19            uchar_t *utf8str, size_t *utf8len, int flag);
20
21
22       int uconv_u32tou16(const uint32_t *utf32str, size_t *utf32len,
23            uint16_t *utf16str, size_t *utf16len, int flag);
24
25
26       int uconv_u32tou8(const uint32_t *utf32str, size_t *utf32len,
27            uchar_t *utf8str, size_t *utf8len, int flag);
28
29
30       int uconv_u8tou16(const uchar_t *utf8str, size_t *utf8len,
31            uint16_t *utf16str, size_t *utf16len, int flag);
32
33
34       int uconv_u8tou32(const uchar_t *utf8str, size_t *utf8len,
35            uint32_t *utf32str, size_t *utf32len, int flag);
36
37

PARAMETERS

39       utf16str    A pointer to a UTF-16 character string.
40
41
42       utf16len    As an input parameter, the number of 16-bit unsigned  inte‐
43                   gers  in  utf16str  as UTF-16 characters to be converted or
44                   saved.
45
46                   As an output parameter, the number of 16-bit unsigned inte‐
47                   gers in utf16str consumed or saved during conversion.
48
49
50       utf32str    A pointer to a UTF-32 character string.
51
52
53       utf32len    As  an input parameter, the number of 32-bit unsigned inte‐
54                   gers in utf32str as UTF-32 characters to  be  converted  or
55                   saved.
56
57                   As an output parameter, the number of 32-bit unsigned inte‐
58                   gers in utf32str consumed or saved during conversion.
59
60
61       utf8str     A pointer to a UTF-8 character string.
62
63
64       utf8len     As an input parameter, the number of bytes  in  utf8str  as
65                   UTF-8 characters to be converted or saved.
66
67                   As an output parameter, the number of bytes in utf8str con‐
68                   sumed or saved during conversion.
69
70
71       flag        The possible conversion options that are constructed  by  a
72                   bitwise-inclusive-OR of the following values:
73
74                   UCONV_IN_BIG_ENDIAN
75
76                       The input parameter is in big endian byte ordering.
77
78
79                   UCONV_OUT_BIG_ENDIAN
80
81                       The  output  parameter  should  be  in  big endian byte
82                       ordering.
83
84
85                   UCONV_IN_SYSTEM_ENDIAN
86
87                       The input parameter is in the default byte ordering  of
88                       the current system.
89
90
91                   UCONV_OUT_SYSTEM_ENDIAN
92
93                       The  output  parameter  should  be  in the default byte
94                       ordering of the current system.
95
96
97                   UCONV_IN_LITTLE_ENDIAN
98
99                       The input parameter is in little endian byte ordering.
100
101
102                   UCONV_OUT_LITTLE_ENDIAN
103
104                       The output parameter should be in  little  endian  byte
105                       ordering.
106
107
108                   UCONV_IGNORE_NULL
109
110                       The  null  or U+0000 character should not stop the con‐
111                       version.
112
113
114                   UCONV_IN_ACCEPT_BOM
115
116                       If the Byte Order Mark (BOM, U+FEFF)  character  exists
117                       as  the  first character of the input parameter, inter‐
118                       pret it as the BOM character.
119
120
121                   UCONV_OUT_EMIT_BOM
122
123                       Start the output parameter with Byte Order  Mark  (BOM,
124                       U+FEFF)  character to indicate the byte ordering if the
125                       output parameter is in UTF-16 or UTF-32.
126
127
128

DESCRIPTION

130       The uconv_u16tou32() function reads the given utf16str in UTF-16  until
131       U+0000  (zero)  in  utf16str is encountered as a character or until the
132       number of 16-bit unsigned integers specified in utf16len is  read.  The
133       UTF-16  characters  that  are  read  are  converted into UTF-32 and the
134       result is saved at utf32str. After the successful conversion,  utf32len
135       contains  the  number  of 32-bit unsigned integers saved at utf32str as
136       UTF-32 characters.
137
138
139       The uconv_u16tou8() function reads the given utf16str in  UTF-16  until
140       U+0000  (zero)  in  utf16str is encountered as a character or until the
141       number of 16-bit unsigned integers specified in utf16len is  read.  The
142       UTF-16 characters that are read are converted into UTF-8 and the result
143       is saved at utf8str. After the successful conversion, utf8len  contains
144       the number of bytes saved at utf8str as UTF-8 characters.
145
146
147       The  uconv_u32tou16() function reads the given utf32str in UTF-32 until
148       U+0000 (zero) in utf32str is encountered as a character  or  until  the
149       number  of  32-bit unsigned integers specified in utf32len is read. The
150       UTF-32 characters that are read  are  converted  into  UTF-16  and  the
151       result  is saved at utf16str. After the successful conversion, utf16len
152       contains the number of 16-bit unsigned integers saved  at  utf16str  as
153       UTF-16 characters.
154
155
156       The  uconv_u32tou8()  function reads the given utf32str in UTF-32 until
157       U+0000 (zero) in utf32str is encountered as a character  or  until  the
158       number  of  32-bit unsigned integers specified in utf32len is read. The
159       UTF-32 characters that are read are converted into UTF-8 and the result
160       is  saved at utf8str. After the successful conversion, utf8len contains
161       the number of bytes saved at utf8str as UTF-8 characters.
162
163
164       The uconv_u8tou16() function reads the given utf8str in UTF-8 until the
165       null ('\0') byte in utf8str is encountered or until the number of bytes
166       specified in utf8len is read. The UTF-8 characters that  are  read  are
167       converted  into  UTF-16  and the result is saved at utf16str. After the
168       successful conversion, utf16len contains the number of 16-bit  unsigned
169       integers saved at utf16str as UTF-16 characters.
170
171
172       The uconv_u8tou32() function reads the given utf8str in UTF-8 until the
173       null ('\0') byte in utf8str is encountered or until the number of bytes
174       specified  in  utf8len  is read. The UTF-8 characters that are read are
175       converted into UTF-32 and the result is saved at  utf32str.  After  the
176       successful  conversion, utf32len contains the number of 32-bit unsigned
177       integers saved at utf32str as UTF-32 characters.
178
179
180       During the conversion, the input and the output parameters are  treated
181       with  byte  orderings  specified in the flag parameter. When not speci‐
182       fied, the default byte ordering of the system is used. The byte  order‐
183       ing flag value that is specified for UTF-8 is ignored.
184
185
186       When UCONV_IN_ACCEPT_BOM is specified as the flag and the first charac‐
187       ter of the string pointed to by the input parameter is the BOM  charac‐
188       ter,  the  value of the BOM character dictates the byte ordering of the
189       subsequent characters in the string pointed to by the input  parameter,
190       regardless  of  the  supplied input parameter byte ordering option flag
191       values. If UCONV_IN_ACCEPT_BOM is not specified, a BOM that appears  as
192       the  first  character  is treated as a regular Unicode character,  the
193       Zero Width No-Break Space (ZWNBSP) character.
194
195
196       When UCONV_IGNORE_NULL is specified, regardless of  whether  the  input
197       parameter  contains U+0000 or null byte, the conversion continues until
198       the specified number of input parameter elements at utf16len, utf32len,
199       or utf8len are entirely consumed during the conversion.
200
201
202       As  output  parameters, utf16len, utf32len, and utf8len are not changed
203       if conversion fails for any reason.
204

RETURN VALUES

206       Upon successful conversion, the functions return 0. Upon  failure,  the
207       functions return one of the following errno values:
208
209       EILSEQ    The conversion detected an illegal or out-of-bounds character
210                 value in the input parameter.
211
212
213       E2BIG     The conversion cannot finish because the  size  specified  in
214                 the output parameter is too small.
215
216
217       EINVAL    The  conversion  stops  due to an incomplete character at the
218                 end of the input string.
219
220
221       EBADF     Conflicting byte-ordering option flag values are detected.
222
223

EXAMPLES

225       Example 1 Convert a UTF-16 string in little-endian byte  ordering  into
226       a UTF-8 string.
227
228         #include <sys/types.h>
229         #include <sys/errno.h>
230         #include <sys/u8_textprep.h>
231         .
232         .
233         .
234         uint16_t u16s[MAXNAMELEN + 1];
235         uchar_t u8s[MAXNAMELEN + 1];
236         size_t u16len, u8len;
237         int ret;
238         .
239         .
240         .
241         u16len = u8len = MAXNAMELEN;
242         ret = uconv_u16tou8(u16s, &u16len, u8s, &u8len,
243             UCONV_IN_LITTLE_ENDIAN);
244         if (ret != 0) {
245              /* Conversion error occurred. */
246              return (ret);
247         }
248         .
249         .
250         .
251
252
253       Example 2 Convert a UTF-32 string in big endian byte ordering into lit‐
254       tle endian UTF-16.
255
256         #include <sys/types.h>
257         #include <sys/errno.h>
258         #include <sys/u8_textprep.h>
259         .
260         .
261         .
262         /*
263           * A UTF-32 character can be mapped to a UTF-16 character with
264           * two 16-bit integer entities as a "surrogate pair."
265           */
266         uint32_t u32s[101];
267         uint16_t u16s[101];
268         int ret;
269         size_t u32len, u16len;
270         .
271         .
272         .
273         u32len = u16len = 100;
274         ret = uconv_u32tou16(u32s, &u32len, u16s, &u16len,
275             UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
276         if (ret == 0) {
277              return (0);
278         } else if (ret == E2BIG) {
279              /* Use bigger output parameter and try just one more time. */
280              uint16_t u16s2[201];
281
282              u16len = 200;
283              ret = uconv_u32tou16(u32s, &u32len, u16s2, &u16len,
284                 UCONV_IN_BIG_ENDIAN | UCONV_OUT_LITTLE_ENDIAN);
285              if (ret == 0)
286                   return (0);
287         }
288
289         /* Otherwise, return -1 to indicate an error condition. */
290         return (-1);
291
292
293       Example 3 Convert a UTF-8 string  into  UTF-16  in  little-endian  byte
294       ordering.
295
296
297       Convert  a UTF-8 string into UTF-16 in little-endian byte ordering with
298       a Byte Order Mark (BOM) character at the beginning of the output param‐
299       eter.
300
301
302         #include <sys/types.h>
303         #include <sys/errno.h>
304         #include <sys/u8_textprep.h>
305         .
306         .
307         .
308         uchar_t u8s[MAXNAMELEN + 1];
309         uint16_t u16s[MAXNAMELEN + 1];
310         size_t u8len, u16len;
311         int ret;
312         .
313         .
314         .
315         u8len = u16len = MAXNAMELEN;
316         ret = uconv_u8tou16(u8s, &u8len, u16s, &u16len,
317             UCONV_OUT_LITTLE_ENDIAN | UCONV_OUT_EMIT_BOM);
318         if (ret != 0) {
319              /* Conversion error occurred. */
320              return (ret);
321         }
322         .
323         .
324         .
325
326

ATTRIBUTES

328       See attributes(5) for descriptions of the following attributes:
329
330
331
332
333       ┌─────────────────────────────┬─────────────────────────────┐
334       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
335       ├─────────────────────────────┼─────────────────────────────┤
336       │Interface Stability          │Committed                    │
337       ├─────────────────────────────┼─────────────────────────────┤
338       │MT-Level                     │MT-Safe                      │
339       └─────────────────────────────┴─────────────────────────────┘
340

SEE ALSO

342       attributes(5), uconv_u16tou32(9F)
343
344
345       The Unicode Standard (http://www.unicode.org)
346

NOTES

348       Each UTF-16 or UTF-32 character maps to a UTF-8 character  that  might
349       need from one to a maximum of four bytes.
350
351
352       One UTF-32 or UTF-8 character can yield two 16-bit unsigned integers as
353       a  UTF-16  character,  which  is a surrogate pair if the Unicode scalar
354       value is bigger than U+FFFF.
355
356
357       Ill-formed UTF-16 surrogate pairs are seen as illegal characters during
358       the conversion.
359
360
361
362SunOS 5.11                        18 Sep 2007               uconv_u16tou32(3C)
Impressum