1u8_textprep_str(9F)      Kernel Functions for Drivers      u8_textprep_str(9F)
2
3
4

NAME

6       u8_textprep_str - string-based UTF-8 text preparation function
7

SYNOPSIS

9       #include <sys/types.h>
10       #include <sys/errno.h>
11       #include <sys/sunddi.h>
12
13       size_t u8_textprep_str(char *inarray, size_t *inlen,
14            char *outarray, size_t *outlen, int flag,
15            size_t unicode_version, int *errno);
16
17

INTERFACE LEVEL

19       Solaris DDI specific (Solaris DDI)
20

PARAMETERS

22       inarray             A  pointer to a byte array containing a sequence of
23                           UTF-8 character bytes to be prepared.
24
25
26       inlen               As input argument, the number of bytes to  be  pre‐
27                           pared in inarray. As output argument, the number of
28                           bytes in inarray still not consumed.
29
30
31       outarray            A pointer to a  byte  array  where  prepared  UTF-8
32                           character bytes can be saved.
33
34
35       outlen              As input argument, the number of available bytes at
36                           outarray where  prepared  character  bytes  can  be
37                           saved.   As  output argument, after the conversion,
38                           the number of bytes still available at outarray.
39
40
41       flag                The possible preparation options constructed  by  a
42                           bitwise-inclusive-OR of the following values:
43
44                           U8_TEXTPREP_IGNORE_NULL
45
46                               Normally u8_textprep_str() stops the prepara‐
47                               tion when it encounters a null byte, even if
48                               the value pointed to by inlen is still greater
49                               than zero.
50
51                               With this option, a null byte does not stop the
52                               preparation; the preparation continues until
53                               all inarray bytes specified by inlen have been
54                               consumed or an error occurs.
55
56
57                           U8_TEXTPREP_IGNORE_INVALID
58
59                               Normally  u8_textprep_str()  stops the prepara‐
60                               tion if it  encounters  illegal  or  incomplete
61                               characters with corresponding errno values.
62
63                               When this option is set, u8_textprep_str() does
64                               not stop the preparation and instead treats
65                               such characters as not needing any prepara‐
66                               tion.
67
68
69                           U8_TEXTPREP_TOUPPER
70
71                               Map lowercase characters to  uppercase  charac‐
72                               ters if applicable.
73
74
75                           U8_TEXTPREP_TOLOWER
76
77                               Map  uppercase  characters to lowercase charac‐
78                               ters if applicable.
79
80
81                           U8_TEXTPREP_NFD
82
83                               Apply Unicode Normalization Form D.
84
85
86                           U8_TEXTPREP_NFC
87
88                               Apply Unicode Normalization Form C.
89
90
91                           U8_TEXTPREP_NFKD
92
93                               Apply Unicode Normalization Form KD.
94
95
96                           U8_TEXTPREP_NFKC
97
98                               Apply Unicode Normalization Form KC.
99
100                           Only one case folding option is allowed.  Only  one
101                           Unicode Normalization option is allowed.
102
103                           When a case folding option and a Unicode Normaliza‐
104                           tion option  are  specified  together,  UTF-8  text
105                           preparation is done by doing case folding first and
106                           then Unicode Normalization.
107
108                           If no option is  specified,  no  processing  occurs
109                           except  the  simple  copying of bytes from input to
110                           output.
111
112
113       unicode_version     The version of Unicode data  that  should  be  used
114                           during UTF-8 text preparation. The following values
115                           are supported:
116
117                           U8_UNICODE_320
118
119                               Use Unicode 3.2.0 data during text preparation.
120
121
122                           U8_UNICODE_500
123
124                               Use Unicode 5.0.0 data during text preparation.
125
126
127                           U8_UNICODE_LATEST
128
129                               Use the latest Unicode version  data  available
130                               which is Unicode 5.0.0 currently.
131
132
133
134       errno               The  error  value when preparation is not completed
135                           or fails. The following values are supported:
136
137                           E2BIG     Text preparation stopped due to  lack  of
138                                     space in the output array.
139
140
141                           EBADF     Specified  option  values are conflicting
142                                     and cannot be supported.
143
144
145                           EILSEQ    Text preparation stopped due to an  input
146                                     byte that does not belong to UTF-8.
147
148
149                           EINVAL    Text preparation stopped due to an incom‐
150                                     plete UTF-8 character at the end  of  the
151                                     input array.
152
153
154                           ERANGE    The  specified  Unicode  version value is
155                                     not a supported version.
156
157
158

DESCRIPTION

160       The u8_textprep_str() function prepares the sequence of  UTF-8  charac‐
161       ters in the array specified by inarray into a sequence of corresponding
162       UTF-8 characters prepared in the array specified by outarray. The inar‐
163       ray argument points to a character byte array to the first character in
164       the input array and inlen indicates the number of bytes to the  end  of
165       the  array to be converted. The outarray argument points to a character
166       byte array to the first available byte in the output array  and  outlen
167       indicates  the  number  of the available bytes to the end of the array.
168       Unless flag includes U8_TEXTPREP_IGNORE_NULL, u8_textprep_str() normally
169       stops when it encounters a null byte from the input array regardless of
170       the current inlen value.
171
172
173       Unless flag includes U8_TEXTPREP_IGNORE_INVALID, if a sequence of input
174       bytes does not form a valid UTF-8 character, preparation stops after the
175       previous successfully prepared character. Likewise, unless flag includes
176       U8_TEXTPREP_IGNORE_INVALID, if the input array ends with an incomplete
177       UTF-8 character, preparation stops after the previous successfully pre‐
178       pared bytes. If the output array is not large enough to hold the entire
179       prepared text, preparation stops just prior to  the  input  bytes  that
180       would cause the output array to overflow. The value pointed to by inlen
181       is decremented to reflect the number of bytes still not prepared in the
182       input  array.  The value pointed to by outlen is decremented to reflect
183       the number of bytes still available in the output array.
184

RETURN VALUES

186       The u8_textprep_str() function updates the values pointed to  by  inlen
187       and  outlen  arguments  to  reflect the extent of the preparation. When
188       U8_TEXTPREP_IGNORE_INVALID is specified, u8_textprep_str() returns  the
189       number of illegal or incomplete characters found during the text prepa‐
190       ration. When U8_TEXTPREP_IGNORE_INVALID is not specified and  the  text
191       preparation is successful, the function returns 0. If the entire string
192       in the input array is prepared, the value pointed to by inlen  will  be
193       0. If the text preparation is stopped due to any of the conditions
194       mentioned above, the value pointed to by inlen will be non-zero and
195       errno is set to indicate the error. If such an error or any other error
196       occurs, u8_textprep_str() returns (size_t)-1 and sets errno to indicate
197       the error.
198

CONTEXT

200       The  u8_textprep_str()  function  can  be called from user or interrupt
201       context.
202

EXAMPLES

204       Example 1 Simple UTF-8 text preparation
205
206         #include <sys/types.h>
207         #include <sys/errno.h>
208         #include <sys/sunddi.h>
209         .
210         .
211         .
212         size_t ret;
213         char ib[MAXPATHLEN];
214         char ob[MAXPATHLEN];
215         size_t il, ol;
216         int err;
217         .
218         .
219         .
220         /*
221          * We got a UTF-8 pathname from somewhere.
222          *
223          * Calculate the length of input string including the terminating
224          * NULL byte and prepare other arguments.
225          */
226         (void) strlcpy(ib, pathname, MAXPATHLEN);
227         il = strlen(ib) + 1;
228         ol = MAXPATHLEN;
229
230         /*
231          * Do toupper case folding, apply Unicode Normalization Form D,
232          * ignore NULL byte, and ignore any illegal/incomplete characters.
233          */
234         ret = u8_textprep_str(ib, &il, ob, &ol,
235             (U8_TEXTPREP_IGNORE_NULL|U8_TEXTPREP_IGNORE_INVALID|
236             U8_TEXTPREP_TOUPPER|U8_TEXTPREP_NFD), U8_UNICODE_LATEST, &err);
237         if (ret == (size_t)-1) {
238             if (err == E2BIG)
239                 return (-1);
240             if (err == EBADF)
241                 return (-2);
242             if (err == ERANGE)
243                 return (-3);
244             return (-4);
245         }
246
247

ATTRIBUTES

249       See attributes(5) for descriptions of the following attributes:
250
251
252
253
254       ┌─────────────────────────────┬─────────────────────────────┐
255       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
256       ├─────────────────────────────┼─────────────────────────────┤
257       │Interface Stability          │Committed                    │
258       └─────────────────────────────┴─────────────────────────────┘
259

SEE ALSO

261       u8_strcmp(3C),  u8_textprep_str(3C),  u8_validate(3C),   attributes(5),
262       u8_strcmp(9F), u8_validate(9F), uconv_u16tou32(9F)
263
264
265       The Unicode Standard (http://www.unicode.org)
266
267
268
269SunOS 5.11                        18 Sep 2007              u8_textprep_str(9F)
Impressum