1u8_validate(9F)          Kernel Functions for Drivers          u8_validate(9F)
2
3
4

NAME

6       u8_validate - validate UTF-8 characters and calculate the byte length
7

SYNOPSIS

9       #include <sys/types.h>
10       #include <sys/errno.h>
11       #include <sys/sunddi.h>
12
13       int u8_validate(char *u8str, size_t n, char **list, int flag,
14            int *errno);
15
16

INTERFACE LEVEL

18       Solaris DDI specific (Solaris DDI)
19

PARAMETERS

21       u8str    The UTF-8 string to be validated.
22
23
24       n        The  maximum number of bytes in u8str that can be examined and
25                validated.
26
27
28       list     A list of null-terminated character strings in UTF-8 that are
29                additionally treated as invalid when found in u8str. The last
30                entry in list must be a null pointer to indicate that there
31                are no further strings.
32
33
34       flag     Possible  validation  options  constructed by a bitwise-inclu‐
35                sive-OR of the following values:
36
37                U8_VALIDATE_ENTIRE
38
39                    By default, u8_validate() looks at the first character  or
40                    up to n bytes, whichever is smaller in terms of the number
41                    of bytes to be consumed, and returns with the result.
42
43                    When this option is used, u8_validate() will check up to n
44                    bytes from u8str and possibly more than a character before
45                    returning the result.
46
47
48                U8_VALIDATE_CHECK_ADDITIONAL
49
50                    By default, u8_validate() does not use list supplied.
51
52                    When this option is supplied  with  a  list  of  character
53                    strings,   u8_validate()   additionally   validates  u8str
54                    against the  character  strings  supplied  with  list  and
55                    returns EBADF in errno if u8str has any one of the charac‐
56                    ter strings in list.
57
58
59                U8_VALIDATE_UCS2_RANGE
60
61                    By default, u8_validate() uses the entire  Unicode  coding
62                    space of U+0000 to U+10FFFF.
63
64                    When  this  option  is specified, the valid Unicode coding
65                    space is restricted to U+0000 to U+FFFF.
66
67
68
69       errno    A pointer to an int in which an error value may be stored when
70                validation fails.  The following values are supported:
71
72                EBADF     Validation  failed because list-specified characters
73                          were found in the string pointed to by u8str.
74
75
76                EILSEQ    Validation failed because an illegal byte was  found
77                          in the string pointed to by  u8str.
78
79
80                EINVAL    Validation  failed  because  an incomplete character
81                          was found in the string pointed to by u8str.
82
83
84                ERANGE    Validation  failed  because  character  bytes   were
85                          encountered  that  are outside the range of the Uni‐
86                          code coding space.
87
88
89

DESCRIPTION

91       The u8_validate() function validates u8str in UTF-8 and determines  the
92       number of bytes constituting the character(s) pointed to by u8str.
93

RETURN VALUES

95       If u8str is a null pointer, u8_validate() returns 0. Otherwise, u8_val‐
96       idate() returns either the number of bytes that constitute the  charac‐
97       ters if the next n or fewer bytes form valid characters, or -1 if there
98       is a validation failure, in which case it may set  errno  to  indicate
99       the error.
100

EXAMPLES

102       Example 1 Determine the length of the first UTF-8 character.
103
104         #include <sys/types.h>
105         #include <sys/errno.h>
106         #include <sys/sunddi.h>
107
108         char u8[MAXPATHLEN];
109         int errno;
110         .
111         .
112         .
113         len = u8_validate(u8, 4, (char **)NULL, 0, &errno);
114         if (len == -1) {
115             switch (errno) {
116                 case EILSEQ:
117                 case EINVAL:
118                     return (MYFS4_ERR_INVAL);
119                 case EBADF:
120                     return (MYFS4_ERR_BADNAME);
121                 case ERANGE:
122                     return (MYFS4_ERR_BADCHAR);
123                 default:
124                     return (-10);
125             }
126         }
127
128
129       Example  2  Check  if  there  are  any invalid characters in the entire
130       string.
131
132         #include <sys/types.h>
133         #include <sys/errno.h>
134         #include <sys/sunddi.h>
135
136         char u8[MAXPATHLEN];
137         int n;
138         int errno;
139         .
140         .
141         .
142         n = strlen(u8);
143         len = u8_validate(u8, n, (char **)NULL, U8_VALIDATE_ENTIRE, &errno);
144         if (len == -1) {
145             switch (errno) {
146                 case EILSEQ:
147                 case EINVAL:
148                     return (MYFS4_ERR_INVAL);
149                 case EBADF:
150                     return (MYFS4_ERR_BADNAME);
151                 case ERANGE:
152                     return (MYFS4_ERR_BADCHAR);
153                 default:
154                     return (-10);
155             }
156         }
157
158
159       Example 3 Check if there is any invalid character, including prohibited
160       characters, in the entire string.
161
162         #include <sys/types.h>
163         #include <sys/errno.h>
164         #include <sys/sunddi.h>
165
166         char u8[MAXPATHLEN];
167         int n;
168         int errno;
169         char *prohibited[4] = {
170             ".", "..", "\\", NULL
171         };
172         .
173         .
174         .
175         n = strlen(u8);
176         len = u8_validate(u8, n, prohibited,
177             (U8_VALIDATE_ENTIRE|U8_VALIDATE_CHECK_ADDITIONAL), &errno);
178         if (len == -1) {
179             switch (errno) {
180                 case EILSEQ:
181                 case EINVAL:
182                     return (MYFS4_ERR_INVAL);
183                 case EBADF:
184                     return (MYFS4_ERR_BADNAME);
185                 case ERANGE:
186                     return (MYFS4_ERR_BADCHAR);
187                 default:
188                     return (-10);
189             }
190         }
191
192

ATTRIBUTES

194       See attributes(5) for descriptions of the following attributes:
195
196
197
198
199       ┌─────────────────────────────┬─────────────────────────────┐
200       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
201       ├─────────────────────────────┼─────────────────────────────┤
202       │Interface Stability          │Committed                    │
203       └─────────────────────────────┴─────────────────────────────┘
204

SEE ALSO

206       u8_strcmp(3C),   u8_textprep_str(3C),  u8_validate(3C),  attributes(5),
207       u8_strcmp(9F), u8_textprep_str(9F), uconv_u16tou32(9F)
208
209
210       The Unicode Standard (http://www.unicode.org)
211
212
213
214SunOS 5.11                        18 Sep 2007                  u8_validate(9F)
Impressum