1u8_validate(3C) Standard C Library Functions u8_validate(3C)
2
3
4
6 u8_validate - validate UTF-8 characters and calculate the byte length
7
9 #include <sys/u8_textprep.h>
10
11 int u8_validate(char *u8str, size_t n, char **list, int flag,
12 int *errnum);
13
14
16 u8str The UTF-8 string to be validated.
17
18
19 n The maximum number of bytes in u8str that can be examined and
20 validated.
21
22
23 list A list of null-terminated character strings in UTF-8 that
24 must be additionally checked against as invalid characters.
25 The last element of list must be a null pointer to indicate
26 that there are no further strings.
27
28
29 flag Possible validation options constructed by a bitwise-inclu‐
30 sive-OR of the following values:
31
32 U8_VALIDATE_ENTIRE
33
34 By default, u8_validate() looks at the first character or
35 up to n bytes, whichever is smaller in terms of the num‐
36 ber of bytes to be consumed, and returns with the result.
37
38 When this option is used, u8_validate() will check up to
39 n bytes from u8str and possibly more than a character
40 before returning the result.
41
42
43 U8_VALIDATE_CHECK_ADDITIONAL
44
45 By default, u8_validate() does not use the supplied list.
46
47 When this option is supplied with a list of character
48 strings, u8_validate() additionally validates u8str
49 against the character strings supplied with list and
50 returns EBADF in errnum if u8str has any one of the char‐
51 acter strings in list.
52
53
54 U8_VALIDATE_UCS2_RANGE
55
56 By default, u8_validate() uses the entire Unicode coding
57 space of U+0000 to U+10FFFF.
58
59 When this option is specified, the valid Unicode coding
60 space is restricted to the range U+0000 to U+FFFF.
61
62
63
64 errnum A pointer to an int in which an error value is stored if
65 validation fails. The following values are supported:
66
67 EBADF Validation failed because list-specified characters
68 were found in the string pointed to by u8str.
69
70
71 EILSEQ Validation failed because an illegal byte was found
72 in the string pointed to by u8str.
73
74
75 EINVAL Validation failed because an incomplete character (a
76 truncated byte sequence) was found in the string pointed to by u8str.
77
78
79 ERANGE Validation failed because character bytes were
80 encountered that are outside the range of the Uni‐
81 code coding space.
82
83
84
86 The u8_validate() function validates u8str in UTF-8 and determines the
87 number of bytes constituting the character(s) pointed to by u8str.
88
90 If u8str is a null pointer, u8_validate() returns 0. Otherwise, u8_val‐
91 idate() returns either the number of bytes that constitute the charac‐
92 ters if the next n or fewer bytes form valid characters, or -1 if there
93 is a validation failure, in which case it may set errnum to indicate
94 the error.
95
97 Example 1 Determine the length of the first UTF-8 character.
98
99 #include <sys/u8_textprep.h>
100
101 char u8[MAXPATHLEN];
102 int errnum;
103 .
104 .
105 .
106 len = u8_validate(u8, 4, (char **)NULL, 0, &errnum);
107 if (len == -1) {
108 switch (errnum) {
109 case EILSEQ:
110 case EINVAL:
111 return (MYFS4_ERR_INVAL);
112 case EBADF:
113 return (MYFS4_ERR_BADNAME);
114 case ERANGE:
115 return (MYFS4_ERR_BADCHAR);
116 default:
117 return (-10);
118 }
119 }
120
121
122 Example 2 Check if there are any invalid characters in the entire
123 string.
124
125 #include <sys/u8_textprep.h>
126
127 char u8[MAXPATHLEN];
128 int n;
129 int errnum;
130 .
131 .
132 .
133 n = strlen(u8);
134 len = u8_validate(u8, n, (char **)NULL, U8_VALIDATE_ENTIRE, &errnum);
135 if (len == -1) {
136 switch (errnum) {
137 case EILSEQ:
138 case EINVAL:
139 return (MYFS4_ERR_INVAL);
140 case EBADF:
141 return (MYFS4_ERR_BADNAME);
142 case ERANGE:
143 return (MYFS4_ERR_BADCHAR);
144 default:
145 return (-10);
146 }
147 }
148
149
150 Example 3 Check if there is any invalid character, including prohibited
151 characters, in the entire string.
152
153 #include <sys/u8_textprep.h>
154
155 char u8[MAXPATHLEN];
156 int n;
157 int errnum;
158 char *prohibited[4] = {
159 ".", "..", "\\", NULL
160 };
161 .
162 .
163 .
164 n = strlen(u8);
165 len = u8_validate(u8, n, prohibited,
166 (U8_VALIDATE_ENTIRE|U8_VALIDATE_CHECK_ADDITIONAL), &errnum);
167 if (len == -1) {
168 switch (errnum) {
169 case EILSEQ:
170 case EINVAL:
171 return (MYFS4_ERR_INVAL);
172 case EBADF:
173 return (MYFS4_ERR_BADNAME);
174 case ERANGE:
175 return (MYFS4_ERR_BADCHAR);
176 default:
177 return (-10);
178 }
179 }
180
181
183 See attributes(5) for descriptions of the following attributes:
184
185
186
187
188 ┌─────────────────────────────┬─────────────────────────────┐
189 │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │
190 ├─────────────────────────────┼─────────────────────────────┤
191 │Interface Stability │Committed │
192 ├─────────────────────────────┼─────────────────────────────┤
193 │MT-Level │MT-Safe │
194 └─────────────────────────────┴─────────────────────────────┘
195
197 u8_strcmp(3C), u8_textprep_str(3C), attributes(5), u8_strcmp(9F),
198 u8_textprep_str(9F), u8_validate(9F)
199
200
201 The Unicode Standard (http://www.unicode.org)
202
203
204
205SunOS 5.11 18 Sep 2007 u8_validate(3C)