1u8_validate(3C)          Standard C Library Functions          u8_validate(3C)
2
3
4

NAME

6       u8_validate - validate UTF-8 characters and calculate the byte length
7

SYNOPSIS

9       #include <sys/u8_textprep.h>
10
11       int u8_validate(char *u8str, size_t n, char **list, int flag,
12            int *errnum);
13
14

PARAMETERS

16       u8str     The UTF-8 string to be validated.
17
18
19       n         The maximum number of bytes in u8str that can be examined and
20                 validated.
21
22
23       list      A list of null-terminated UTF-8 character strings  that  are
24                 additionally treated as invalid characters  when  validating
25                 u8str.  The last element of list must be a null pointer  to
26                 indicate that there are no further strings.
27
28
29       flag      Possible  validation  options constructed by a bitwise-inclu‐
30                 sive-OR of the following values:
31
32                 U8_VALIDATE_ENTIRE
33
34                     By default, u8_validate() looks at the first character or
35                     up  to n bytes, whichever is smaller in terms of the num‐
36                     ber of bytes to be consumed, and returns with the result.
37
38                     When this option is used, u8_validate() will check up  to
39                     n  bytes  from  u8str  and possibly more than a character
40                     before returning the result.
41
42
43                 U8_VALIDATE_CHECK_ADDITIONAL
44
45                     By default, u8_validate() does not use the supplied list.
46
47                     When this option is supplied with  a  list  of  character
48                     strings,   u8_validate()   additionally  validates  u8str
49                     against the character  strings  supplied  with  list  and
50                     returns EBADF in errnum if u8str has any one of the char‐
51                     acter strings in list.
52
53
54                 U8_VALIDATE_UCS2_RANGE
55
56                     By default, u8_validate() uses the entire Unicode  coding
57                     space of U+0000 to U+10FFFF.
58
59                     When  this  option is specified, the valid Unicode coding
60                     space is restricted to U+0000 through U+FFFF.
61
62
63
64       errnum    The error value set if validation fails.  The following values
65                 are supported:
66
67                 EBADF     Validation failed because list-specified characters
68                           were found in the string pointed to by u8str.
69
70
71                 EILSEQ    Validation failed because an illegal byte was found
72                           in the string pointed to by  u8str.
73
74
75                 EINVAL    Validation  failed  because an incomplete character
76                           (a truncated byte sequence) was found in the string
76                           pointed to by  u8str.
77
78
79                 ERANGE    Validation  failed  because  character  bytes  were
80                           encountered  that are outside the range of the Uni‐
81                           code coding space.
82
83
84

DESCRIPTION

86       The u8_validate() function validates u8str in UTF-8 and determines  the
87       number of bytes constituting the character(s) pointed to by u8str.
88

RETURN VALUES

90       If u8str is a null pointer, u8_validate() returns 0. Otherwise, u8_val‐
91       idate() returns either the number of bytes that constitute the  charac‐
92       ters if the next n or fewer bytes form valid characters, or -1 if there
93       is a validation failure, in which case it may set errnum  to  indicate
94       the error.
95

EXAMPLES

97       Example 1 Determine the length of the first UTF-8 character.
98
99         #include <sys/u8_textprep.h>
100
101         char u8[MAXPATHLEN];
102         int errnum;
103         .
104         .
105         .
106         len = u8_validate(u8, 4, (char **)NULL, 0, &errnum);
107         if (len == -1) {
108             switch (errnum) {
109                 case EILSEQ:
110                 case EINVAL:
111                     return (MYFS4_ERR_INVAL);
112                 case EBADF:
113                     return (MYFS4_ERR_BADNAME);
114                 case ERANGE:
115                     return (MYFS4_ERR_BADCHAR);
116                 default:
117                     return (-10);
118             }
119         }
120
121
122       Example  2  Check  if  there  are  any invalid characters in the entire
123       string.
124
125         #include <sys/u8_textprep.h>
126
127         char u8[MAXPATHLEN];
128         int n;
129         int errnum;
130         .
131         .
132         .
133         n = strlen(u8);
134         len = u8_validate(u8, n, (char **)NULL, U8_VALIDATE_ENTIRE, &errnum);
135         if (len == -1) {
136             switch (errnum) {
137                 case EILSEQ:
138                 case EINVAL:
139                     return (MYFS4_ERR_INVAL);
140                 case EBADF:
141                     return (MYFS4_ERR_BADNAME);
142                 case ERANGE:
143                     return (MYFS4_ERR_BADCHAR);
144                 default:
145                     return (-10);
146             }
147         }
148
149
150       Example 3 Check if there is any invalid character, including prohibited
151       characters, in the entire string.
152
153         #include <sys/u8_textprep.h>
154
155         char u8[MAXPATHLEN];
156         int n;
157         int errnum;
158         char *prohibited[4] = {
159             ".", "..", "\\", NULL
160         };
161         .
162         .
163         .
164         n = strlen(u8);
165         len = u8_validate(u8, n, prohibited,
166             (U8_VALIDATE_ENTIRE|U8_VALIDATE_CHECK_ADDITIONAL), &errnum);
167         if (len == -1) {
168             switch (errnum) {
169                 case EILSEQ:
170                 case EINVAL:
171                     return (MYFS4_ERR_INVAL);
172                 case EBADF:
173                     return (MYFS4_ERR_BADNAME);
174                 case ERANGE:
175                     return (MYFS4_ERR_BADCHAR);
176                 default:
177                     return (-10);
178             }
179         }
180
181

ATTRIBUTES

183       See attributes(5) for descriptions of the following attributes:
184
185
186
187
188       ┌─────────────────────────────┬─────────────────────────────┐
189       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
190       ├─────────────────────────────┼─────────────────────────────┤
191       │Interface Stability          │Committed                    │
192       ├─────────────────────────────┼─────────────────────────────┤
193       │MT-Level                     │MT-Safe                      │
194       └─────────────────────────────┴─────────────────────────────┘
195

SEE ALSO

197       u8_strcmp(3C),   u8_textprep_str(3C),   attributes(5),   u8_strcmp(9F),
198       u8_textprep_str(9F), u8_validate(9F)
199
200
201       The Unicode Standard (http://www.unicode.org)
202
203
204
205SunOS 5.11                        18 Sep 2007                  u8_validate(3C)
Impressum