1u8_strcmp(9F) Kernel Functions for Drivers u8_strcmp(9F)
2
3
4
6 u8_strcmp - UTF-8 string comparison function
7
9 #include <sys/sunddi.h>
10
11 int u8_strcmp(const char *s1, const char *s2, size_t n,
12 int flag, size_t unicode_version, int *errno);
13
14
16 Solaris DDI specific (Solaris DDI)
17
19 s1, s2 Pointers to null-terminated UTF-8 strings
20
21
22 n The maximum number of bytes to be compared. If 0,
23 the comparison continues until the terminating
24 null byte of either or both of the strings is
25 reached.
26
27
28 flag The possible comparison options constructed by a
29 bit-wise-inclusive-OR of the following values:
30
31 U8_STRCMP_CS
32
33 Perform case-sensitive string comparison. This
34 is the default.
35
36
37 U8_STRCMP_CI_UPPER
38
39 Perform case-insensitive string comparison
40 based on Unicode upper case converted results
41 of s1 and s2.
42
43
44 U8_STRCMP_CI_LOWER
45
46 Perform case-insensitive string comparison
47 based on Unicode lower case converted results
48 of s1 and s2.
49
50
51 U8_STRCMP_NFD
52
53 Perform string comparison after s1 and s2 have
54 been normalized by using Unicode Normalization
55 Form D.
56
57
58 U8_STRCMP_NFC
59
60 Perform string comparison after s1 and s2 have
61 been normalized by using Unicode Normalization
62 Form C.
63
64
65 U8_STRCMP_NFKD
66
67 Perform string comparison after s1 and s2 have
68 been normalized by using Unicode Normalization
69 Form KD.
70
71
72 U8_STRCMP_NFKC
73
74 Perform string comparison after s1 and s2 have
75 been normalized by using Unicode Normalization
76 Form KC.
77
78 Only one case-sensitive or case-insensitive option
79 is allowed. Only one Unicode Normalization option
80 is allowed.
81
82
83 unicode_version The version of Unicode data that should be used
84 during comparison. The following values are sup‐
85 ported:
86
87 U8_UNICODE_320
88
89 Use Unicode 3.2.0 data during comparison.
90
91
92 U8_UNICODE_500
93
94 Use Unicode 5.0.0 data during comparison.
95
96
97 U8_UNICODE_LATEST
98
99 Use the latest Unicode version data available,
100 which is Unicode 5.0.0.
101
102
103
104 errno A non-zero value indicates that an error has
105 occurred during comparison. The following values
106 are supported:
107
108 EBADF The specified option values are conflict‐
109 ing and cannot be supported.
110
111
112 EILSEQ There was an illegal character at s1, s2,
113 or both.
114
115
116 EINVAL There was an incomplete character at s1,
117 s2, or both.
118
119
120 ERANGE The specified Unicode version value is
121 not supported.
122
123
124
126 After proper pre-processing, the u8_strcmp() function compares two
127 UTF-8 strings byte-by-byte, according to the machine ordering defined
128 by the corresponding version of the Unicode Standard.
129
130
131 When multiple comparison options are specified, Unicode Normalization
132 is performed after case-sensitive or case-insensitive processing is
133 performed.
134
136 The u8_strcmp() function returns an integer greater than, equal to, or
137 less than 0 if the string pointed to by s1 is greater than, equal to,
138 or less than the string pointed to by s2, respectively.
139
140
141 When u8_strcmp() detects an illegal or incomplete character, such char‐
142 acter causes the function to set errno to indicate the error. After‐
143 ward, the comparison is still performed on the resultant strings and a
144 value based on byte-by-byte comparison is always returned.
145
147 The u8_strcmp() function can be called from user or interrupt context.
148
150 Example 1 Perform simple default string comparison.
151
152 #include <sys/sunddi.h>
153
154 int
155 docmp_default(const char *u1, const char *u2) {
156 int result;
157 int errno;
158
159 result = u8_strcmp(u1, u2, 0, 0, U8_UNICODE_LATEST, &errno);
160 if (errno == EILSEQ)
161 return (-1);
162 if (errno == EINVAL)
163 return (-2);
164 if (errno == EBADF)
165 return (-3);
166 if (errno == ERANGE)
167 return (-4);
168 return (result);
169 }
170 Example 2 Perform upper case based case-insensitive comparison with
171 Unicode 3.2.0 data.
172
173 #include <sys/sunddi.h>
174
175 int
176 docmp_caseinsensitive_u320(const char *u1, const char *u2) {
177 int result;
178 int errno;
179
180 result = u8_strcmp(u1, u2, 0, U8_STRCMP_CI_UPPER,
181 U8_UNICODE_320, &errno);
182 if (errno == EILSEQ)
183 return (-1);
184 if (errno == EINVAL)
185 return (-2);
186 if (errno == EBADF)
187 return (-3);
188 if (errno == ERANGE)
189 return (-4);
190
191 return (result);
192 }
193
194
195 Example 3 Perform Unicode Normalization Form D.
196
197
198 Perform Unicode Normalization Form D and uppercase-based case-insensi‐
199 tive comparison with Unicode 3.2.0 data.
200
201
202 #include <sys/sunddi.h>
203
204 int
205 docmp_nfd_caseinsensitive_u320(const char *u1, const char *u2) {
206 int result;
207 int errno;
208
209 result = u8_strcmp(u1, u2, 0,
210 (U8_STRCMP_NFD|U8_STRCMP_CI_UPPER), U8_UNICODE_320,
211 &errno);
212 if (errno == EILSEQ)
213 return (-1);
214 if (errno == EINVAL)
215 return (-2);
216 if (errno == EBADF)
217 return (-3);
218 if (errno == ERANGE)
219 return (-4);
220
221 return (result);
222 }
223
224
226 See attributes(5) for descriptions of the following attributes:
227
228
229
230
231 ┌─────────────────────────────┬─────────────────────────────┐
232 │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │
233 ├─────────────────────────────┼─────────────────────────────┤
234 │Interface Stability │Committed │
235 └─────────────────────────────┴─────────────────────────────┘
236
238 u8_strcmp(3C), u8_textprep_str(3C), u8_validate(3C), attributes(5),
239 u8_textprep_str(9F), u8_validate(9F), uconv_u16tou32(9F)
240
241
242 The Unicode Standard (http://www.unicode.org)
243
244
245
246SunOS 5.11 18 Sep 2007 u8_strcmp(9F)