1u8_strcmp(3C) Standard C Library Functions u8_strcmp(3C)
2
3
4
6 u8_strcmp - UTF-8 string comparison function
7
9 #include <sys/u8_textprep.h>
10
11 int u8_strcmp(const char *s1, const char *s2, size_t n,
12 int flag, size_t version, int *errnum);
13
14
16 s1, s2 Pointers to null-terminated UTF-8 strings
17
18
19 n The maximum number of bytes to be compared. If 0, the
20 comparison is performed until either or both of the
21 strings are examined to the string terminating null byte.
22
23
24 flag The possible comparison options constructed by a bit-wise-
25 inclusive-OR of the following values:
26
27 U8_STRCMP_CS
28
29 Perform case-sensitive string comparison. This is the
30 default.
31
32
33 U8_STRCMP_CI_UPPER
34
35 Perform case-insensitive string comparison based on
36 Unicode upper case converted results of s1 and s2.
37
38
39 U8_STRCMP_CI_LOWER
40
41 Perform case-insensitive string comparison based on
42 Unicode lower case converted results of s1 and s2.
43
44
45 U8_STRCMP_NFD
46
47 Perform string comparison after s1 and s2 have been
48 normalized by using Unicode Normalization Form D.
49
50
51 U8_STRCMP_NFC
52
53 Perform string comparison after s1 and s2 have been
54 normalized by using Unicode Normalization Form C.
55
56
57 U8_STRCMP_NFKD
58
59 Perform string comparison after s1 and s2 have been
60 normalized by using Unicode Normalization Form KD.
61
62
63 U8_STRCMP_NFKC
64
65 Perform string comparison after s1 and s2 have been
66 normalized by using Unicode Normalization Form KC.
67
68 Only one case-sensitive or case-insensitive option is
69 allowed. Only one Unicode Normalization option is allowed.
70
71
72 version The version of Unicode data that should be used during
73 comparison. The following values are supported:
74
75 U8_UNICODE_320
76
77 Use Unicode 3.2.0 data during comparison.
78
79
80 U8_UNICODE_500
81
82 Use Unicode 5.0.0 data during comparison.
83
84
85 U8_UNICODE_LATEST
86
87 Use the latest Unicode version data available, which
88 is Unicode 5.0.0.
89
90
91
92 errnum A non-zero value indicates that an error has occurred dur‐
93 ing comparison. The following values are supported:
94
95 EBADF The specified option values are conflicting and
96 cannot be supported.
97
98
99 EILSEQ There was an illegal character at s1, s2, or
100 both.
101
102
103 EINVAL There was an incomplete character at s1, s2, or
104 both.
105
106
107 ERANGE The specified Unicode version value is not sup‐
108 ported.
109
110
111
113 The u8_strcmp() function internally processes UTF-8 strings pointed to
114 by s1 and s2 based on the corresponding version of the Unicode Standard
115 and other input arguments and compares the result strings in byte-by-
116 byte, machine ordering.
117
118
119 When multiple comparison options are specified, Unicode Normalization
120 is performed after case-sensitive or case-insensitive processing is
121 performed.
122
124 The u8_strcmp() function returns an integer greater than, equal to, or
125 less than 0 if the string pointed to by s1 is greater than, equal to,
126 or less than the string pointed to by s2, respectively.
127
128
129 When u8_strcmp() detects an illegal or incomplete character, such char‐
130 acter causes the function to set errnum to indicate the error. After‐
131 ward, the comparison is still performed on the resultant strings and a
132 value based on byte-by-byte comparison is always returned.
133
135 Example 1 Perform simple default string comparison.
136
137 #include <sys/u8_textprep.h>
138
139 int
140 docmp_default(const char *u1, const char *u2) {
141 int result;
142 int errnum;
143
144 result = u8_strcmp(u1, u2, 0, 0, U8_UNICODE_LATEST, &errnum);
145 if (errnum == EILSEQ)
146 return (-1);
147 if (errnum == EINVAL)
148 return (-2);
149 if (errnum == EBADF)
150 return (-3);
151 if (errnum == ERANGE)
152 return (-4);
153 return (result);
154 }
155 Example 2 Perform upper case based case-insensitive comparison with
156 Unicode 3.2.0 data.
157
158 #include <sys/u8_textprep.h>
159
160 int
161 docmp_caseinsensitive_u320(const char *u1, const char *u2) {
162 int result;
163 int errnum;
164
165 result = u8_strcmp(u1, u2, 0, U8_STRCMP_CI_UPPER,
166 U8_UNICODE_320, &errnum);
167 if (errnum == EILSEQ)
168 return (-1);
169 if (errnum == EINVAL)
170 return (-2);
171 if (errnum == EBADF)
172 return (-3);
173 if (errnum == ERANGE)
174 return (-4);
175
176 return (result);
177 }
178
179
180 Example 3 Perform Unicode Normalization Form D.
181
182
183 Perform Unicode Normalization Form D and upper case based case-insensi‐
184 tive comparison with Unicode 3.2.0 data.
185
186
187 #include <sys/u8_textprep.h>
188
189 int
190 docmp_nfd_caseinsensitive_u320(const char *u1, const char *u2) {
191 int result;
192 int errnum;
193
194 result = u8_strcmp(u1, u2, 0,
195 (U8_STRCMP_NFD|U8_STRCMP_CI_UPPER), U8_UNICODE_320,
196 &errnum);
197 if (errnum == EILSEQ)
198 return (-1);
199 if (errnum == EINVAL)
200 return (-2);
201 if (errnum == EBADF)
202 return (-3);
203 if (errnum == ERANGE)
204 return (-4);
205
206 return (result);
207 }
208
209
211 See attributes(5) for descriptions of the following attributes:
212
213
214
215
216 ┌─────────────────────────────┬─────────────────────────────┐
217 │ ATTRIBUTE TYPE │ ATTRIBUTE VALUE │
218 ├─────────────────────────────┼─────────────────────────────┤
219 │Interface Stability │Committed │
220 ├─────────────────────────────┼─────────────────────────────┤
221 │MT-Level │MT-Safe │
222 └─────────────────────────────┴─────────────────────────────┘
223
225 u8_textprep_str(3C), u8_validate(3C), attributes(5), u8_strcmp(9F),
226 u8_textprep_str(9F), u8_validate(9F)
227
228
229 The Unicode Standard (http://www.unicode.org)
230
231
232
233SunOS 5.11 12 Sep 2007 u8_strcmp(3C)