u8_strcmp(3c)

1u8_strcmp(3C)            Standard C Library Functions            u8_strcmp(3C)
2
3
4

NAME

6       u8_strcmp - UTF-8 string comparison function
7

SYNOPSIS

9       #include <sys/u8_textprep.h>
10
11       int u8_strcmp(const char *s1, const char *s2, size_t n,
12            int flag, size_t version, int *errnum);
13
14

PARAMETERS

16       s1, s2       Pointers to null-terminated UTF-8 strings
17
18
19       n            The  maximum  number  of  bytes to be compared.  If 0, the
20                    comparison is  performed  until  either  or  both  of  the
21                    strings are examined to the string terminating null byte.
22
23
24       flag         The possible comparison options constructed by  a  bitwise-
25                    inclusive OR of the following values:
26
27                    U8_STRCMP_CS
28
29                        Perform case-sensitive string comparison. This is  the
30                        default.
31
32
33                    U8_STRCMP_CI_UPPER
34
35                        Perform  case-insensitive  string  comparison based on
36                        Unicode upper case converted results of s1 and s2.
37
38
39                    U8_STRCMP_CI_LOWER
40
41                        Perform case-insensitive string  comparison  based  on
42                        Unicode lower case converted results of s1 and s2.
43
44
45                    U8_STRCMP_NFD
46
47                        Perform  string  comparison  after s1 and s2 have been
48                        normalized by using Unicode Normalization Form D.
49
50
51                    U8_STRCMP_NFC
52
53                        Perform string comparison after s1 and  s2  have  been
54                        normalized by using Unicode Normalization Form C.
55
56
57                    U8_STRCMP_NFKD
58
59                        Perform  string  comparison  after s1 and s2 have been
60                        normalized by using Unicode Normalization Form KD.
61
62
63                    U8_STRCMP_NFKC
64
65                        Perform string comparison after s1 and  s2  have  been
66                        normalized by using Unicode Normalization Form KC.
67
68                    Only  one  case-sensitive  or  case-insensitive  option is
69                    allowed. Only one Unicode Normalization option is allowed.
70
71
72       version      The version of Unicode data that  should  be  used  during
73                    comparison. The following values are supported:
74
75                    U8_UNICODE_320
76
77                        Use Unicode 3.2.0 data during comparison.
78
79
80                    U8_UNICODE_500
81
82                        Use Unicode 5.0.0 data during comparison.
83
84
85                    U8_UNICODE_LATEST
86
87                        Use  the  latest Unicode version data available, which
88                        is Unicode 5.0.0.
89
90
91
92       errnum       A non-zero value indicates that an error has occurred dur‐
93                    ing comparison. The following values are supported:
94
95                    EBADF     The  specified option values are conflicting and
96                              cannot be supported.
97
98
99                    EILSEQ    There was an illegal character  at  s1,  s2,  or
100                              both.
101
102
103                    EINVAL    There  was an incomplete character at s1, s2, or
104                              both.
105
106
107                    ERANGE    The specified Unicode version value is not  sup‐
108                              ported.
109
110
111

DESCRIPTION

113       The u8_strcmp()  function internally processes UTF-8 strings pointed to
114       by s1 and s2 based on the corresponding version of the Unicode Standard
115       and  other  input arguments and compares the result strings in byte-by-
116       byte, machine ordering.
117
118
119       When multiple comparison options are specified,  Unicode  Normalization
120       is  performed  after  case-sensitive  or case-insensitive processing is
121       performed.
122

RETURN VALUES

124       The u8_strcmp() function returns an integer greater than, equal to,  or
125       less  than  0 if the string pointed to by s1 is greater than, equal to,
126       or less than the string pointed to by s2, respectively.
127
128
129       When u8_strcmp() detects an illegal or incomplete character, such char‐
130       acter  causes  the function to set errnum to indicate the error. After‐
131       ward, the comparison is still performed on the resultant strings and  a
132       value based on byte-by-byte comparison is always returned.
133

EXAMPLES

135       Example 1 Perform simple default string comparison.
136
137         #include <sys/u8_textprep.h>
138
139         int
140         docmp_default(const char *u1, const char *u2) {
141             int result;
142             int errnum;
143
144             result = u8_strcmp(u1, u2, 0, 0, U8_UNICODE_LATEST, &errnum);
145             if (errnum == EILSEQ)
146                 return (-1);
147             if (errnum == EINVAL)
148                 return (-2);
149             if (errnum == EBADF)
150                 return (-3);
151             if (errnum == ERANGE)
152                 return (-4);
153             return (result);
154         }
155       Example  2  Perform  upper  case based case-insensitive comparison with
156       Unicode 3.2.0 data.
157
158         #include <sys/u8_textprep.h>
159
160         int
161         docmp_caseinsensitive_u320(const char *u1, const char *u2) {
162             int result;
163             int errnum;
164
165             result = u8_strcmp(u1, u2, 0, U8_STRCMP_CI_UPPER,
166                 U8_UNICODE_320, &errnum);
167             if (errnum == EILSEQ)
168                 return (-1);
169             if (errnum == EINVAL)
170                 return (-2);
171             if (errnum == EBADF)
172                 return (-3);
173             if (errnum == ERANGE)
174                 return (-4);
175
176             return (result);
177         }
178
179
180       Example 3 Perform Unicode Normalization Form D.
181
182
183       Perform Unicode Normalization Form D and upper case based case-insensi‐
184       tive comparison with Unicode 3.2.0 data.
185
186
187         #include <sys/u8_textprep.h>
188
189         int
190         docmp_nfd_caseinsensitive_u320(const char *u1, const char *u2) {
191             int result;
192             int errnum;
193
194             result = u8_strcmp(u1, u2, 0,
195                 (U8_STRCMP_NFD|U8_STRCMP_CI_UPPER), U8_UNICODE_320,
196                 &errnum);
197             if (errnum == EILSEQ)
198                 return (-1);
199             if (errnum == EINVAL)
200                 return (-2);
201             if (errnum == EBADF)
202                 return (-3);
203             if (errnum == ERANGE)
204                 return (-4);
205
206             return (result);
207         }
208
209

ATTRIBUTES

211       See attributes(5) for descriptions of the following attributes:
212
213
214
215
216       ┌─────────────────────────────┬─────────────────────────────┐
217       │      ATTRIBUTE TYPE         │      ATTRIBUTE VALUE        │
218       ├─────────────────────────────┼─────────────────────────────┤
219       │Interface Stability          │Committed                    │
220       ├─────────────────────────────┼─────────────────────────────┤
221       │MT-Level                     │MT-Safe                      │
222       └─────────────────────────────┴─────────────────────────────┘
223