1UNICODE_CONVERT(3)          Courier Unicode Library         UNICODE_CONVERT(3)
2
3
4

NAME

6       unicode_u_ucs4_native, unicode_u_ucs2_native, unicode_convert_init,
7       unicode_convert, unicode_convert_deinit, unicode_convert_tocbuf_init,
8       unicode_convert_tou_init, unicode_convert_fromu_init,
9       unicode_convert_uc, unicode_convert_tocbuf_toutf8_init,
10       unicode_convert_tocbuf_fromutf8_init, unicode_convert_toutf8,
11       unicode_convert_fromutf8, unicode_convert_tobuf,
12       unicode_convert_tou_tobuf, unicode_convert_fromu_tobuf - unicode
13       character set conversion
14

SYNOPSIS

16       #include <courier-unicode.h>
17
18                extern const char unicode_u_ucs4_native[];
19
20                extern const char unicode_u_ucs2_native[];
21
22       unicode_convert_handle_t unicode_convert_init(const char *src_chset,
23                                                     const char *dst_chset,
                                                       int (*output_func)(const char *, size_t, void *),
24                                                     void *cb_arg);
25
26       int unicode_convert(unicode_convert_handle_t handle, const char *text,
27                           size_t cnt);
28
29       int unicode_convert_deinit(unicode_convert_handle_t handle,
30                                  int *errptr);
31
32       unicode_convert_handle_t
33                                                            unicode_convert_tocbuf_init(const char *src_chset,
34                                                            const char *dst_chset,
35                                                            char **cbufptr_ret,
36                                                            size_t *cbufsize_ret,
37                                                            int nullterminate);
38
39       unicode_convert_handle_t
40                                                                   unicode_convert_tocbuf_toutf8_init(const char *src_chset,
41                                                                   char **cbufptr_ret,
42                                                                   size_t *cbufsize_ret,
43                                                                   int nullterminate);
44
45       unicode_convert_handle_t
46                                                                     unicode_convert_tocbuf_fromutf8_init(const char *dst_chset,
47                                                                     char **cbufptr_ret,
48                                                                     size_t *cbufsize_ret,
49                                                                     int nullterminate);
50
51       unicode_convert_handle_t
52                                                         unicode_convert_tou_init(const char *src_chset,
53                                                         char32_t **ucptr_ret,
54                                                         size_t *ucsize_ret,
55                                                         int nullterminate);
56
57       unicode_convert_handle_t
58                                                           unicode_convert_fromu_init(const char *dst_chset,
59                                                           char **cbufptr_ret,
60                                                           size_t *cbufsize_ret,
61                                                           int nullterminate);
62
63       int unicode_convert_uc(unicode_convert_handle_t handle,
64                              const char32_t *text, size_t cnt);
65
66       char *unicode_convert_toutf8(const char *text, const char *charset,
67                                    int *error);
68
69       char *unicode_convert_fromutf8(const char *text, const char *charset,
70                                      int *error);
71
72       char *unicode_convert_tobuf(const char *text, const char *charset,
73                                   const char *dstcharset, int *error);
74
75       int unicode_convert_tou_tobuf(const char *text, size_t text_l,
76                                     const char *charset, char32_t **uc,
77                                     size_t *ucsize, int *error);
78
79       int unicode_convert_fromu_tobuf(const char32_t *utext, size_t utext_l,
80                                       const char *charset, char **c,
81                                       size_t *csize, int *error);
82

DESCRIPTION

84       unicode_u_ucs4_native[] contains the string “UCS-4BE” or “UCS-4LE”,
85       matching the native char32_t endianness.
86
87       unicode_u_ucs2_native[] contains the string “UCS-2BE” or “UCS-2LE”,
88       matching the native byte order (endianness) of 16-bit values.
89
90       unicode_convert_init(), unicode_convert(), and unicode_convert_deinit()
91       are an adaptation of the iconv(3)[1] API that uses the same calling
92       convention as the other algorithms in this unicode library, with some
93       value-added features. These functions use iconv(3) to effect the actual
94       character set conversion.
95
96       unicode_convert_init() returns a non-NULL handle for the requested
97       conversion, or NULL if the requested conversion is not available.
98       unicode_convert_init() takes a pointer to the output function that
99       receives the converted character text. The output function
100       receives a pointer to the converted character text, and the number of
101       characters in the converted text. The output function gets repeatedly
102       called, until it receives the entire converted text.
103
104       The character text to convert gets passed, repeatedly, to
105       unicode_convert(). Each call to unicode_convert() results in the output
106       function getting invoked, zero or more times, with each successive part
107       of the converted text. Finally, unicode_convert_deinit() stops the
108       conversion and deallocates the conversion handle.
109
110       It's possible that a call to unicode_convert_deinit() results in some
111       additional calls to the output function, passing the remaining, final
112       parts, of the converted text, before unicode_convert_deinit()
113       deallocates the handle, and returns.
114
115       The output function should return 0 normally. A non-0 return indicates
116       an error condition.  unicode_convert_deinit() returns non-zero if any
117       previous invocation of the output function returned non-zero (this
118       includes any invocations of the output function resulting from this
119       call, or prior unicode_convert() calls), or 0 if all invocations of the
120       output function returned 0.
121
122       If errptr is not NULL, *errptr gets set to non-zero if there were
123       any conversion errors -- if there was any text that could not be
124       converted to the destination character set.
125
126       unicode_convert() also returns non-zero if it calls the output function
127       and it returns non-zero, however the conversion handle remains
128       allocated, so unicode_convert_deinit() must still be called, to clean
129       that up.
130
131   Collecting converted text into a buffer
132       Call unicode_convert_tocbuf_init() instead of unicode_convert_init(),
133       then call unicode_convert() and unicode_convert_deinit() normally. The
134       parameters to unicode_convert_tocbuf_init() specify the source and the
135       destination character sets.  unicode_convert_tocbuf_toutf8_init() is
136       just an alias that specifies UTF-8 as the destination character set.
137       unicode_convert_tocbuf_fromutf8_init() is just an alias that specifies
138       UTF-8 as the source character set.
139
140       These functions supply an output function that collects the converted
141       text into a malloc()ed buffer. If unicode_convert_deinit() returns 0,
142       *cbufptr_ret gets initialized to a malloc()ed buffer, and the number of
143       converted characters, the size of the malloc()ed buffer, get placed
144       into *cbufsize_ret.
145
146           Note
147           If the converted string is an empty string, *cbufsize_ret gets set
148           to 0, but *cbufptr_ret still gets initialized (to a dummy malloced
149           buffer).
150
151       A non-zero nullterminate places a trailing \0 character after the
152       converted string (this is included in *cbufsize_ret).
153
154   Converting between character sets and unicode
155       unicode_convert_tou_init() converts character text into a char32_t
156       buffer. It works just like unicode_convert_tocbuf_init(), except that
157       only the source character set gets specified and the output buffer is a
158       char32_t buffer.  nullterminate terminates the converted unicode
159       characters with a U+0000.
160
161       unicode_convert_fromu_init() converts char32_ts to the output character
162       set, and also works like unicode_convert_tocbuf_init(). Additionally,
163       in this case, unicode_convert_uc() works just like unicode_convert()
164       except that the input sequence is a char32_t sequence, and the count
165       parameter is the number of unicode characters.
166
167   One-shot conversions
168       unicode_convert_toutf8() converts the specified text in the specified
169       character set into a UTF-8 string, returning a malloced buffer. If error
170       is not NULL, even if unicode_convert_toutf8() returns a non-NULL value,
171       *error gets set to a non-zero value if a character conversion error has
172       occurred, and some characters could not be converted.
173
174       unicode_convert_fromutf8() does a similar conversion from UTF-8 text to
175       the specified character set.
176
177       unicode_convert_tobuf() does a similar conversion between two different
178       character sets.
179
180       unicode_convert_tou_tobuf() calls unicode_convert_tou_init(), feeds the
181       character string through unicode_convert(), then calls
182       unicode_convert_deinit(). If this function returns 0, *uc and *ucsize
183       are set to a malloced buffer+size holding the unicode char array.
184
185       unicode_convert_fromu_tobuf() calls unicode_convert_fromu_init(), feeds
186       the unicode array through unicode_convert_uc(), then calls
187       unicode_convert_deinit(). If this function returns 0, *c and *csize are
188       set to a malloced buffer+size holding the char array.
189

SEE ALSO

191       courier-unicode(7), unicode_convert_tocase(3),
192       unicode_default_chset(3).
193

AUTHOR

195       Sam Varshavchik
196           Author
197

NOTES

199        1.
200
201                      iconv(3)
202           https://manpages.courier-mta.org/htmlman3/iconv.3.html
203
204
205
206Courier Unicode Library           05/31/2022                UNICODE_CONVERT(3)
Impressum