1UNICODE::BIDI(3)            Courier Unicode Library           UNICODE::BIDI(3)
2
3
4

NAME

6       unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types,
7       unicode::bidi_reorder, unicode::bidi_cleanup,
8       unicode::bidi_logical_order, unicode::bidi_combinings,
9       unicode::bidi_needs_embed, unicode::bidi_embed,
10       unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction,
11       unicode::bidi_override - unicode bi-directional algorithm
12

SYNOPSIS

14       #include <courier-unicode.h>
15
16       struct unicode::bidi_calc_types {
17         bidi_calc_types(const std::u32string & string);
18         std::vector<unicode_bidi_type_t> types ;
19         void setbnl(std::u32string & string);
20       };
21
22       std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring);
23
24       std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring, unicode_bidi_level_t embedding_level);
25
26       int unicode::bidi_reorder(std::u32string &string, std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
27
28       void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
29
30       void unicode::bidi_cleanup(std::u32string &string, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
31
32       int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
33
34       int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback, int cleanup_options, size_t starting_pos, size_t n);
35
36       int unicode::bidi_logical_order(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
37
38       void unicode::bidi_combinings(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
39
40       void unicode::bidi_combinings(const std::u32string &string, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
41
42       void unicode::bidi_logical_order(std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, size_t)> &reorder_callback, size_t starting_pos=0, size_t n=(size_t)-1);
43
44       bool unicode::bidi_needs_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const unicode_bidi_level_t *paragraph_embedding=NULL, size_t starting_pos=0, size_t n=(size_t)-1);
45
46       int unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (const char32_t *, size_t, bool)> &callback);
47
48       std::u32string unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding);
49
50       char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string, unicode_bidi_level_t paragraph_embedding);
51
52       unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, size_t starting_pos=0, size_t n=(size_t)-1);
53
54       std::u32string unicode::bidi_override(const std::u32string &string, unicode_bidi_level_t direction, int cleanup_options=0);
55

DESCRIPTION

57       These functions implement the C++ interface for the Unicode
58       Bi-Directional algorithm[1]. See the description of the underlying
59       unicode_bidi(3) C library API for more information. C++ specific notes:
60
61       •   unicode::bidi_calc returns the directional embedding value buffer
62           and the calculated paragraph embedding level. Its ustring is
63           implicitly converted from a std::u32string:
64
65               std::u32string text;
66
67               auto [levels, direction]=unicode::bidi_calc(text);
68
69           Alternatively a unicode::bidi_calc_types object gets constructed
70           from the same std::u32string and then passed directly to
71           unicode::bidi_calc:
72
73               std::u32string text;
74
75               unicode::bidi_calc_types types{text};
76
77               types.setbnl(text); // Optional
78
79               // types.types is a std::vector of unicode_bidi_type_t values
80
81               auto [levels, direction]=unicode::bidi_calc(types);
82
83           This provides the means to access the intermediate
84           unicode_bidi_type_t values that get calculated from the Unicode
85           text string.
86
87               Note
88               In all cases the std::u32string cannot be a temporary object,
89               and it must remain in scope until unicode::bidi_calc() returns.
90           The optional setbnl() method uses unicode_bidi_setbnl(3) to replace
91           paragraph separators with newline characters, in the unicode
92           string. It requires the same unicode string that was passed to the
93           constructor as a parameter (because the constructor takes a
94           constant reference, but this method modifies the string).
95
96       •   Several C functions provide a “dry-run” mode by passing a NULL
97           pointer. The C++ API provides separate overloads, with and without
98           the nullable parameter.
99
100       •   Several C functions accept a nullable function pointer, with the
101           NULL function pointer specifying no callback. The C++ functions
102           have a std::function parameter with a default do-nothing closure.
103
104       •   Several C functions accept two parameters, a Unicode character
105           pointer and the embedding level buffer, and a single parameter that
106           specifies the size of both. The equivalent C++ function takes two
107           discrete parameters, a std::u32string and a std::vector and returns
108           an int; a negative value if their sizes differ, and 0 if their
109           sizes match, and the requested function completes. The
110           unicode::bidi_embed overload that returns a std::u32string returns
111           an empty string in case of a mismatch.
112
113       •   unicode::bidi_reorder reorders the entire string and its
114           embedding_level by default. The optional starting_pos and n
115           parameters limit the reordering to the indicated subset of the
116           original string (specified as the starting position offset index,
117           and the number of characters).
118
119       •   unicode::bidi_reorder, unicode::bidi_cleanup,
120           unicode::bidi_logical_order, unicode::bidi_needs_embed and
121           unicode::bidi_get_direction take two optional parameters (defaulted
122           values or overloaded) specifying an optional starting position and
123           number of characters that define a subset of the original string
124           that gets reordered, cleaned up, or has its direction determined.
125
126           This unicode::bidi_cleanup overload does not trim the passed-in
127           string and embedding level buffer, since it affects only a subset
128           of the string. The number of times the removed character callback
129           gets invoked indicates by how much the substring should be trimmed.
130
131       •   unicode::bidi_override modifies the passed-in string as follows:
132
133           •   unicode::bidi_cleanup() is applied with the specified, or
134               defaulted, cleanup_options
135
136           •   Either an LRO or an RLO override marker gets prepended to the
137               Unicode string, forcing the entire string to be interpreted in
138               a single rendering direction, when processed by the Unicode
139               bi-directional algorithm.
140
141           unicode::bidi_override makes it possible to use a Unicode-aware
142           application or algorithm in a context that only works with text
143           that's always displayed in a fixed direction, allowing graceful
144           handling of input containing bi-directional text.
145
146   unicode::literals namespace
147           using namespace unicode::literals;
148
149           std::u32string foo(std::u32string bar)
150           {
151                return bar + LRO;
152           }
153
154       This namespace contains the following constexpr definitions:
155
156       •   char32_t arrays with literal Unicode character strings containing
157           Unicode directional, isolate, and override markers, like LRO, RLO
158           and others.
159
160       •   CLEANUP_EXTRA, CLEANUP_BNL, and CLEANUP_CANONICAL options for
161           unicode::bidi_cleanup().
162

SEE ALSO

164       courier-unicode(7), unicode_bidi(3).
165

AUTHOR

167       Sam Varshavchik
168           Author
169

NOTES

171        1. Unicode Bi-Directional algorithm
172           https://www.unicode.org/reports/tr9/tr9-42.html
173
174
175
176Courier Unicode Library           04/16/2022                  UNICODE::BIDI(3)
Impressum