1UNICODE::BIDI(3) Courier Unicode Library UNICODE::BIDI(3)
2
3
4
6 unicode::bidi, unicode::bidi_calc, unicode::bidi_calc_types,
7 unicode::bidi_reorder, unicode::bidi_cleanup,
8 unicode::bidi_logical_order, unicode::bidi_combinings,
9 unicode::bidi_needs_embed, unicode::bidi_embed,
10 unicode::bidi_embed_paragraph_level, unicode::bidi_get_direction,
11 unicode::bidi_override - unicode bi-directional algorithm
12
14 #include <courier-unicode.h>
15
16 struct unicode::bidi_calc_types {
17 bidi_calc_types(const std::u32string & string);
18 std::vector<unicode_bidi_type_t> types ;
19 void setbnl(std::u32string & string);
20 };
21
22 std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring);
23
24 std::tuple<std::vector<unicode_bidi_level_t>, struct unicode_bidi_direction> unicode::bidi_calc(const unicode::bidi_calc_types &ustring, unicode_bidi_level_t embedding_level);
25
26 int unicode::bidi_reorder(std::u32string &string, std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
27
28 void unicode::bidi_reorder(std::vector<unicode_bidi_level_t> &embedding_level, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
29
30 void unicode::bidi_cleanup(std::u32string &string, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
31
32 int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback=[](size_t){}, int cleanup_options=0);
33
34 int unicode::bidi_cleanup(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, const std::function<void (size_t)> &removed_callback, int cleanup_options, size_t starting_pos, size_t n);
35
36 int unicode::bidi_logical_order(std::u32string &string, std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, size_t)> &reorder_callback=[](size_t, size_t){}, size_t starting_pos=0, size_t n=(size_t)-1);
37
38 void unicode::bidi_combinings(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
39
40 void unicode::bidi_combinings(const std::u32string &string, const std::function <void (unicode_bidi_level_t level, size_t level_start, size_t n_chars, size_t comb_start, size_t n_comb_chars)> &callback);
41
42 void unicode::bidi_logical_order(std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (size_t, size_t)> &reorder_callback, size_t starting_pos=0, size_t n=(size_t)-1);
43
44 bool unicode::bidi_needs_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, const unicode_bidi_level_t *paragraph_embedding=NULL, size_t starting_pos=0, size_t n=(size_t)-1);
45
46 int unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding, const std::function<void (const char32_t *, size_t, bool)> &callback);
47
48 std::u32string unicode::bidi_embed(const std::u32string &string, const std::vector <unicode_bidi_level_t> &levels, unicode_bidi_level_t paragraph_embedding);
49
50 char32_t unicode::bidi_embed_paragraph_level(const std::u32string &string, unicode_bidi_level_t paragraph_embedding);
51
52 unicode_bidi_direction unicode::bidi_get_direction(const std::u32string &string, size_t starting_pos=0, size_t n=(size_t)-1);
53
54 std::u32string unicode::bidi_override(const std::u32string &string, unicode_bidi_level_t direction, int cleanup_options=0);
55
57 These functions implement the C++ interface for the Unicode
58 Bi-Directional algorithm[1]. See the description of the underlying
59 unicode_bidi(3) C library API for more information. C++ specific notes:
60
61 • unicode::bidi_calc returns the directional embedding value buffer
62 and the calculated paragraph embedding level. Its ustring is
63 implicitly converted from a std::u32string:
64
65 std::u32string text;
66
67 auto [levels, direction]=unicode::bidi_calc(text);
68
69 Alternatively, a unicode::bidi_calc_types object gets constructed
70 from the same std::u32string and then passed directly to
71 unicode::bidi_calc:
72
73 std::u32string text;
74
75 unicode::bidi_calc_types types{text};
76
77 types.setbnl(text); // Optional
78
79 // types.types is a std::vector of enum_bidi_types_t values
80
81 auto [levels, direction]=unicode::bidi_calc(types);
82
83 This provides the means to access the intermediate
84 enum_bidi_types_t values that get calculated from the Unicode text
85 string.
86
87 Note
88 In all cases the std::u32string cannot be a temporary object,
89 and it must remain in scope until unicode::bidi_calc() returns.
90 The optional setbnl() method uses unicode_bidi_setbnl(3) to replace
91 paragraph separators with newline characters in the unicode
92 string. It requires the same unicode string that was passed to the
93 constructor as a parameter (because the constructor takes a
94 constant reference, but this method modifies the string).
95
96 • Several C functions provide a “dry-run” mode by passing a NULL
97 pointer. The C++ API provides separate overloads, with and without
98 the nullable parameter.
99
100 • Several C functions accept a nullable function pointer, with the
101 NULL function pointer specifying no callback. The C++ functions
102 have a std::function parameter with a default do-nothing closure.
103
104 • Several C functions accept two parameters, a Unicode character
105 pointer and the embedding level buffer, and a single parameter that
106 specifies the size of both. The equivalent C++ function takes two
107 discrete parameters, a std::u32string and a std::vector, and returns
108 an int: a negative value if their sizes differ, or 0 if their
109 sizes match and the requested function completes. The
110 unicode::bidi_embed overload that returns a std::u32string returns
111 an empty string in case of a mismatch.
112
113 • unicode::bidi_reorder reorders the entire string and its
114 embedding_levels by default. The optional starting_pos and n
115 parameters limit the reordering to the indicated subset of the
116 original string (specified as the starting position offset index,
117 and the number of characters).
118
119 • unicode::bidi_reorder, unicode::bidi_cleanup,
120 unicode::bidi_logical_order, unicode::bidi_needs_embed and
121 unicode::bidi_get_direction take two optional parameters (defaulted
122 values or overloaded) specifying an optional starting position and
123 number of characters that define a subset of the original string
124 that gets reordered, cleaned up, or has its direction determined.
125
126 The unicode::bidi_cleanup overload that takes a starting position
127 and number of characters does not trim the passed-in string and
128 embedding level buffer, since it affects only a subset of the
129 string. The number of times the removed character callback gets invoked indicates by how much the substring should be trimmed.
130
131 • unicode::bidi_override modifies the passed-in string as follows:
132
133 • unicode::bidi_cleanup() is applied with the specified, or
134 defaulted, cleanup_options
135
136 • Either an LRO or an RLO override marker gets prepended to the
137 Unicode string, forcing the entire string to be interpreted in
138 a single rendering direction, when processed by the Unicode
139 bi-directional algorithm.
140
141 unicode::bidi_override makes it possible to use a Unicode-aware
142 application or algorithm in a context that only works with text
143 that's always displayed in a fixed direction, allowing graceful
144 handling of input containing bi-directional text.
145
146 unicode::literals namespace
147 using namespace unicode::literals;
148
149 std::u32string foo(std::u32string bar)
150 {
151 return bar + LRO;
152 }
153
154 This namespace contains the following constexpr definitions:
155
156 • char32_t arrays with literal Unicode character strings containing
157 Unicode directional, isolate, and override markers, like LRO, RLO
158 and others.
159
160 • CLEANUP_EXTRA, CLEANUP_BNL, and CLEANUP_CANONICAL options for
161 unicode::bidi_cleanup().
162
164 courier-unicode(7), unicode_bidi(3).
165
167 Sam Varshavchik
168 Author
169
171 1. Unicode Bi-Directional algorithm
172 https://www.unicode.org/reports/tr9/tr9-42.html
173
174
175
176Courier Unicode Library 04/16/2022 UNICODE::BIDI(3)