1 | /* Pango |
2 | * pango-bidi-type.c: Bidirectional Character Types |
3 | * |
4 | * Copyright (C) 2008 Jürg Billeter <j@bitron.ch> |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | */ |
21 | |
22 | #include "config.h" |
23 | |
24 | #include <string.h> |
25 | |
26 | #include <fribidi.h> |
27 | |
28 | #undef PANGO_DISABLE_DEPRECATED |
29 | |
30 | #include "pango-bidi-type.h" |
31 | #include "pango-utils.h" |
32 | |
33 | /** |
34 | * pango_bidi_type_for_unichar: |
35 | * @ch: a Unicode character |
36 | * |
37 | * Determines the bidirectional type of a character. |
38 | * |
39 | * The bidirectional type is specified in the Unicode Character Database. |
40 | * |
41 | * A simplified version of this function is available as [func@unichar_direction]. |
42 | * |
43 | * Return value: the bidirectional character type, as used in the |
44 | * Unicode bidirectional algorithm. |
45 | * |
46 | * Since: 1.22 |
47 | */ |
48 | PangoBidiType |
49 | pango_bidi_type_for_unichar (gunichar ch) |
50 | { |
51 | FriBidiCharType fribidi_ch_type; |
52 | |
53 | G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar)); |
54 | |
55 | fribidi_ch_type = fribidi_get_bidi_type (ch); |
56 | |
57 | switch (fribidi_ch_type) |
58 | { |
59 | case FRIBIDI_TYPE_LTR: return PANGO_BIDI_TYPE_L; |
60 | case FRIBIDI_TYPE_LRE: return PANGO_BIDI_TYPE_LRE; |
61 | case FRIBIDI_TYPE_LRO: return PANGO_BIDI_TYPE_LRO; |
62 | case FRIBIDI_TYPE_RTL: return PANGO_BIDI_TYPE_R; |
63 | case FRIBIDI_TYPE_AL: return PANGO_BIDI_TYPE_AL; |
64 | case FRIBIDI_TYPE_RLE: return PANGO_BIDI_TYPE_RLE; |
65 | case FRIBIDI_TYPE_RLO: return PANGO_BIDI_TYPE_RLO; |
66 | case FRIBIDI_TYPE_PDF: return PANGO_BIDI_TYPE_PDF; |
67 | case FRIBIDI_TYPE_EN: return PANGO_BIDI_TYPE_EN; |
68 | case FRIBIDI_TYPE_ES: return PANGO_BIDI_TYPE_ES; |
69 | case FRIBIDI_TYPE_ET: return PANGO_BIDI_TYPE_ET; |
70 | case FRIBIDI_TYPE_AN: return PANGO_BIDI_TYPE_AN; |
71 | case FRIBIDI_TYPE_CS: return PANGO_BIDI_TYPE_CS; |
72 | case FRIBIDI_TYPE_NSM: return PANGO_BIDI_TYPE_NSM; |
73 | case FRIBIDI_TYPE_BN: return PANGO_BIDI_TYPE_BN; |
74 | case FRIBIDI_TYPE_BS: return PANGO_BIDI_TYPE_B; |
75 | case FRIBIDI_TYPE_SS: return PANGO_BIDI_TYPE_S; |
76 | case FRIBIDI_TYPE_WS: return PANGO_BIDI_TYPE_WS; |
77 | case FRIBIDI_TYPE_ON: return PANGO_BIDI_TYPE_ON; |
78 | case FRIBIDI_TYPE_LRI: return PANGO_BIDI_TYPE_LRI; |
79 | case FRIBIDI_TYPE_RLI: return PANGO_BIDI_TYPE_RLI; |
80 | case FRIBIDI_TYPE_FSI: return PANGO_BIDI_TYPE_FSI; |
81 | case FRIBIDI_TYPE_PDI: return PANGO_BIDI_TYPE_PDI; |
82 | case _FRIBIDI_TYPE_SENTINEL: |
83 | default: |
84 | return PANGO_BIDI_TYPE_ON; |
85 | } |
86 | } |
87 | |
88 | /* Some bidi-related functions */ |
89 | |
90 | /** |
91 | * pango_log2vis_get_embedding_levels: |
92 | * @text: the text to itemize. |
93 | * @length: the number of bytes (not characters) to process, or -1 |
94 | * if @text is nul-terminated and the length should be calculated. |
95 | * @pbase_dir: input base direction, and output resolved direction. |
96 | * |
97 | * Return the bidirectional embedding levels of the input paragraph. |
98 | * |
99 | * The bidirectional embedding levels are defined by the [Unicode Bidirectional |
100 | * Algorithm](http://www.unicode.org/reports/tr9/). |
101 | * |
102 | * If the input base direction is a weak direction, the direction of the |
103 | * characters in the text will determine the final resolved direction. |
104 | * |
105 | * Return value: a newly allocated array of embedding levels, one item per |
106 | * character (not byte), that should be freed using [func@GLib.free]. |
107 | * |
108 | * Since: 1.4 |
109 | */ |
110 | guint8 * |
111 | pango_log2vis_get_embedding_levels (const gchar *text, |
112 | int length, |
113 | PangoDirection *pbase_dir) |
114 | { |
115 | glong n_chars, i; |
116 | guint8 *embedding_levels_list; |
117 | const gchar *p; |
118 | FriBidiParType fribidi_base_dir; |
119 | FriBidiCharType *bidi_types; |
120 | FriBidiBracketType *bracket_types; |
121 | FriBidiLevel max_level; |
122 | FriBidiCharType ored_types = 0; |
123 | FriBidiCharType anded_strongs = FRIBIDI_TYPE_RLE; |
124 | |
125 | G_STATIC_ASSERT (sizeof (FriBidiLevel) == sizeof (guint8)); |
126 | G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar)); |
127 | |
128 | switch (*pbase_dir) |
129 | { |
130 | case PANGO_DIRECTION_LTR: |
131 | case PANGO_DIRECTION_TTB_RTL: |
132 | fribidi_base_dir = FRIBIDI_PAR_LTR; |
133 | break; |
134 | case PANGO_DIRECTION_RTL: |
135 | case PANGO_DIRECTION_TTB_LTR: |
136 | fribidi_base_dir = FRIBIDI_PAR_RTL; |
137 | break; |
138 | case PANGO_DIRECTION_WEAK_RTL: |
139 | fribidi_base_dir = FRIBIDI_PAR_WRTL; |
140 | break; |
141 | case PANGO_DIRECTION_WEAK_LTR: |
142 | case PANGO_DIRECTION_NEUTRAL: |
143 | default: |
144 | fribidi_base_dir = FRIBIDI_PAR_WLTR; |
145 | break; |
146 | } |
147 | |
148 | if (length < 0) |
149 | length = strlen (s: text); |
150 | |
151 | n_chars = g_utf8_strlen (p: text, max: length); |
152 | |
153 | bidi_types = g_new (FriBidiCharType, n_chars); |
154 | bracket_types = g_new (FriBidiBracketType, n_chars); |
155 | embedding_levels_list = g_new (guint8, n_chars); |
156 | |
157 | for (i = 0, p = text; p < text + length; p = g_utf8_next_char(p), i++) |
158 | { |
159 | gunichar ch = g_utf8_get_char (p); |
160 | FriBidiCharType char_type = fribidi_get_bidi_type (ch); |
161 | |
162 | if (i == n_chars) |
163 | break; |
164 | |
165 | bidi_types[i] = char_type; |
166 | ored_types |= char_type; |
167 | if (FRIBIDI_IS_STRONG (char_type)) |
168 | anded_strongs &= char_type; |
169 | if (G_UNLIKELY(bidi_types[i] == FRIBIDI_TYPE_ON)) |
170 | bracket_types[i] = fribidi_get_bracket (ch); |
171 | else |
172 | bracket_types[i] = FRIBIDI_NO_BRACKET; |
173 | } |
174 | |
175 | /* Short-circuit (malloc-expensive) FriBidi call for unidirectional |
176 | * text. |
177 | * |
178 | * For details see: |
179 | * https://bugzilla.gnome.org/show_bug.cgi?id=590183 |
180 | */ |
181 | |
182 | /* The case that all resolved levels will be ltr. |
183 | * No isolates, all strongs be LTR, there should be no Arabic numbers |
184 | * (or letters for that matter), and one of the following: |
185 | * |
186 | * o base_dir doesn't have an RTL taste. |
187 | * o there are letters, and base_dir is weak. |
188 | */ |
189 | if (!FRIBIDI_IS_ISOLATE (ored_types) && |
190 | !FRIBIDI_IS_RTL (ored_types) && |
191 | !FRIBIDI_IS_ARABIC (ored_types) && |
192 | (!FRIBIDI_IS_RTL (fribidi_base_dir) || |
193 | (FRIBIDI_IS_WEAK (fribidi_base_dir) && |
194 | FRIBIDI_IS_LETTER (ored_types)) |
195 | )) |
196 | { |
197 | /* all LTR */ |
198 | fribidi_base_dir = FRIBIDI_PAR_LTR; |
199 | memset (s: embedding_levels_list, c: 0, n: n_chars); |
200 | goto resolved; |
201 | } |
202 | /* The case that all resolved levels will be RTL is much more complex. |
203 | * No isolates, no numbers, all strongs are RTL, and one of |
204 | * the following: |
205 | * |
206 | * o base_dir has an RTL taste (may be weak). |
207 | * o there are letters, and base_dir is weak. |
208 | */ |
209 | else if (!FRIBIDI_IS_ISOLATE (ored_types) && |
210 | !FRIBIDI_IS_NUMBER (ored_types) && |
211 | FRIBIDI_IS_RTL (anded_strongs) && |
212 | (FRIBIDI_IS_RTL (fribidi_base_dir) || |
213 | (FRIBIDI_IS_WEAK (fribidi_base_dir) && |
214 | FRIBIDI_IS_LETTER (ored_types)) |
215 | )) |
216 | { |
217 | /* all RTL */ |
218 | fribidi_base_dir = FRIBIDI_PAR_RTL; |
219 | memset (s: embedding_levels_list, c: 1, n: n_chars); |
220 | goto resolved; |
221 | } |
222 | |
223 | |
224 | max_level = fribidi_get_par_embedding_levels_ex (bidi_types, bracket_types, len: n_chars, |
225 | pbase_dir: &fribidi_base_dir, |
226 | embedding_levels: (FriBidiLevel*)embedding_levels_list); |
227 | |
228 | if (G_UNLIKELY(max_level == 0)) |
229 | { |
230 | /* fribidi_get_par_embedding_levels() failed. */ |
231 | memset (s: embedding_levels_list, c: 0, n: length); |
232 | } |
233 | |
234 | resolved: |
235 | g_free (mem: bidi_types); |
236 | g_free (mem: bracket_types); |
237 | |
238 | *pbase_dir = (fribidi_base_dir == FRIBIDI_PAR_LTR) ? PANGO_DIRECTION_LTR : PANGO_DIRECTION_RTL; |
239 | |
240 | return embedding_levels_list; |
241 | } |
242 | |
243 | /** |
244 | * pango_unichar_direction: |
245 | * @ch: a Unicode character |
246 | * |
247 | * Determines the inherent direction of a character. |
248 | * |
249 | * The inherent direction is either `PANGO_DIRECTION_LTR`, `PANGO_DIRECTION_RTL`, |
250 | * or `PANGO_DIRECTION_NEUTRAL`. |
251 | * |
252 | * This function is useful to categorize characters into left-to-right |
253 | * letters, right-to-left letters, and everything else. If full Unicode |
254 | * bidirectional type of a character is needed, [func@Pango.BidiType.for_unichar] |
255 | * can be used instead. |
256 | * |
257 | * Return value: the direction of the character. |
258 | */ |
259 | PangoDirection |
260 | pango_unichar_direction (gunichar ch) |
261 | { |
262 | FriBidiCharType fribidi_ch_type; |
263 | |
264 | G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar)); |
265 | |
266 | fribidi_ch_type = fribidi_get_bidi_type (ch); |
267 | |
268 | if (!FRIBIDI_IS_STRONG (fribidi_ch_type)) |
269 | return PANGO_DIRECTION_NEUTRAL; |
270 | else if (FRIBIDI_IS_RTL (fribidi_ch_type)) |
271 | return PANGO_DIRECTION_RTL; |
272 | else |
273 | return PANGO_DIRECTION_LTR; |
274 | } |
275 | |
276 | |
277 | /** |
278 | * pango_get_mirror_char: |
279 | * @ch: a Unicode character |
280 | * @mirrored_ch: location to store the mirrored character |
281 | * |
282 | * Returns the mirrored character of a Unicode character. |
283 | * |
284 | * Mirror characters are determined by the Unicode mirrored property. |
285 | * |
286 | * Return value: %TRUE if @ch has a mirrored character and @mirrored_ch is |
287 | * filled in, %FALSE otherwise |
288 | * |
289 | * Deprecated: 1.30: Use [func@GLib.unichar_get_mirror_char] instead; |
290 | * the docs for that function provide full details. |
291 | */ |
292 | gboolean |
293 | pango_get_mirror_char (gunichar ch, |
294 | gunichar *mirrored_ch) |
295 | { |
296 | return g_unichar_get_mirror_char (ch, mirrored_ch); |
297 | } |
298 | |
299 | |