/* Pango
 * pango-bidi-type.c: Bidirectional Character Types
 *
 * Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
21
22#include "config.h"
23
24#include <string.h>
25
26#include <fribidi.h>
27
28#undef PANGO_DISABLE_DEPRECATED
29
30#include "pango-bidi-type.h"
31#include "pango-utils.h"
32
33/**
34 * pango_bidi_type_for_unichar:
35 * @ch: a Unicode character
36 *
37 * Determines the bidirectional type of a character.
38 *
39 * The bidirectional type is specified in the Unicode Character Database.
40 *
41 * A simplified version of this function is available as [func@unichar_direction].
42 *
43 * Return value: the bidirectional character type, as used in the
44 * Unicode bidirectional algorithm.
45 *
46 * Since: 1.22
47 */
48PangoBidiType
49pango_bidi_type_for_unichar (gunichar ch)
50{
51 FriBidiCharType fribidi_ch_type;
52
53 G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
54
55 fribidi_ch_type = fribidi_get_bidi_type (ch);
56
57 switch (fribidi_ch_type)
58 {
59 case FRIBIDI_TYPE_LTR: return PANGO_BIDI_TYPE_L;
60 case FRIBIDI_TYPE_LRE: return PANGO_BIDI_TYPE_LRE;
61 case FRIBIDI_TYPE_LRO: return PANGO_BIDI_TYPE_LRO;
62 case FRIBIDI_TYPE_RTL: return PANGO_BIDI_TYPE_R;
63 case FRIBIDI_TYPE_AL: return PANGO_BIDI_TYPE_AL;
64 case FRIBIDI_TYPE_RLE: return PANGO_BIDI_TYPE_RLE;
65 case FRIBIDI_TYPE_RLO: return PANGO_BIDI_TYPE_RLO;
66 case FRIBIDI_TYPE_PDF: return PANGO_BIDI_TYPE_PDF;
67 case FRIBIDI_TYPE_EN: return PANGO_BIDI_TYPE_EN;
68 case FRIBIDI_TYPE_ES: return PANGO_BIDI_TYPE_ES;
69 case FRIBIDI_TYPE_ET: return PANGO_BIDI_TYPE_ET;
70 case FRIBIDI_TYPE_AN: return PANGO_BIDI_TYPE_AN;
71 case FRIBIDI_TYPE_CS: return PANGO_BIDI_TYPE_CS;
72 case FRIBIDI_TYPE_NSM: return PANGO_BIDI_TYPE_NSM;
73 case FRIBIDI_TYPE_BN: return PANGO_BIDI_TYPE_BN;
74 case FRIBIDI_TYPE_BS: return PANGO_BIDI_TYPE_B;
75 case FRIBIDI_TYPE_SS: return PANGO_BIDI_TYPE_S;
76 case FRIBIDI_TYPE_WS: return PANGO_BIDI_TYPE_WS;
77 case FRIBIDI_TYPE_ON: return PANGO_BIDI_TYPE_ON;
78 case FRIBIDI_TYPE_LRI: return PANGO_BIDI_TYPE_LRI;
79 case FRIBIDI_TYPE_RLI: return PANGO_BIDI_TYPE_RLI;
80 case FRIBIDI_TYPE_FSI: return PANGO_BIDI_TYPE_FSI;
81 case FRIBIDI_TYPE_PDI: return PANGO_BIDI_TYPE_PDI;
82 case _FRIBIDI_TYPE_SENTINEL:
83 default:
84 return PANGO_BIDI_TYPE_ON;
85 }
86}
87
/* Some bidi-related functions */
89
90/**
91 * pango_log2vis_get_embedding_levels:
92 * @text: the text to itemize.
93 * @length: the number of bytes (not characters) to process, or -1
94 * if @text is nul-terminated and the length should be calculated.
95 * @pbase_dir: input base direction, and output resolved direction.
96 *
97 * Return the bidirectional embedding levels of the input paragraph.
98 *
99 * The bidirectional embedding levels are defined by the [Unicode Bidirectional
100 * Algorithm](http://www.unicode.org/reports/tr9/).
101 *
102 * If the input base direction is a weak direction, the direction of the
103 * characters in the text will determine the final resolved direction.
104 *
105 * Return value: a newly allocated array of embedding levels, one item per
106 * character (not byte), that should be freed using [func@GLib.free].
107 *
108 * Since: 1.4
109 */
110guint8 *
111pango_log2vis_get_embedding_levels (const gchar *text,
112 int length,
113 PangoDirection *pbase_dir)
114{
115 glong n_chars, i;
116 guint8 *embedding_levels_list;
117 const gchar *p;
118 FriBidiParType fribidi_base_dir;
119 FriBidiCharType *bidi_types;
120 FriBidiBracketType *bracket_types;
121 FriBidiLevel max_level;
122 FriBidiCharType ored_types = 0;
123 FriBidiCharType anded_strongs = FRIBIDI_TYPE_RLE;
124
125 G_STATIC_ASSERT (sizeof (FriBidiLevel) == sizeof (guint8));
126 G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
127
128 switch (*pbase_dir)
129 {
130 case PANGO_DIRECTION_LTR:
131 case PANGO_DIRECTION_TTB_RTL:
132 fribidi_base_dir = FRIBIDI_PAR_LTR;
133 break;
134 case PANGO_DIRECTION_RTL:
135 case PANGO_DIRECTION_TTB_LTR:
136 fribidi_base_dir = FRIBIDI_PAR_RTL;
137 break;
138 case PANGO_DIRECTION_WEAK_RTL:
139 fribidi_base_dir = FRIBIDI_PAR_WRTL;
140 break;
141 case PANGO_DIRECTION_WEAK_LTR:
142 case PANGO_DIRECTION_NEUTRAL:
143 default:
144 fribidi_base_dir = FRIBIDI_PAR_WLTR;
145 break;
146 }
147
148 if (length < 0)
149 length = strlen (s: text);
150
151 n_chars = g_utf8_strlen (p: text, max: length);
152
153 bidi_types = g_new (FriBidiCharType, n_chars);
154 bracket_types = g_new (FriBidiBracketType, n_chars);
155 embedding_levels_list = g_new (guint8, n_chars);
156
157 for (i = 0, p = text; p < text + length; p = g_utf8_next_char(p), i++)
158 {
159 gunichar ch = g_utf8_get_char (p);
160 FriBidiCharType char_type = fribidi_get_bidi_type (ch);
161
162 if (i == n_chars)
163 break;
164
165 bidi_types[i] = char_type;
166 ored_types |= char_type;
167 if (FRIBIDI_IS_STRONG (char_type))
168 anded_strongs &= char_type;
169 if (G_UNLIKELY(bidi_types[i] == FRIBIDI_TYPE_ON))
170 bracket_types[i] = fribidi_get_bracket (ch);
171 else
172 bracket_types[i] = FRIBIDI_NO_BRACKET;
173 }
174
175 /* Short-circuit (malloc-expensive) FriBidi call for unidirectional
176 * text.
177 *
178 * For details see:
179 * https://bugzilla.gnome.org/show_bug.cgi?id=590183
180 */
181
182 /* The case that all resolved levels will be ltr.
183 * No isolates, all strongs be LTR, there should be no Arabic numbers
184 * (or letters for that matter), and one of the following:
185 *
186 * o base_dir doesn't have an RTL taste.
187 * o there are letters, and base_dir is weak.
188 */
189 if (!FRIBIDI_IS_ISOLATE (ored_types) &&
190 !FRIBIDI_IS_RTL (ored_types) &&
191 !FRIBIDI_IS_ARABIC (ored_types) &&
192 (!FRIBIDI_IS_RTL (fribidi_base_dir) ||
193 (FRIBIDI_IS_WEAK (fribidi_base_dir) &&
194 FRIBIDI_IS_LETTER (ored_types))
195 ))
196 {
197 /* all LTR */
198 fribidi_base_dir = FRIBIDI_PAR_LTR;
199 memset (s: embedding_levels_list, c: 0, n: n_chars);
200 goto resolved;
201 }
202 /* The case that all resolved levels will be RTL is much more complex.
203 * No isolates, no numbers, all strongs are RTL, and one of
204 * the following:
205 *
206 * o base_dir has an RTL taste (may be weak).
207 * o there are letters, and base_dir is weak.
208 */
209 else if (!FRIBIDI_IS_ISOLATE (ored_types) &&
210 !FRIBIDI_IS_NUMBER (ored_types) &&
211 FRIBIDI_IS_RTL (anded_strongs) &&
212 (FRIBIDI_IS_RTL (fribidi_base_dir) ||
213 (FRIBIDI_IS_WEAK (fribidi_base_dir) &&
214 FRIBIDI_IS_LETTER (ored_types))
215 ))
216 {
217 /* all RTL */
218 fribidi_base_dir = FRIBIDI_PAR_RTL;
219 memset (s: embedding_levels_list, c: 1, n: n_chars);
220 goto resolved;
221 }
222
223
224 max_level = fribidi_get_par_embedding_levels_ex (bidi_types, bracket_types, len: n_chars,
225 pbase_dir: &fribidi_base_dir,
226 embedding_levels: (FriBidiLevel*)embedding_levels_list);
227
228 if (G_UNLIKELY(max_level == 0))
229 {
230 /* fribidi_get_par_embedding_levels() failed. */
231 memset (s: embedding_levels_list, c: 0, n: length);
232 }
233
234resolved:
235 g_free (mem: bidi_types);
236 g_free (mem: bracket_types);
237
238 *pbase_dir = (fribidi_base_dir == FRIBIDI_PAR_LTR) ? PANGO_DIRECTION_LTR : PANGO_DIRECTION_RTL;
239
240 return embedding_levels_list;
241}
242
243/**
244 * pango_unichar_direction:
245 * @ch: a Unicode character
246 *
247 * Determines the inherent direction of a character.
248 *
249 * The inherent direction is either `PANGO_DIRECTION_LTR`, `PANGO_DIRECTION_RTL`,
250 * or `PANGO_DIRECTION_NEUTRAL`.
251 *
252 * This function is useful to categorize characters into left-to-right
253 * letters, right-to-left letters, and everything else. If full Unicode
254 * bidirectional type of a character is needed, [func@Pango.BidiType.for_unichar]
255 * can be used instead.
256 *
257 * Return value: the direction of the character.
258 */
259PangoDirection
260pango_unichar_direction (gunichar ch)
261{
262 FriBidiCharType fribidi_ch_type;
263
264 G_STATIC_ASSERT (sizeof (FriBidiChar) == sizeof (gunichar));
265
266 fribidi_ch_type = fribidi_get_bidi_type (ch);
267
268 if (!FRIBIDI_IS_STRONG (fribidi_ch_type))
269 return PANGO_DIRECTION_NEUTRAL;
270 else if (FRIBIDI_IS_RTL (fribidi_ch_type))
271 return PANGO_DIRECTION_RTL;
272 else
273 return PANGO_DIRECTION_LTR;
274}
275
276
277/**
278 * pango_get_mirror_char:
279 * @ch: a Unicode character
280 * @mirrored_ch: location to store the mirrored character
281 *
282 * Returns the mirrored character of a Unicode character.
283 *
284 * Mirror characters are determined by the Unicode mirrored property.
285 *
286 * Return value: %TRUE if @ch has a mirrored character and @mirrored_ch is
287 * filled in, %FALSE otherwise
288 *
289 * Deprecated: 1.30: Use [func@GLib.unichar_get_mirror_char] instead;
290 * the docs for that function provide full details.
291 */
292gboolean
293pango_get_mirror_char (gunichar ch,
294 gunichar *mirrored_ch)
295{
296 return g_unichar_get_mirror_char (ch, mirrored_ch);
297}
298
299

/* source code of gtk/subprojects/pango/pango/pango-bidi-type.c */