1 | /* Pango |
2 | * break-arabic.c: |
3 | * |
4 | * Copyright (C) 2006 Red Hat Software |
5 | * Copyright (C) 2006 Sharif FarsiWeb, Inc. |
6 | * Authors: Behdad Esfahbod <besfahbo@redhat.com> |
7 | * Roozbeh Pournader <roozbeh@farsiweb.info> |
8 | * |
9 | * This library is free software; you can redistribute it and/or |
10 | * modify it under the terms of the GNU Library General Public |
11 | * License as published by the Free Software Foundation; either |
12 | * version 2 of the License, or (at your option) any later version. |
13 | * |
14 | * This library is distributed in the hope that it will be useful, |
15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | * Library General Public License for more details. |
18 | * |
19 | * You should have received a copy of the GNU Library General Public |
20 | * License along with this library; if not, write to the |
21 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
22 | * Boston, MA 02111-1307, USA. |
23 | */ |
24 | |
25 | #include "config.h" |
26 | |
27 | #include "pango-break.h" |
28 | |
/* Code points of the Arabic letters examined by this file. */
#define ALEF_WITH_MADDA_ABOVE   0x0622  /* first code point of the IS_COMPOSITE range */
#define YEH_WITH_HAMZA_ABOVE    0x0626  /* last code point of the IS_COMPOSITE range */
#define ALEF                    0x0627
#define WAW                     0x0648
#define YEH                     0x064A

/* Code points of the marks that may follow one of the letters above. */
#define MADDAH_ABOVE            0x0653  /* first code point of the IS_COMPOSITE_WITH_ALEF range */
#define HAMZA_ABOVE             0x0654
#define HAMZA_BELOW             0x0655  /* last code point of the IS_COMPOSITE_WITH_ALEF range */
/*
 * True for Arabic characters that have a canonical decomposition and are
 * more than a mere ligature.  U+06C0, U+06C2 and U+06D3 are deliberately
 * left out, because the Unicode Character Database's NamesList.txt marks
 * them as "not an independent letter".
 *
 * Note: the argument may be evaluated twice.
 */
#define IS_COMPOSITE(c) ((c) >= ALEF_WITH_MADDA_ABOVE && (c) <= YEH_WITH_HAMZA_ABOVE)
46 | |
/*
 * True when a character is the second part of a composite Arabic character
 * whose first part is an Alef.  Note: the argument may be evaluated twice.
 */
#define IS_COMPOSITE_WITH_ALEF(c) ((c) >= MADDAH_ABOVE && (c) <= HAMZA_BELOW)
49 | |
50 | static void |
51 | break_arabic (const char *text, |
52 | int length, |
53 | const PangoAnalysis *analysis G_GNUC_UNUSED, |
54 | PangoLogAttr *attrs, |
55 | int attrs_len G_GNUC_UNUSED) |
56 | { |
57 | int i; |
58 | const char *p; |
59 | gunichar prev_wc, this_wc; |
60 | |
61 | /* See http://bugzilla.gnome.org/show_bug.cgi?id=350132 for issues this |
62 | * module tries to solve. |
63 | */ |
64 | |
65 | for (p = text, i = 0, prev_wc = 0; |
66 | p < text + length; |
67 | p = g_utf8_next_char (p), i++, prev_wc = this_wc) |
68 | { |
69 | this_wc = g_utf8_get_char (p); |
70 | |
71 | /* |
72 | * Unset backspace_deletes_character for various combinations. |
73 | * |
74 | * A few more combinations may need to be handled here, but are not |
75 | * handled yet, as expectations of users is not known or may differ |
76 | * among different languages or users: |
77 | * some letters combined with U+0658 ARABIC MARK NOON GHUNNA; |
78 | * combinations considered one letter in Azerbaijani (WAW+SUKUN and |
79 | * FARSI_YEH+HAMZA_ABOVE); combinations of YEH and ALEF_MAKSURA with |
80 | * HAMZA_BELOW (Qur'anic); TATWEEL+HAMZA_ABOVE (Qur'anic). |
81 | * |
82 | * FIXME: Ordering these in some other way may lower the time spent here, or not. |
83 | */ |
84 | if (G_UNLIKELY ( |
85 | IS_COMPOSITE (this_wc) || |
86 | (prev_wc == ALEF && IS_COMPOSITE_WITH_ALEF (this_wc)) || |
87 | (this_wc == HAMZA_ABOVE && (prev_wc == WAW || prev_wc == YEH)) |
88 | )) |
89 | attrs[i+1].backspace_deletes_character = FALSE; |
90 | } |
91 | } |
92 | |