/* Pango
 * break-indic.c:
 *
 * Copyright (C) 2006 Red Hat Software
 * Author: Akira TAGOH <tagoh@redhat.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
22 | |
#include "config.h"

#include "pango-break.h"
26 | |
/* Devanagari: precomposed "consonant + nukta (U+093C)" letters */
#define DEV_RRA   0x0931 /* 0930 + 093c */
#define DEV_QA    0x0958 /* 0915 + 093c */
#define DEV_YA    0x095F /* 092f + 093c */
#define DEV_KHHA  0x0959
#define DEV_GHHA  0x095A
#define DEV_ZA    0x095B
#define DEV_DDDHA 0x095C
#define DEV_RHA   0x095D
#define DEV_FA    0x095E
#define DEV_YYA   0x095F /* same code point as DEV_YA */

/* Bengali */
/* for split matras in all brahmi based script */
#define BENGALI_SIGN_O  0x09CB /* 09c7 + 09be */
#define BENGALI_SIGN_AU 0x09CC /* 09c7 + 09d7 */
#define BENGALI_RRA     0x09DC
#define BENGALI_RHA     0x09DD
#define BENGALI_YYA     0x09DF

/* Gurmukhi */
#define GURUMUKHI_LLA  0x0A33
#define GURUMUKHI_SHA  0x0A36
#define GURUMUKHI_KHHA 0x0A59
#define GURUMUKHI_GHHA 0x0A5A
#define GURUMUKHI_ZA   0x0A5B
#define GURUMUKHI_RRA  0x0A5C
#define GURUMUKHI_FA   0x0A5E

/* Oriya */
#define ORIYA_AI 0x0B48
#define ORIYA_O  0x0B4B
#define ORIYA_AU 0x0B4C

/* Telugu */
#define TELUGU_EE 0x0C47
#define TELUGU_AI 0x0C48

/* Tamil */
#define TAMIL_O  0x0BCA
#define TAMIL_OO 0x0BCB
#define TAMIL_AU 0x0BCC

/* Kannada */
#define KNDA_EE 0x0CC7
#define KNDA_AI 0x0CC8
#define KNDA_O  0x0CCA
#define KNDA_OO 0x0CCB

/* Malayalam */
#define MLYM_O  0x0D4A
#define MLYM_OO 0x0D4B
#define MLYM_AU 0x0D4C

/*
 * IS_COMPOSITE_WITH_BRAHMI_NUKTA(c):
 * Non-zero when code point c is one of the listed characters:
 *   - Bengali 09DC..09DF
 *   - Devanagari 0931 and 0958..095F (this range already contains
 *     DEV_KHHA..DEV_YYA, so no separate test is needed for those)
 *   - Kannada 0CC7..0CC8 and 0CCA..0CCB
 *   - Tamil 0BCA, 0BCB, 0BCC
 *   - Telugu 0C47, 0C48
 *   - Oriya 0B48, 0B4B, 0B4C
 *   - Gurmukhi 0A33, 0A36, 0A59..0A5C, 0A5E
 *
 * The argument is fully parenthesised, so any expression may be passed,
 * but it is evaluated several times: do not pass an expression with
 * side effects.
 */
#define IS_COMPOSITE_WITH_BRAHMI_NUKTA(c) (                                   \
  ((c) >= BENGALI_RRA && (c) <= BENGALI_YYA) ||                               \
  ((c) >= DEV_QA && (c) <= DEV_YA) || ((c) == DEV_RRA) ||                     \
  ((c) >= KNDA_EE && (c) <= KNDA_AI) || ((c) >= KNDA_O && (c) <= KNDA_OO) ||  \
  ((c) == TAMIL_O) || ((c) == TAMIL_OO) || ((c) == TAMIL_AU) ||               \
  ((c) == TELUGU_EE) || ((c) == TELUGU_AI) ||                                 \
  ((c) == ORIYA_AI) || ((c) == ORIYA_O) || ((c) == ORIYA_AU) ||               \
  ((c) >= GURUMUKHI_KHHA && (c) <= GURUMUKHI_RRA) ||                          \
  ((c) == GURUMUKHI_FA) || ((c) == GURUMUKHI_LLA) || ((c) == GURUMUKHI_SHA))

/*
 * IS_SPLIT_MATRA_BRAHMI(c):
 * Non-zero when code point c is Bengali 09CB or 09CC, or lies in the
 * Malayalam range 0D4A..0D4C.
 *
 * The argument is evaluated several times: do not pass an expression
 * with side effects.
 */
#define IS_SPLIT_MATRA_BRAHMI(c) (                                            \
  ((c) == BENGALI_SIGN_O) || ((c) == BENGALI_SIGN_AU) ||                      \
  ((c) >= MLYM_O && (c) <= MLYM_AU))
93 | |
94 | static void |
95 | not_cursor_position (PangoLogAttr *attr) |
96 | { |
97 | if (!attr->is_mandatory_break) |
98 | { |
99 | attr->is_cursor_position = FALSE; |
100 | attr->is_char_break = FALSE; |
101 | attr->is_line_break = FALSE; |
102 | attr->is_mandatory_break = FALSE; |
103 | } |
104 | } |
105 | |
106 | static void |
107 | break_indic (const char *text, |
108 | int length, |
109 | const PangoAnalysis *analysis, |
110 | PangoLogAttr *attrs, |
111 | int attrs_len G_GNUC_UNUSED) |
112 | { |
113 | const gchar *p, *next = NULL, *next_next; |
114 | gunichar prev_wc, this_wc, next_wc, next_next_wc; |
115 | gboolean is_conjunct = FALSE; |
116 | int i; |
117 | |
118 | for (p = text, prev_wc = 0, i = 0; |
119 | p != NULL && p < (text + length); |
120 | p = next, prev_wc = this_wc, i++) |
121 | { |
122 | this_wc = g_utf8_get_char (p); |
123 | next = g_utf8_next_char (p); |
124 | |
125 | if (G_UNLIKELY ( |
126 | IS_COMPOSITE_WITH_BRAHMI_NUKTA(this_wc) || IS_SPLIT_MATRA_BRAHMI(this_wc))) { |
127 | attrs[i+1].backspace_deletes_character = FALSE; |
128 | } |
129 | |
130 | if (next != NULL && next < (text + length)) |
131 | { |
132 | next_wc = g_utf8_get_char (p: next); |
133 | next_next = g_utf8_next_char (next); |
134 | } |
135 | else |
136 | { |
137 | next_wc = 0; |
138 | next_next = NULL; |
139 | } |
140 | if (next_next != NULL && next_next < (text + length)) |
141 | next_next_wc = g_utf8_get_char (p: next_next); |
142 | else |
143 | next_next_wc = 0; |
144 | |
145 | switch (analysis->script) |
146 | { |
147 | case PANGO_SCRIPT_SINHALA: |
148 | /* |
149 | * TODO: The cursor position should be based on the state table. |
150 | * This is the wrong place to be doing this. |
151 | */ |
152 | |
153 | /* |
154 | * The cursor should treat as a single glyph: |
155 | * SINHALA CONS + 0x0DCA + 0x200D + SINHALA CONS |
156 | * SINHALA CONS + 0x200D + 0x0DCA + SINHALA CONS |
157 | */ |
158 | if ((this_wc == 0x0DCA && next_wc == 0x200D) |
159 | || (this_wc == 0x200D && next_wc == 0x0DCA)) |
160 | { |
161 | not_cursor_position(attr: &attrs[i]); |
162 | not_cursor_position(attr: &attrs[i + 1]); |
163 | is_conjunct = TRUE; |
164 | } |
165 | else if (is_conjunct |
166 | && (prev_wc == 0x200D || prev_wc == 0x0DCA) |
167 | && this_wc >= 0x0D9A |
168 | && this_wc <= 0x0DC6) |
169 | { |
170 | not_cursor_position(attr: &attrs[i]); |
171 | is_conjunct = FALSE; |
172 | } |
173 | /* |
174 | * Consonant clusters do NOT result in implicit conjuncts |
175 | * in SINHALA orthography. |
176 | */ |
177 | else if (!is_conjunct && prev_wc == 0x0DCA && this_wc != 0x200D) |
178 | { |
179 | attrs[i].is_cursor_position = TRUE; |
180 | } |
181 | |
182 | break; |
183 | |
184 | default: |
185 | |
186 | if (prev_wc != 0 && (this_wc == 0x200D || this_wc == 0x200C)) |
187 | { |
188 | not_cursor_position(attr: &attrs[i]); |
189 | if (next_wc != 0) |
190 | { |
191 | not_cursor_position(attr: &attrs[i+1]); |
192 | if ((next_next_wc != 0) && |
193 | (next_wc == 0x09CD || /* Bengali */ |
194 | next_wc == 0x0ACD || /* Gujarati */ |
195 | next_wc == 0x094D || /* Hindi */ |
196 | next_wc == 0x0CCD || /* Kannada */ |
197 | next_wc == 0x0D4D || /* Malayalam */ |
198 | next_wc == 0x0B4D || /* Oriya */ |
199 | next_wc == 0x0A4D || /* Punjabi */ |
200 | next_wc == 0x0BCD || /* Tamil */ |
201 | next_wc == 0x0C4D)) /* Telugu */ |
202 | { |
203 | not_cursor_position(attr: &attrs[i+2]); |
204 | } |
205 | } |
206 | } |
207 | |
208 | break; |
209 | } |
210 | } |
211 | } |
212 | |