1/* Pango
2 * break-indic.c:
3 *
4 * Copyright (C) 2006 Red Hat Software
5 * Author: Akira TAGOH <tagoh@redhat.com>
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 */
22
23#include "config.h"
24
25#include "pango-break.h"
26
/*
 * Code points tested by the two classification macros at the end of this
 * section.  break_indic() clears backspace_deletes_character on the
 * attribute that follows any character matched by either macro.
 */

/* Devanagari */
#define DEV_RRA         0x0931  /* 0930 + 093c */
#define DEV_QA          0x0958  /* 0915 + 093c */
#define DEV_YA          0x095F  /* 092f + 093c */
#define DEV_KHHA        0x0959
#define DEV_GHHA        0x095A
#define DEV_ZA          0x095B
#define DEV_DDDHA       0x095C
#define DEV_RHA         0x095D
#define DEV_FA          0x095E
#define DEV_YYA         0x095F  /* same code point as DEV_YA */

/* Bengali */
/* for split matras in all Brahmi-based scripts */
#define BENGALI_SIGN_O  0x09CB  /* 09c7 + 09be */
#define BENGALI_SIGN_AU 0x09CC  /* 09c7 + 09d7 */
#define BENGALI_RRA     0x09DC
#define BENGALI_RHA     0x09DD
#define BENGALI_YYA     0x09DF

/* Gurmukhi */
#define GURUMUKHI_LLA   0x0A33
#define GURUMUKHI_SHA   0x0A36
#define GURUMUKHI_KHHA  0x0A59
#define GURUMUKHI_GHHA  0x0A5A
#define GURUMUKHI_ZA    0x0A5B
#define GURUMUKHI_RRA   0x0A5C
#define GURUMUKHI_FA    0x0A5E

/* Oriya */
#define ORIYA_AI        0x0B48
#define ORIYA_O         0x0B4B
#define ORIYA_AU        0x0B4C

/* Telugu */
#define TELUGU_EE       0x0C47
#define TELUGU_AI       0x0C48

/* Tamil */
#define TAMIL_O         0x0BCA
#define TAMIL_OO        0x0BCB
#define TAMIL_AU        0x0BCC

/* Kannada */
#define KNDA_EE         0x0CC7
#define KNDA_AI         0x0CC8
#define KNDA_O          0x0CCA
#define KNDA_OO         0x0CCB

/* Malayalam */
#define MLYM_O          0x0D4A
#define MLYM_OO         0x0D4B
#define MLYM_AU         0x0D4C

/*
 * Non-zero when c is one of the precomposed code points listed above
 * (everything except the Bengali O/AU signs and the Malayalam signs,
 * which IS_SPLIT_MATRA_BRAHMI covers).
 *
 * DEV_QA..DEV_YA spans 0x0958..0x095F and therefore already contains
 * DEV_KHHA..DEV_YYA, so no separate test for that sub-range is needed.
 *
 * The argument is parenthesized on every use so that expressions such as
 * "wc & mask" classify correctly.  It is still evaluated several times:
 * do not pass an expression with side effects.
 */
#define IS_COMPOSITE_WITH_BRAHMI_NUKTA(c) ( \
  ((c) >= BENGALI_RRA && (c) <= BENGALI_YYA) || \
  ((c) >= DEV_QA && (c) <= DEV_YA) || ((c) == DEV_RRA) || \
  ((c) >= KNDA_EE && (c) <= KNDA_AI) || ((c) >= KNDA_O && (c) <= KNDA_OO) || \
  ((c) == TAMIL_O) || ((c) == TAMIL_OO) || ((c) == TAMIL_AU) || \
  ((c) == TELUGU_EE) || ((c) == TELUGU_AI) || \
  ((c) == ORIYA_AI) || ((c) == ORIYA_O) || ((c) == ORIYA_AU) || \
  ((c) >= GURUMUKHI_KHHA && (c) <= GURUMUKHI_RRA) || \
  ((c) == GURUMUKHI_FA) || ((c) == GURUMUKHI_LLA) || ((c) == GURUMUKHI_SHA))

/*
 * Non-zero when c is BENGALI_SIGN_O, BENGALI_SIGN_AU, or lies in the
 * range MLYM_O..MLYM_AU.  Same caveat as above: the argument is
 * evaluated more than once.
 */
#define IS_SPLIT_MATRA_BRAHMI(c) ( \
  ((c) == BENGALI_SIGN_O) || ((c) == BENGALI_SIGN_AU) || \
  ((c) >= MLYM_O && (c) <= MLYM_AU))
93
94static void
95not_cursor_position (PangoLogAttr *attr)
96{
97 if (!attr->is_mandatory_break)
98 {
99 attr->is_cursor_position = FALSE;
100 attr->is_char_break = FALSE;
101 attr->is_line_break = FALSE;
102 attr->is_mandatory_break = FALSE;
103 }
104}
105
106static void
107break_indic (const char *text,
108 int length,
109 const PangoAnalysis *analysis,
110 PangoLogAttr *attrs,
111 int attrs_len G_GNUC_UNUSED)
112{
113 const gchar *p, *next = NULL, *next_next;
114 gunichar prev_wc, this_wc, next_wc, next_next_wc;
115 gboolean is_conjunct = FALSE;
116 int i;
117
118 for (p = text, prev_wc = 0, i = 0;
119 p != NULL && p < (text + length);
120 p = next, prev_wc = this_wc, i++)
121 {
122 this_wc = g_utf8_get_char (p);
123 next = g_utf8_next_char (p);
124
125 if (G_UNLIKELY (
126 IS_COMPOSITE_WITH_BRAHMI_NUKTA(this_wc) || IS_SPLIT_MATRA_BRAHMI(this_wc))) {
127 attrs[i+1].backspace_deletes_character = FALSE;
128 }
129
130 if (next != NULL && next < (text + length))
131 {
132 next_wc = g_utf8_get_char (p: next);
133 next_next = g_utf8_next_char (next);
134 }
135 else
136 {
137 next_wc = 0;
138 next_next = NULL;
139 }
140 if (next_next != NULL && next_next < (text + length))
141 next_next_wc = g_utf8_get_char (p: next_next);
142 else
143 next_next_wc = 0;
144
145 switch (analysis->script)
146 {
147 case PANGO_SCRIPT_SINHALA:
148 /*
149 * TODO: The cursor position should be based on the state table.
150 * This is the wrong place to be doing this.
151 */
152
153 /*
154 * The cursor should treat as a single glyph:
155 * SINHALA CONS + 0x0DCA + 0x200D + SINHALA CONS
156 * SINHALA CONS + 0x200D + 0x0DCA + SINHALA CONS
157 */
158 if ((this_wc == 0x0DCA && next_wc == 0x200D)
159 || (this_wc == 0x200D && next_wc == 0x0DCA))
160 {
161 not_cursor_position(attr: &attrs[i]);
162 not_cursor_position(attr: &attrs[i + 1]);
163 is_conjunct = TRUE;
164 }
165 else if (is_conjunct
166 && (prev_wc == 0x200D || prev_wc == 0x0DCA)
167 && this_wc >= 0x0D9A
168 && this_wc <= 0x0DC6)
169 {
170 not_cursor_position(attr: &attrs[i]);
171 is_conjunct = FALSE;
172 }
173 /*
174 * Consonant clusters do NOT result in implicit conjuncts
175 * in SINHALA orthography.
176 */
177 else if (!is_conjunct && prev_wc == 0x0DCA && this_wc != 0x200D)
178 {
179 attrs[i].is_cursor_position = TRUE;
180 }
181
182 break;
183
184 default:
185
186 if (prev_wc != 0 && (this_wc == 0x200D || this_wc == 0x200C))
187 {
188 not_cursor_position(attr: &attrs[i]);
189 if (next_wc != 0)
190 {
191 not_cursor_position(attr: &attrs[i+1]);
192 if ((next_next_wc != 0) &&
193 (next_wc == 0x09CD || /* Bengali */
194 next_wc == 0x0ACD || /* Gujarati */
195 next_wc == 0x094D || /* Hindi */
196 next_wc == 0x0CCD || /* Kannada */
197 next_wc == 0x0D4D || /* Malayalam */
198 next_wc == 0x0B4D || /* Oriya */
199 next_wc == 0x0A4D || /* Punjabi */
200 next_wc == 0x0BCD || /* Tamil */
201 next_wc == 0x0C4D)) /* Telugu */
202 {
203 not_cursor_position(attr: &attrs[i+2]);
204 }
205 }
206 }
207
208 break;
209 }
210 }
211}
212

/* source code of gtk/subprojects/pango/pango/break-indic.c */