1 | /* FriBidi |
2 | * fribidi-char-sets-cap-rtl.c - CapRTL character set conversion routines |
3 | * |
4 | * Authors: |
5 | * Behdad Esfahbod, 2001, 2002, 2004 |
6 | * Dov Grobgeld, 1999, 2000 |
7 | * |
8 | * Copyright (C) 2004 Sharif FarsiWeb, Inc |
9 | * Copyright (C) 2001,2002 Behdad Esfahbod |
10 | * Copyright (C) 1999,2000 Dov Grobgeld |
11 | * |
12 | * This library is free software; you can redistribute it and/or |
13 | * modify it under the terms of the GNU Lesser General Public |
14 | * License as published by the Free Software Foundation; either |
15 | * version 2.1 of the License, or (at your option) any later version. |
16 | * |
17 | * This library is distributed in the hope that it will be useful, |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 | * Lesser General Public License for more details. |
21 | * |
22 | * You should have received a copy of the GNU Lesser General Public License |
23 | * along with this library, in a file named COPYING; if not, write to the |
24 | * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
25 | * Boston, MA 02110-1301, USA |
26 | * |
27 | * For licensing issues, contact <fribidi.license@gmail.com>. |
28 | */ |
29 | |
30 | #include <common.h> |
31 | |
32 | #include <fribidi-char-sets-cap-rtl.h> |
33 | |
34 | #include <fribidi-unicode.h> |
35 | #include <fribidi-mirroring.h> |
36 | #include <fribidi-bidi-types.h> |
37 | |
38 | #include <bidi-types.h> |
39 | |
40 | #include <stdio.h> |
41 | |
42 | enum |
43 | { |
44 | # define _FRIBIDI_ADD_TYPE(TYPE,SYMBOL) TYPE = FRIBIDI_TYPE_##TYPE, |
45 | # include "fribidi-bidi-types-list.h" |
46 | # undef _FRIBIDI_ADD_TYPE |
47 | _FRIBIDI_MAX_TYPES_VALUE |
48 | }; |
49 | |
50 | enum |
51 | { |
52 | # define _FRIBIDI_ADD_TYPE(TYPE,SYMBOL) DUMMY_##TYPE, |
53 | # include "fribidi-bidi-types-list.h" |
54 | # undef _FRIBIDI_ADD_TYPE |
55 | _FRIBIDI_NUM_TYPES |
56 | }; |
57 | |
58 | static FriBidiCharType CapRTLCharTypes[] = { |
59 | /* *INDENT-OFF* */ |
60 | ON, ON, ON, ON, LTR,RTL,ON, ON, ON, ON, ON, ON, ON, BS, RLO,RLE, /* 00-0f */ |
61 | LRO,LRE,PDF,WS, LRI, RLI, FSI, PDI, ON, ON, ON, ON, ON, ON, ON, ON, /* 10-1f */ |
62 | WS, ON, ON, ON, ET, ON, ON, ON, ON, ON, ON, ET, CS, ON, ES, ES, /* 20-2f */ |
63 | EN, EN, EN, EN, EN, EN, AN, AN, AN, AN, CS, ON, ON, ON, ON, ON, /* 30-3f */ |
64 | RTL,AL, AL, AL, AL, AL, AL, RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL, /* 40-4f */ |
65 | RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,ON, BS, ON, BN, ON, /* 50-5f */ |
66 | NSM,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR, /* 60-6f */ |
67 | LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,ON, SS, ON, WS, ON, /* 70-7f */ |
68 | /* *INDENT-ON* */ |
69 | }; |
70 | |
71 | #define CAPRTL_CHARS (int)(sizeof CapRTLCharTypes / sizeof CapRTLCharTypes[0]) |
72 | |
73 | static FriBidiChar *caprtl_to_unicode = NULL; |
74 | |
75 | static void |
76 | init_cap_rtl ( |
77 | void |
78 | ) |
79 | { |
80 | int request[_FRIBIDI_NUM_TYPES]; |
81 | FriBidiCharType to_type[_FRIBIDI_NUM_TYPES]; |
82 | int num_types = 0, count = 0; |
83 | FriBidiCharType i; |
84 | char mark[CAPRTL_CHARS]; |
85 | |
86 | caprtl_to_unicode = |
87 | (FriBidiChar *) fribidi_malloc (CAPRTL_CHARS * |
88 | sizeof caprtl_to_unicode[0]); |
89 | for (i = 0; i < CAPRTL_CHARS; i++) |
90 | if (CapRTLCharTypes[i] == fribidi_get_bidi_type (ch: i)) |
91 | { |
92 | caprtl_to_unicode[i] = i; |
93 | mark[i] = 1; |
94 | } |
95 | else |
96 | { |
97 | int j; |
98 | |
99 | caprtl_to_unicode[i] = FRIBIDI_UNICODE_CHARS; |
100 | mark[i] = 0; |
101 | if (fribidi_get_mirror_char (ch: i, NULL)) |
102 | { |
103 | DBG ("warning: I could not map mirroring character map to itself in CapRTL" ); |
104 | } |
105 | |
106 | for (j = 0; j < num_types; j++) |
107 | if (to_type[j] == CapRTLCharTypes[i]) |
108 | break; |
109 | if (j == num_types) |
110 | { |
111 | num_types++; |
112 | to_type[j] = CapRTLCharTypes[i]; |
113 | request[j] = 0; |
114 | } |
115 | request[j]++; |
116 | count++; |
117 | } |
118 | for (i = 0; i < 0x10000 && count; i++) /* Assign BMP chars to CapRTL entries */ |
119 | if (!fribidi_get_mirror_char (ch: i, NULL) && !(i < CAPRTL_CHARS && mark[i])) |
120 | { |
121 | int j, k; |
122 | FriBidiCharType t = fribidi_get_bidi_type (ch: i); |
123 | for (j = 0; j < num_types; j++) |
124 | if (to_type[j] == t) |
125 | break; |
126 | if (j >= num_types || !request[j]) /* Do not need this type */ |
127 | continue; |
128 | for (k = 0; k < CAPRTL_CHARS; k++) |
129 | if (caprtl_to_unicode[k] == FRIBIDI_UNICODE_CHARS |
130 | && to_type[j] == CapRTLCharTypes[k]) |
131 | { |
132 | request[j]--; |
133 | count--; |
134 | caprtl_to_unicode[k] = i; |
135 | break; |
136 | } |
137 | } |
138 | if (count) |
139 | { |
140 | int j; |
141 | |
142 | DBG ("warning: could not find a mapping for CapRTL to Unicode:" ); |
143 | for (j = 0; j < num_types; j++) |
144 | if (request[j]) |
145 | { |
146 | DBG2 (" need this type: %s" , fribidi_get_bidi_type_name (to_type[j])); |
147 | } |
148 | } |
149 | } |
150 | |
151 | static char |
152 | fribidi_unicode_to_cap_rtl_c ( |
153 | /* input */ |
154 | FriBidiChar uch |
155 | ) |
156 | { |
157 | int i; |
158 | |
159 | if (!caprtl_to_unicode) |
160 | init_cap_rtl (); |
161 | |
162 | for (i = 0; i < CAPRTL_CHARS; i++) |
163 | if (uch == caprtl_to_unicode[i]) |
164 | return (unsigned char) i; |
165 | return '?'; |
166 | } |
167 | |
168 | FriBidiStrIndex |
169 | fribidi_cap_rtl_to_unicode ( |
170 | /* input */ |
171 | const char *s, |
172 | FriBidiStrIndex len, |
173 | /* output */ |
174 | FriBidiChar *us |
175 | ) |
176 | { |
177 | FriBidiStrIndex i, j; |
178 | |
179 | if (!caprtl_to_unicode) |
180 | init_cap_rtl (); |
181 | |
182 | j = 0; |
183 | for (i = 0; i < len; i++) |
184 | { |
185 | char ch; |
186 | |
187 | ch = s[i]; |
188 | if (ch == '_') |
189 | { |
190 | switch (ch = s[++i]) |
191 | { |
192 | case '>': |
193 | us[j++] = FRIBIDI_CHAR_LRM; |
194 | break; |
195 | case '<': |
196 | us[j++] = FRIBIDI_CHAR_RLM; |
197 | break; |
198 | case 'l': |
199 | us[j++] = FRIBIDI_CHAR_LRE; |
200 | break; |
201 | case 'r': |
202 | us[j++] = FRIBIDI_CHAR_RLE; |
203 | break; |
204 | case 'o': |
205 | us[j++] = FRIBIDI_CHAR_PDF; |
206 | break; |
207 | case 'L': |
208 | us[j++] = FRIBIDI_CHAR_LRO; |
209 | break; |
210 | case 'R': |
211 | us[j++] = FRIBIDI_CHAR_RLO; |
212 | break; |
213 | case 'i': |
214 | us[j++] = FRIBIDI_CHAR_LRI; |
215 | break; |
216 | case 'y': |
217 | us[j++] = FRIBIDI_CHAR_RLI; |
218 | break; |
219 | case 'f': |
220 | us[j++] = FRIBIDI_CHAR_FSI; |
221 | break; |
222 | case 'I': |
223 | us[j++] = FRIBIDI_CHAR_PDI; |
224 | break; |
225 | case '_': |
226 | us[j++] = '_'; |
227 | break; |
228 | default: |
229 | us[j++] = '_'; |
230 | i--; |
231 | break; |
232 | } |
233 | } |
234 | else |
235 | { |
236 | if ((int)s[i] < 0) |
237 | us[j++] = '?'; |
238 | else |
239 | us[j++] = caprtl_to_unicode[(int) s[i]]; |
240 | } |
241 | } |
242 | |
243 | return j; |
244 | } |
245 | |
246 | FriBidiStrIndex |
247 | fribidi_unicode_to_cap_rtl ( |
248 | /* input */ |
249 | const FriBidiChar *us, |
250 | FriBidiStrIndex len, |
251 | /* output */ |
252 | char *s |
253 | ) |
254 | { |
255 | FriBidiStrIndex i; |
256 | int j; |
257 | |
258 | j = 0; |
259 | for (i = 0; i < len; i++) |
260 | { |
261 | FriBidiChar ch = us[i]; |
262 | if (!FRIBIDI_IS_EXPLICIT (fribidi_get_bidi_type (ch)) |
263 | && !FRIBIDI_IS_ISOLATE (fribidi_get_bidi_type (ch)) |
264 | && ch != '_' && ch != FRIBIDI_CHAR_LRM && ch != FRIBIDI_CHAR_RLM) |
265 | s[j++] = fribidi_unicode_to_cap_rtl_c (uch: ch); |
266 | else |
267 | { |
268 | s[j++] = '_'; |
269 | switch (ch) |
270 | { |
271 | case FRIBIDI_CHAR_LRM: |
272 | s[j++] = '>'; |
273 | break; |
274 | case FRIBIDI_CHAR_RLM: |
275 | s[j++] = '<'; |
276 | break; |
277 | case FRIBIDI_CHAR_LRE: |
278 | s[j++] = 'l'; |
279 | break; |
280 | case FRIBIDI_CHAR_RLE: |
281 | s[j++] = 'r'; |
282 | break; |
283 | case FRIBIDI_CHAR_PDF: |
284 | s[j++] = 'o'; |
285 | break; |
286 | case FRIBIDI_CHAR_LRO: |
287 | s[j++] = 'L'; |
288 | break; |
289 | case FRIBIDI_CHAR_RLO: |
290 | s[j++] = 'R'; |
291 | break; |
292 | case FRIBIDI_CHAR_LRI: |
293 | s[j++] = 'i'; |
294 | break; |
295 | case FRIBIDI_CHAR_RLI: |
296 | s[j++] = 'y'; |
297 | break; |
298 | case FRIBIDI_CHAR_FSI: |
299 | s[j++] = 'f'; |
300 | break; |
301 | case FRIBIDI_CHAR_PDI: |
302 | s[j++] = 'I'; |
303 | break; |
304 | case '_': |
305 | s[j++] = '_'; |
306 | break; |
307 | default: |
308 | j--; |
309 | if (ch < 256) |
310 | s[j++] = fribidi_unicode_to_cap_rtl_c (uch: ch); |
311 | else |
312 | s[j++] = '?'; |
313 | break; |
314 | } |
315 | } |
316 | } |
317 | s[j] = 0; |
318 | |
319 | return j; |
320 | } |
321 | |
322 | const char * |
323 | fribidi_char_set_desc_cap_rtl ( |
324 | void |
325 | ) |
326 | { |
327 | static char *s = 0; |
328 | int l, i, j; |
329 | |
330 | if (s) |
331 | return s; |
332 | |
333 | l = 10000; |
334 | s = (char *) fribidi_malloc (size: l); |
335 | i = 0; |
336 | i += sprintf (s: s + i, /*l - i, */ |
337 | format: "CapRTL is a character set for testing with the reference\n" |
338 | "implementation, with explicit marks escape strings, and\n" |
339 | "the property that contains all unicode character types in\n" |
340 | "ASCII range 1-127.\n" |
341 | "\n" |
342 | "Warning: CapRTL character types are subject to change.\n" |
343 | "\n" "CapRTL's character types:\n" ); |
344 | for (j = 0; j < CAPRTL_CHARS; j++) |
345 | { |
346 | if (j % 4 == 0) |
347 | s[i++] = '\n'; |
348 | i += sprintf (s: s + i, /*l - i, */ format: " * 0x%02x %c%c %-3s " , j, |
349 | j < 0x20 ? '^' : ' ', |
350 | j < 0x20 ? j + '@' : j < 0x7f ? j : ' ', |
351 | fribidi_get_bidi_type_name (t: CapRTLCharTypes[j])); |
352 | } |
353 | i += sprintf (s: s + i, /*l - i, */ |
354 | format: "\n\n" |
355 | "Escape sequences:\n" |
356 | " Character `_' is used to escape explicit marks. The list is:\n" |
357 | " * _> LRM\n" " * _< RLM\n" |
358 | " * _l LRE\n" " * _r RLE\n" |
359 | " * _L LRO\n" " * _R RLO\n" |
360 | " * _o PDF\n" " * _i LRI\n" |
361 | " * _y RLI\n" " * _f FSI\n" |
362 | " * _I PDI\n" " * __ `_' itself\n" |
363 | "\n" ); |
364 | return s; |
365 | } |
366 | |
367 | /* Editor directions: |
368 | * vim:textwidth=78:tabstop=8:shiftwidth=2:autoindent:cindent |
369 | */ |
370 | |