1 | /* Pango |
2 | * gen-script-for-lang.c: Utility program to generate pango-script-lang-table.h |
3 | * |
4 | * Copyright (C) 2003 Red Hat, Inc. |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | */ |
21 | |
22 | #include "config.h" |
23 | #include <stdarg.h> |
24 | #include <stdio.h> |
25 | #include <stdlib.h> |
26 | #include <string.h> |
27 | #include <time.h> |
28 | |
29 | #include <pango/pango-enum-types.h> |
30 | #include <pango/pango-script.h> |
31 | #include <pango/pango-types.h> |
32 | |
33 | #include <fontconfig/fontconfig.h> |
34 | |
/* Number of script slots tracked per language (size of LangInfo.scripts). */
enum { MAX_SCRIPTS = 3 };
36 | |
37 | typedef struct { |
38 | GUnicodeScript script; |
39 | int freq; |
40 | } ScriptInfo; |
41 | |
42 | typedef struct { |
43 | PangoLanguage *lang; |
44 | ScriptInfo scripts[MAX_SCRIPTS]; |
45 | } LangInfo; |
46 | |
47 | static const char *get_script_name (GUnicodeScript script) |
48 | { |
49 | static GEnumClass *class = NULL; |
50 | GEnumValue *value; |
51 | if (!class) |
52 | class = g_type_class_ref (G_TYPE_UNICODE_SCRIPT); |
53 | |
54 | value = g_enum_get_value (enum_class: class, value: script); |
55 | g_assert (value); |
56 | |
57 | return value->value_name; |
58 | } |
59 | |
60 | static void fail (const char *format, ...) G_GNUC_PRINTF (1, 2) G_GNUC_NORETURN; |
61 | static void fail (const char *format, ...) |
62 | { |
63 | va_list vap; |
64 | |
65 | va_start (vap, format); |
66 | vfprintf (stderr, format: format, arg: vap); |
67 | va_end (vap); |
68 | |
69 | exit (status: 1); |
70 | } |
71 | |
72 | static void |
73 | script_for_char (gunichar ch, |
74 | LangInfo *info) |
75 | { |
76 | GUnicodeScript script = g_unichar_get_script (ch); |
77 | if (script != G_UNICODE_SCRIPT_COMMON && |
78 | script != G_UNICODE_SCRIPT_INHERITED) |
79 | { |
80 | int j; |
81 | |
82 | if (script == G_UNICODE_SCRIPT_UNKNOWN) |
83 | { |
84 | g_message ("Script unknown for U+%04X" , ch); |
85 | return; |
86 | } |
87 | |
88 | for (j = 0; j < MAX_SCRIPTS; j++) |
89 | { |
90 | if (info->scripts[j].script == script) |
91 | break; |
92 | if (info->scripts[j].script == G_UNICODE_SCRIPT_COMMON) |
93 | { |
94 | info->scripts[j].script = script; |
95 | break; |
96 | } |
97 | } |
98 | |
99 | if (j == MAX_SCRIPTS) |
100 | fail (format: "More than %d scripts found for %s. Increase MAX_SCRIPTS.\n" , MAX_SCRIPTS, pango_language_to_string (info->lang)); |
101 | |
102 | info->scripts[j].freq++; |
103 | } |
104 | } |
105 | |
106 | static void |
107 | scripts_for_lang (LangInfo *info) |
108 | { |
109 | const FcCharSet *charset; |
110 | FcChar32 ucs4, pos; |
111 | FcChar32 map[FC_CHARSET_MAP_SIZE]; |
112 | int i; |
113 | |
114 | charset = FcLangGetCharSet (lang: (const FcChar8 *) info->lang); |
115 | if (!charset) |
116 | return; |
117 | |
118 | for (ucs4 = FcCharSetFirstPage (a: charset, map, next: &pos); |
119 | ucs4 != FC_CHARSET_DONE; |
120 | ucs4 = FcCharSetNextPage (a: charset, map, next: &pos)) |
121 | { |
122 | |
123 | for (i = 0; i < FC_CHARSET_MAP_SIZE; i++) |
124 | { |
125 | FcChar32 bits = map[i]; |
126 | FcChar32 base = ucs4 + i * 32; |
127 | int b = 0; |
128 | bits = map[i]; |
129 | while (bits) |
130 | { |
131 | if (bits & 1) |
132 | script_for_char (ch: base + b, info); |
133 | |
134 | bits >>= 1; |
135 | b++; |
136 | } |
137 | } |
138 | } |
139 | } |
140 | |
141 | static void |
142 | do_lang (GArray *script_array, |
143 | const FcChar8 *lang) |
144 | { |
145 | LangInfo info; |
146 | int j; |
147 | |
148 | info.lang = pango_language_from_string (language: (const char *)lang); |
149 | |
150 | for (j = 0; j < MAX_SCRIPTS; j++) |
151 | { |
152 | info.scripts[j].script = G_UNICODE_SCRIPT_COMMON; |
153 | info.scripts[j].freq = 0; |
154 | } |
155 | |
156 | scripts_for_lang (info: &info); |
157 | |
158 | g_array_append_val (script_array, info); |
159 | } |
160 | |
161 | static int |
162 | compare_script (gconstpointer a, |
163 | gconstpointer b, |
164 | gpointer data) |
165 | { |
166 | const ScriptInfo *info_a = a; |
167 | const ScriptInfo *info_b = b; |
168 | G_GNUC_UNUSED LangInfo *lang_info = data; |
169 | |
170 | /* first compare frequencies, higher first */ |
171 | if (info_a->freq > info_b->freq) |
172 | return -1; |
173 | if (info_a->freq < info_b->freq) |
174 | return +1; |
175 | |
176 | /* next compare script indices, higher first (it's more specific) */ |
177 | if (info_a->script > info_b->script) |
178 | return -1; |
179 | if (info_a->script < info_b->script) |
180 | return +1; |
181 | |
182 | /* for stability, next compare pointers themselves, smaller first */ |
183 | if (info_a < info_b) |
184 | return -1; |
185 | if (info_a > info_b) |
186 | return +1; |
187 | |
188 | return 0; |
189 | } |
190 | |
191 | static int |
192 | compare_lang (gconstpointer a, |
193 | gconstpointer b) |
194 | { |
195 | const LangInfo *info_a = a; |
196 | const LangInfo *info_b = b; |
197 | |
198 | return strcmp (pango_language_to_string (info_a->lang), |
199 | pango_language_to_string (info_b->lang)); |
200 | } |
201 | |
202 | int main (void) |
203 | { |
204 | GArray *script_array; |
205 | |
206 | unsigned int i; |
207 | int j; |
208 | int max_lang_len = 0; |
209 | int max_script_len = 0; |
210 | |
211 | FcStrSet *langs_set; |
212 | FcStrList *langs; |
213 | FcChar8* lang; |
214 | |
215 | char date_buf[200]; |
216 | const char *date_str = "unknown" ; |
217 | time_t t; |
218 | struct tm *tmp; |
219 | int fc_version; |
220 | |
221 | script_array = g_array_new (FALSE, FALSE, element_size: sizeof (LangInfo)); |
222 | |
223 | |
224 | langs_set = FcGetLangs (); |
225 | langs = FcStrListCreate (set: langs_set); |
226 | FcStrSetDestroy (set: langs_set); |
227 | |
228 | while ((lang = FcStrListNext (list: langs))) |
229 | do_lang (script_array, lang); |
230 | |
231 | FcStrListDone (list: langs); |
232 | |
233 | |
234 | g_array_sort (array: script_array, compare_func: compare_lang); |
235 | |
236 | for (i = 0; i < script_array->len; i++) |
237 | { |
238 | LangInfo *info = &g_array_index (script_array, LangInfo, i); |
239 | |
240 | max_lang_len = MAX (max_lang_len, |
241 | (int)strlen (pango_language_to_string (info->lang))); |
242 | |
243 | g_qsort_with_data (pbase: info->scripts, |
244 | G_N_ELEMENTS (info->scripts), |
245 | size: sizeof (info->scripts[0]), |
246 | compare_func: compare_script, |
247 | user_data: info); |
248 | |
249 | for (j = 0; j < MAX_SCRIPTS; j++) |
250 | if (!info->scripts[j].freq) |
251 | break; |
252 | |
253 | max_script_len = MAX (max_script_len, j); |
254 | } |
255 | |
256 | if ((t = time(NULL), tmp = localtime (timer: &t)) && strftime(s: date_buf, maxsize: sizeof(date_buf), format: "%Y-%m-%d" , tp: tmp)) |
257 | date_str = date_buf; |
258 | |
259 | fc_version = FcGetVersion (); |
260 | |
261 | g_print (format: "/* pango-script-lang-table.h:\n" |
262 | " * \n" |
263 | " * Generated by %s\n" |
264 | " * Date: %s\n" |
265 | " * Source: fontconfig-%d.%d.%d\n" |
266 | " * \n" |
267 | " * Do not edit.\n" |
268 | " */\n" , |
269 | __FILE__, |
270 | date_str, |
271 | fc_version / 10000, (fc_version / 100) % 100, fc_version % 100); |
272 | |
273 | g_print (format: "typedef struct _PangoScriptForLang {\n" |
274 | " const char lang[%d];\n" |
275 | " GUnicodeScript scripts[%d];\n" |
276 | "} PangoScriptForLang;\n" |
277 | "\n" |
278 | "static const PangoScriptForLang pango_script_for_lang[] = {\n" , |
279 | max_lang_len + 1, |
280 | max_script_len); |
281 | |
282 | for (i = 0; i < script_array->len; i++) |
283 | { |
284 | LangInfo *info = &g_array_index (script_array, LangInfo, i); |
285 | |
286 | g_print (format: " { \"%s\", %*s{ " , |
287 | pango_language_to_string (info->lang), |
288 | (int)(max_lang_len - strlen (pango_language_to_string (info->lang))), "" ); |
289 | for (j = 0; j < MAX_SCRIPTS; j++) |
290 | { |
291 | if (!info->scripts[j].freq) |
292 | { |
293 | /* Avoid generating code with empty arrays */ |
294 | if (j == 0) |
295 | g_print (format: "0" ); |
296 | |
297 | break; |
298 | } |
299 | |
300 | if (j != 0) |
301 | g_print (format: ", " ); |
302 | g_print (format: "%s/*%d*/" , |
303 | get_script_name (script: info->scripts[j].script), |
304 | info->scripts[j].freq); |
305 | } |
306 | g_print (format: " } }" ); |
307 | if (i + 1 != script_array->len) |
308 | g_print (format: "," ); |
309 | g_print (format: "\n" ); |
310 | } |
311 | |
312 | g_print (format: "};\n" ); |
313 | |
314 | return 0; |
315 | } |
316 | |