1 | /* Pango |
2 | * pango-language.c: Language handling routines |
3 | * |
4 | * Copyright (C) 2000 Red Hat Software |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public |
17 | * License along with this library; if not, write to the |
18 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
19 | * Boston, MA 02111-1307, USA. |
20 | */ |
21 | |
22 | #include "config.h" |
23 | #include <errno.h> |
24 | #include <string.h> |
25 | #include <stdlib.h> |
26 | #include <math.h> |
27 | #include <locale.h> |
28 | |
29 | #include "pango-language.h" |
30 | #include "pango-impl-utils.h" |
31 | |
32 | #ifdef HAVE_CORE_TEXT |
33 | #include <CoreFoundation/CoreFoundation.h> |
34 | #endif /* HAVE_CORE_TEXT */ |
35 | |
36 | |
/* A private struct is embedded right *before* the memory that a
 * PangoLanguage * points to (see pango_language_get_private(), which
 * steps back sizeof (PangoLanguagePrivate) bytes from the pointer).
 */

typedef struct {
  /* Cached lookup results, filled in through FIND_BEST_LANG_MATCH_CACHED().
   * The value (gconstpointer) -1 means "not looked up yet"; NULL is a
   * valid cached result meaning "no matching record".
   */
  gconstpointer lang_info;
  gconstpointer script_for_lang;

  int magic; /* Used for verification */
} PangoLanguagePrivate;

/* Value stored in PangoLanguagePrivate.magic by pango_language_private_init()
 * and checked by pango_language_get_private().
 */
#define PANGO_LANGUAGE_PRIVATE_MAGIC 0x0BE4DAD0
49 | |
50 | static void |
51 | pango_language_private_init (PangoLanguagePrivate *priv) |
52 | { |
53 | priv->magic = PANGO_LANGUAGE_PRIVATE_MAGIC; |
54 | |
55 | priv->lang_info = (gconstpointer) -1; |
56 | priv->script_for_lang = (gconstpointer) -1; |
57 | } |
58 | |
59 | static PangoLanguagePrivate * pango_language_get_private (PangoLanguage *language) G_GNUC_CONST; |
60 | |
61 | static PangoLanguagePrivate * |
62 | pango_language_get_private (PangoLanguage *language) |
63 | { |
64 | PangoLanguagePrivate *priv; |
65 | |
66 | if (!language) |
67 | return NULL; |
68 | |
69 | priv = (PangoLanguagePrivate *)(void *)((char *)language - sizeof (PangoLanguagePrivate)); |
70 | |
71 | if (G_UNLIKELY (priv->magic != PANGO_LANGUAGE_PRIVATE_MAGIC)) |
72 | { |
73 | g_critical ("Invalid PangoLanguage. Did you pass in a straight string instead of calling pango_language_from_string()?" ); |
74 | return NULL; |
75 | } |
76 | |
77 | return priv; |
78 | } |
79 | |
80 | |
81 | |
/* Characters that separate the entries of a language (range) list. */
#define LANGUAGE_SEPARATORS ";:, \t"

/* Canonicalization table, indexed by unsigned character value.
 *
 * Lowercase letters, digits and '-' map to themselves, uppercase letters
 * map to their lowercase form, and '@' and '_' map to '-'.  Every other
 * value (including all bytes >= 0x80, which are implicitly
 * zero-initialized) maps to 0, which the users of this table treat as
 * the end of the language tag.
 */
static const char canon_map[256] = {
  /* 0x00 */ 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 */ 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 */ 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   '-', 0,   0,
  /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0,   0,   0,   0,   0,   0,
  /* 0x40 */ '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   0,   0,   0,   '-',
  /* 0x60 */ 0,   'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   0,   0,   0,   0
};
94 | |
95 | static gboolean |
96 | lang_equal (gconstpointer v1, |
97 | gconstpointer v2) |
98 | { |
99 | const guchar *p1 = v1; |
100 | const guchar *p2 = v2; |
101 | |
102 | while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2]) |
103 | { |
104 | p1++, p2++; |
105 | } |
106 | |
107 | return (canon_map[*p1] == canon_map[*p2]); |
108 | } |
109 | |
110 | static guint |
111 | lang_hash (gconstpointer key) |
112 | { |
113 | const guchar *p = key; |
114 | guint h = 0; |
115 | while (canon_map[*p]) |
116 | { |
117 | h = (h << 5) - h + canon_map[*p]; |
118 | p++; |
119 | } |
120 | |
121 | return h; |
122 | } |
123 | |
124 | static PangoLanguage * |
125 | pango_language_copy (PangoLanguage *language) |
126 | { |
127 | return language; /* language tags are const */ |
128 | } |
129 | |
130 | static void |
131 | pango_language_free (PangoLanguage *language G_GNUC_UNUSED) |
132 | { |
133 | return; /* nothing */ |
134 | } |
135 | |
136 | /** |
137 | * PangoLanguage: |
138 | * |
139 | * The `PangoLanguage` structure is used to |
140 | * represent a language. |
141 | * |
142 | * `PangoLanguage` pointers can be efficiently |
143 | * copied and compared with each other. |
144 | */ |
145 | G_DEFINE_BOXED_TYPE (PangoLanguage, pango_language, |
146 | pango_language_copy, |
147 | pango_language_free); |
148 | |
149 | /** |
150 | * _pango_get_lc_ctype: |
151 | * |
152 | * Return the Unix-style locale string for the language currently in |
153 | * effect. On Unix systems, this is the return value from |
154 | * `setlocale (LC_CTYPE, NULL)`, and the user can affect this through |
155 | * the environment variables LC_ALL, LC_CTYPE or LANG (checked |
156 | * in that order). The locale strings typically is in the form lang_COUNTRY, |
157 | * where lang is an ISO-639 language code, and COUNTRY is an ISO-3166 country |
158 | * code. For instance, sv_FI for Swedish as written in Finland or pt_BR for |
159 | * Portuguese as written in Brazil. |
160 | * |
161 | * On Windows, the C library doesn't use any such environment |
162 | * variables, and setting them won't affect the behavior of functions |
163 | * like ctime(). The user sets the locale through the Regional Options |
164 | * in the Control Panel. The C library (in the setlocale() function) |
165 | * does not use country and language codes, but country and language |
166 | * names spelled out in English. |
167 | * However, this function does check the above environment |
168 | * variables, and does return a Unix-style locale string based on |
169 | * either said environment variables or the thread's current locale. |
170 | * |
171 | * Return value: a dynamically allocated string, free with g_free(). |
172 | */ |
173 | static gchar * |
174 | _pango_get_lc_ctype (void) |
175 | { |
176 | #ifdef G_OS_WIN32 |
177 | /* Somebody might try to set the locale for this process using the |
178 | * LANG or LC_ environment variables. The Microsoft C library |
179 | * doesn't know anything about them. You set the locale in the |
180 | * Control Panel. Setting these env vars won't have any affect on |
181 | * locale-dependent C library functions like ctime(). But just for |
182 | * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes |
183 | * it easier to test GTK and Pango in various default languages, you |
184 | * don't have to clickety-click in the Control Panel, you can simply |
185 | * start the program with LC_ALL=something on the command line.) |
186 | */ |
187 | |
188 | gchar *p; |
189 | |
190 | p = getenv ("LC_ALL" ); |
191 | if (p != NULL) |
192 | return g_strdup (p); |
193 | |
194 | p = getenv ("LC_CTYPE" ); |
195 | if (p != NULL) |
196 | return g_strdup (p); |
197 | |
198 | p = getenv ("LANG" ); |
199 | if (p != NULL) |
200 | return g_strdup (p); |
201 | |
202 | return g_win32_getlocale (); |
203 | #elif defined(HAVE_CORE_TEXT) |
204 | CFArrayRef languages; |
205 | CFStringRef language; |
206 | gchar ret[16]; |
207 | gchar *p; |
208 | |
209 | /* Take the same approach as done for Windows above. First we check |
210 | * if somebody tried to set the locale through environment variables. |
211 | */ |
212 | p = getenv ("LC_ALL" ); |
213 | if (p != NULL) |
214 | return g_strdup (p); |
215 | |
216 | p = getenv ("LC_CTYPE" ); |
217 | if (p != NULL) |
218 | return g_strdup (p); |
219 | |
220 | p = getenv ("LANG" ); |
221 | if (p != NULL) |
222 | return g_strdup (p); |
223 | |
224 | /* If the environment variables are not set, determine the locale |
225 | * through the platform-native API. |
226 | */ |
227 | languages = CFLocaleCopyPreferredLanguages (); |
228 | language = CFArrayGetValueAtIndex (languages, 0); |
229 | |
230 | if (!CFStringGetCString (language, ret, 16, kCFStringEncodingUTF8)) |
231 | { |
232 | CFRelease (languages); |
233 | return g_strdup (setlocale (LC_CTYPE, NULL)); |
234 | } |
235 | |
236 | CFRelease (languages); |
237 | |
238 | return g_strdup (ret); |
239 | #else |
240 | { |
241 | gchar *lc_ctype = setlocale (LC_CTYPE, NULL); |
242 | |
243 | if (lc_ctype) |
244 | return g_strdup (str: lc_ctype); |
245 | else |
246 | return g_strdup (str: "C" ); |
247 | } |
248 | #endif |
249 | } |
250 | |
251 | /** |
252 | * pango_language_get_default: |
253 | * |
254 | * Returns the `PangoLanguage` for the current locale of the process. |
255 | * |
256 | * On Unix systems, this is the return value is derived from |
257 | * `setlocale (LC_CTYPE, NULL)`, and the user can |
258 | * affect this through the environment variables LC_ALL, LC_CTYPE or |
259 | * LANG (checked in that order). The locale string typically is in |
260 | * the form lang_COUNTRY, where lang is an ISO-639 language code, and |
261 | * COUNTRY is an ISO-3166 country code. For instance, sv_FI for |
262 | * Swedish as written in Finland or pt_BR for Portuguese as written in |
263 | * Brazil. |
264 | * |
265 | * On Windows, the C library does not use any such environment |
266 | * variables, and setting them won't affect the behavior of functions |
267 | * like ctime(). The user sets the locale through the Regional Options |
268 | * in the Control Panel. The C library (in the setlocale() function) |
269 | * does not use country and language codes, but country and language |
270 | * names spelled out in English. |
271 | * However, this function does check the above environment |
272 | * variables, and does return a Unix-style locale string based on |
273 | * either said environment variables or the thread's current locale. |
274 | * |
275 | * Your application should call `setlocale(LC_ALL, "")` for the user |
276 | * settings to take effect. GTK does this in its initialization |
277 | * functions automatically (by calling gtk_set_locale()). |
278 | * See the setlocale() manpage for more details. |
279 | * |
280 | * Note that the default language can change over the life of an application. |
281 | * |
282 | * Also note that this function will not do the right thing if you |
283 | * use per-thread locales with uselocale(). In that case, you should |
284 | * just call pango_language_from_string() yourself. |
285 | * |
286 | * Return value: (transfer none): the default language as a `PangoLanguage` |
287 | * |
288 | * Since: 1.16 |
289 | **/ |
290 | PangoLanguage * |
291 | pango_language_get_default (void) |
292 | { |
293 | static PangoLanguage *result = NULL; /* MT-safe */ |
294 | |
295 | if (g_once_init_enter (&result)) |
296 | { |
297 | gchar *lc_ctype; |
298 | PangoLanguage *lang; |
299 | |
300 | lc_ctype = _pango_get_lc_ctype (); |
301 | lang = pango_language_from_string (language: lc_ctype); |
302 | g_free (mem: lc_ctype); |
303 | |
304 | g_once_init_leave (&result, lang); |
305 | } |
306 | |
307 | return result; |
308 | } |
309 | |
310 | /** |
311 | * pango_language_from_string: |
312 | * @language: (nullable): a string representing a language tag |
313 | * |
314 | * Convert a language tag to a `PangoLanguage`. |
315 | * |
316 | * The language tag must be in a RFC-3066 format. `PangoLanguage` pointers |
317 | * can be efficiently copied (copy the pointer) and compared with other |
318 | * language tags (compare the pointer.) |
319 | * |
320 | * This function first canonicalizes the string by converting it to |
321 | * lowercase, mapping '_' to '-', and stripping all characters other |
322 | * than letters and '-'. |
323 | * |
324 | * Use [func@Pango.Language.get_default] if you want to get the |
325 | * `PangoLanguage` for the current locale of the process. |
326 | * |
327 | * Return value: (transfer none) (nullable): a `PangoLanguage` |
328 | */ |
329 | PangoLanguage * |
330 | pango_language_from_string (const char *language) |
331 | { |
332 | G_LOCK_DEFINE_STATIC (lang_from_string); |
333 | static GHashTable *hash = NULL; /* MT-safe */ |
334 | PangoLanguagePrivate *priv; |
335 | char *result; |
336 | int len; |
337 | char *p; |
338 | |
339 | if (language == NULL) |
340 | return NULL; |
341 | |
342 | G_LOCK (lang_from_string); |
343 | |
344 | if (G_UNLIKELY (!hash)) |
345 | hash = g_hash_table_new (hash_func: lang_hash, key_equal_func: lang_equal); |
346 | else |
347 | { |
348 | result = g_hash_table_lookup (hash_table: hash, key: language); |
349 | if (result) |
350 | goto out; |
351 | } |
352 | |
353 | len = strlen (s: language); |
354 | priv = g_malloc0 (n_bytes: sizeof (PangoLanguagePrivate) + len + 1); |
355 | g_assert (priv); |
356 | |
357 | result = (char *)priv; |
358 | result += sizeof (PangoLanguagePrivate); |
359 | |
360 | pango_language_private_init (priv); |
361 | |
362 | p = result; |
363 | while ((*(p++) = canon_map[*(guchar *)language++])) |
364 | ; |
365 | |
366 | g_hash_table_insert (hash_table: hash, key: result, value: result); |
367 | |
368 | out: |
369 | G_UNLOCK (lang_from_string); |
370 | |
371 | return (PangoLanguage *)result; |
372 | } |
373 | |
/**
 * pango_language_to_string:
 * @language: a language tag.
 *
 * Gets the RFC-3066 format string representing the given language tag.
 *
 * Returns: (transfer none): a string representing the language tag
 */
/* The function name is parenthesized so that a function-like macro named
 * pango_language_to_string() cannot expand at this definition; the call
 * in the body is not parenthesized and therefore is subject to such a
 * macro.
 * NOTE(review): presumably pango-language.h defines that macro (as a
 * cast of the PangoLanguage pointer to a string) -- confirm. Without
 * it, the body would be an unconditional call to itself.
 */
const char *
(pango_language_to_string) (PangoLanguage *language)
{
  return pango_language_to_string (language);
}
387 | |
388 | /** |
389 | * pango_language_matches: |
390 | * @language: (nullable): a language tag (see [func@Pango.Language.from_string]), |
391 | * %NULL is allowed and matches nothing but '*' |
392 | * @range_list: a list of language ranges, separated by ';', ':', |
393 | * ',', or space characters. |
394 | * Each element must either be '*', or a RFC 3066 language range |
395 | * canonicalized as by [func@Pango.Language.from_string] |
396 | * |
397 | * Checks if a language tag matches one of the elements in a list of |
398 | * language ranges. |
399 | * |
400 | * A language tag is considered to match a range in the list if the |
401 | * range is '*', the range is exactly the tag, or the range is a prefix |
402 | * of the tag, and the character after it in the tag is '-'. |
403 | * |
404 | * Return value: %TRUE if a match was found |
405 | */ |
406 | gboolean |
407 | pango_language_matches (PangoLanguage *language, |
408 | const char *range_list) |
409 | { |
410 | const char *lang_str = pango_language_to_string (language); |
411 | const char *p = range_list; |
412 | gboolean done = FALSE; |
413 | |
414 | while (!done) |
415 | { |
416 | const char *end = strpbrk (s: p, LANGUAGE_SEPARATORS); |
417 | if (!end) |
418 | { |
419 | end = p + strlen (s: p); |
420 | done = TRUE; |
421 | } |
422 | |
423 | if (strncmp (s1: p, s2: "*" , n: 1) == 0 || |
424 | (lang_str && strncmp (s1: lang_str, s2: p, n: end - p) == 0 && |
425 | (lang_str[end - p] == '\0' || lang_str[end - p] == '-'))) |
426 | return TRUE; |
427 | |
428 | if (!done) |
429 | p = end + 1; |
430 | } |
431 | |
432 | return FALSE; |
433 | } |
434 | |
435 | static int |
436 | lang_compare_first_component (gconstpointer pa, |
437 | gconstpointer pb) |
438 | { |
439 | const char *a = pa, *b = pb; |
440 | unsigned int da, db; |
441 | const char *p; |
442 | |
443 | p = strstr (haystack: a, needle: "-" ); |
444 | da = p ? (unsigned int) (p - a) : strlen (s: a); |
445 | |
446 | p = strstr (haystack: b, needle: "-" ); |
447 | db = p ? (unsigned int) (p - b) : strlen (s: b); |
448 | |
449 | return strncmp (s1: a, s2: b, MAX (da, db)); |
450 | } |
451 | |
452 | /* Finds the best record for @language in an array of records. |
453 | * Each record should start with the string representation of the language |
454 | * code for the record (embedded, not a pointer), and the records must be |
455 | * sorted on language code. |
456 | */ |
457 | static gconstpointer |
458 | find_best_lang_match (PangoLanguage *language, |
459 | gconstpointer records, |
460 | guint num_records, |
461 | guint record_size) |
462 | { |
463 | const char *lang_str; |
464 | const char *record, *start, *end; |
465 | |
466 | if (language == NULL) |
467 | return NULL; |
468 | |
469 | lang_str = pango_language_to_string (language); |
470 | |
471 | record = bsearch (key: lang_str, |
472 | base: records, nmemb: num_records, size: record_size, |
473 | compar: lang_compare_first_component); |
474 | if (!record) |
475 | return NULL; |
476 | |
477 | start = (const char *) records; |
478 | end = start + num_records * record_size; |
479 | |
480 | /* find the best match among all those that have the same first-component */ |
481 | |
482 | /* go to the final one matching in the first component */ |
483 | while (record < end - record_size && |
484 | lang_compare_first_component (pa: lang_str, pb: record + record_size) == 0) |
485 | record += record_size; |
486 | |
487 | /* go back, find which one matches completely */ |
488 | while (start <= record && |
489 | lang_compare_first_component (pa: lang_str, pb: record) == 0) |
490 | { |
491 | if (pango_language_matches (language, range_list: record)) |
492 | return record; |
493 | |
494 | record -= record_size; |
495 | } |
496 | |
497 | return NULL; |
498 | } |
499 | |
500 | static gconstpointer |
501 | find_best_lang_match_cached (PangoLanguage *language, |
502 | gconstpointer *cache, |
503 | gconstpointer records, |
504 | guint num_records, |
505 | guint record_size) |
506 | { |
507 | gconstpointer result; |
508 | |
509 | if (G_LIKELY (cache && *cache != (gconstpointer) -1)) |
510 | return *cache; |
511 | |
512 | result = find_best_lang_match (language, |
513 | records, |
514 | num_records, |
515 | record_size); |
516 | |
517 | if (cache) |
518 | *cache = result; |
519 | |
520 | return result; |
521 | } |
522 | |
/* Looks up the best record for @language in the array @records, using
 * the PangoLanguagePrivate member named @cache_key as the cache (no
 * cache is used when pango_language_get_private() yields NULL).
 *
 * @records must be an actual array, because its element count and
 * element size are taken with G_N_ELEMENTS() and sizeof.  @language is
 * evaluated more than once.
 *
 * The expansion is a plain expression without a trailing semicolon;
 * the user of the macro supplies it.
 */
#define FIND_BEST_LANG_MATCH_CACHED(language, cache_key, records) \
	find_best_lang_match_cached ((language), \
				     pango_language_get_private (language) ? \
				       &(pango_language_get_private (language)->cache_key) : NULL, \
				     (records), \
				     G_N_ELEMENTS (records), \
				     sizeof (*(records)))
530 | |
/* One record of the sample-text table: the language code, embedded in
 * the record, plus the byte offset of the language's sample string
 * inside lang_pool.
 */
typedef struct {
  char lang[6];
  guint16 offset;
} LangInfo;

/* Pure black magic, based on appendix of dsohowto.pdf */
/* Two-level expansion so that __LINE__ is expanded to its number
 * before being pasted onto "str".
 */
#define POOLSTRFIELD(line) POOLSTRFIELD1(line)
#define POOLSTRFIELD1(line) str##line
/* A struct with one char-array member per LANGUAGE() entry of
 * pango-language-sample-table.h, each sized to hold that entry's sample
 * string and named after the line of the entry.  str0 is a leading
 * one-byte member.
 */
struct _LangPoolStruct {
  char str0[1];
#define LANGUAGE(id, source, sample) char POOLSTRFIELD(__LINE__)[sizeof(sample)];
#include "pango-language-sample-table.h"
#undef LANGUAGE
};

/* The string pool itself: initialized through the struct view (str0 is
 * the empty string, followed by every sample string), and read through
 * the str view as one run of characters addressed by LangInfo.offset.
 */
static const union _LangPool {
  struct _LangPoolStruct lang_pool_struct;
  const char str[1];
} lang_pool = { {
    "" ,
#define LANGUAGE(id, source, sample) sample,
#include "pango-language-sample-table.h"
#undef LANGUAGE
} };
/* The lookup table: each entry pairs the stringified language id with
 * the offset of the corresponding member of struct _LangPoolStruct.
 * NOTE(review): lookups require the entries to be sorted on language
 * code; presumably the included table is kept in that order -- confirm.
 */
static const LangInfo lang_texts[] = {
#define LANGUAGE(id, source, sample) {G_STRINGIFY(id), G_STRUCT_OFFSET(struct _LangPoolStruct, POOLSTRFIELD(__LINE__))},
#include "pango-language-sample-table.h"
#undef LANGUAGE
  /* One extra entry with no final comma, to make it C89-happy */
  /* Its offset 0 is that of str0, the empty string; '~' compares
   * greater than any lowercase ASCII letter.
   */
  {"~~" , 0}
};
562 | |
563 | /** |
564 | * pango_language_get_sample_string: |
565 | * @language: (nullable): a `PangoLanguage` |
566 | * |
567 | * Get a string that is representative of the characters needed to |
568 | * render a particular language. |
569 | * |
570 | * The sample text may be a pangram, but is not necessarily. It is chosen |
571 | * to be demonstrative of normal text in the language, as well as exposing |
572 | * font feature requirements unique to the language. It is suitable for use |
573 | * as sample text in a font selection dialog. |
574 | * |
575 | * If @language is %NULL, the default language as found by |
576 | * [func@Pango.Language.get_default] is used. |
577 | * |
578 | * If Pango does not have a sample string for @language, the classic |
579 | * "The quick brown fox..." is returned. This can be detected by |
580 | * comparing the returned pointer value to that returned for (non-existent) |
581 | * language code "xx". That is, compare to: |
582 | * |
583 | * ``` |
584 | * pango_language_get_sample_string (pango_language_from_string ("xx")) |
585 | * ``` |
586 | * |
587 | * Return value: (transfer none): the sample string |
588 | */ |
589 | const char * |
590 | pango_language_get_sample_string (PangoLanguage *language) |
591 | { |
592 | const LangInfo *lang_info; |
593 | |
594 | if (!language) |
595 | language = pango_language_get_default (); |
596 | |
597 | lang_info = FIND_BEST_LANG_MATCH_CACHED (language, |
598 | lang_info, |
599 | lang_texts); |
600 | |
601 | if (lang_info) |
602 | return lang_pool.str + lang_info->offset; |
603 | |
604 | return "The quick brown fox jumps over the lazy dog." ; |
605 | } |
606 | |
607 | |
608 | |
609 | |
610 | /* |
611 | * From language to script |
612 | */ |
613 | |
614 | |
615 | #include "pango-script-lang-table.h" |
616 | |
617 | /** |
618 | * pango_language_get_scripts: |
619 | * @language: (nullable): a `PangoLanguage` |
620 | * @num_scripts: (out caller-allocates) (optional): location to |
621 | * return number of scripts |
622 | * |
623 | * Determines the scripts used to to write @language. |
624 | * |
625 | * If nothing is known about the language tag @language, |
626 | * or if @language is %NULL, then %NULL is returned. |
627 | * The list of scripts returned starts with the script that the |
628 | * language uses most and continues to the one it uses least. |
629 | * |
630 | * The value @num_script points at will be set to the number |
631 | * of scripts in the returned array (or zero if %NULL is returned). |
632 | * |
633 | * Most languages use only one script for writing, but there are |
634 | * some that use two (Latin and Cyrillic for example), and a few |
635 | * use three (Japanese for example). Applications should not make |
636 | * any assumptions on the maximum number of scripts returned |
637 | * though, except that it is positive if the return value is not |
638 | * %NULL, and it is a small number. |
639 | * |
640 | * The [method@Pango.Language.includes_script] function uses this |
641 | * function internally. |
642 | * |
643 | * Note: while the return value is declared as `PangoScript`, the |
644 | * returned values are from the `GUnicodeScript` enumeration, which |
645 | * may have more values. Callers need to handle unknown values. |
646 | * |
647 | * Return value: (transfer none) (array length=num_scripts) (nullable): |
648 | * An array of `PangoScript` values, with the number of entries in |
649 | * the array stored in @num_scripts, or %NULL if Pango does not have |
650 | * any information about this particular language tag (also the case |
651 | * if @language is %NULL). |
652 | * |
653 | * Since: 1.22 |
654 | */ |
655 | const PangoScript * |
656 | pango_language_get_scripts (PangoLanguage *language, |
657 | int *num_scripts) |
658 | { |
659 | const PangoScriptForLang *script_for_lang; |
660 | unsigned int j; |
661 | |
662 | script_for_lang = FIND_BEST_LANG_MATCH_CACHED (language, |
663 | script_for_lang, |
664 | pango_script_for_lang); |
665 | |
666 | if (!script_for_lang || script_for_lang->scripts[0] == 0) |
667 | { |
668 | if (num_scripts) |
669 | *num_scripts = 0; |
670 | |
671 | return NULL; |
672 | } |
673 | |
674 | if (num_scripts) |
675 | { |
676 | for (j = 0; j < G_N_ELEMENTS (script_for_lang->scripts); j++) |
677 | if (script_for_lang->scripts[j] == 0) |
678 | break; |
679 | |
680 | g_assert (j > 0); |
681 | |
682 | *num_scripts = j; |
683 | } |
684 | |
685 | return (const PangoScript *) script_for_lang->scripts; |
686 | } |
687 | |
688 | /** |
689 | * pango_language_includes_script: |
690 | * @language: (nullable): a `PangoLanguage` |
691 | * @script: a `PangoScript` |
692 | * |
693 | * Determines if @script is one of the scripts used to |
694 | * write @language. |
695 | * |
696 | * The returned value is conservative; if nothing is known about |
697 | * the language tag @language, %TRUE will be returned, since, as |
698 | * far as Pango knows, @script might be used to write @language. |
699 | * |
700 | * This routine is used in Pango's itemization process when |
701 | * determining if a supplied language tag is relevant to |
702 | * a particular section of text. It probably is not useful |
703 | * for applications in most circumstances. |
704 | * |
705 | * This function uses [method@Pango.Language.get_scripts] internally. |
706 | * |
707 | * Return value: %TRUE if @script is one of the scripts used |
708 | * to write @language or if nothing is known about @language |
709 | * (including the case that @language is %NULL), %FALSE otherwise. |
710 | * |
711 | * Since: 1.4 |
712 | */ |
713 | gboolean |
714 | pango_language_includes_script (PangoLanguage *language, |
715 | PangoScript script) |
716 | { |
717 | const PangoScript *scripts; |
718 | int num_scripts, j; |
719 | |
720 | /* copied from the one in pango-script.c */ |
721 | #define REAL_SCRIPT(script) \ |
722 | ((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN) |
723 | |
724 | if (!REAL_SCRIPT (script)) |
725 | return TRUE; |
726 | |
727 | #undef REAL_SCRIPT |
728 | |
729 | scripts = pango_language_get_scripts (language, num_scripts: &num_scripts); |
730 | if (!scripts) |
731 | return TRUE; |
732 | |
733 | for (j = 0; j < num_scripts; j++) |
734 | if (scripts[j] == script) |
735 | return TRUE; |
736 | |
737 | return FALSE; |
738 | } |
739 | |
740 | |
741 | |
742 | |
743 | /* |
744 | * From script to language |
745 | */ |
746 | |
747 | |
748 | static PangoLanguage ** |
749 | parse_default_languages (void) |
750 | { |
751 | char *p, *p_copy; |
752 | gboolean done = FALSE; |
753 | GPtrArray *langs; |
754 | |
755 | p = getenv (name: "PANGO_LANGUAGE" ); |
756 | |
757 | if (p == NULL) |
758 | p = getenv (name: "LANGUAGE" ); |
759 | |
760 | if (p == NULL) |
761 | return NULL; |
762 | |
763 | p_copy = p = g_strdup (str: p); |
764 | |
765 | langs = g_ptr_array_new (); |
766 | |
767 | while (!done) |
768 | { |
769 | char *end = strpbrk (s: p, LANGUAGE_SEPARATORS); |
770 | if (!end) |
771 | { |
772 | end = p + strlen (s: p); |
773 | done = TRUE; |
774 | } |
775 | else |
776 | *end = '\0'; |
777 | |
778 | /* skip empty languages, and skip the language 'C' */ |
779 | if (p != end && !(p + 1 == end && *p == 'C')) |
780 | { |
781 | PangoLanguage *l = pango_language_from_string (language: p); |
782 | |
783 | g_ptr_array_add (array: langs, data: l); |
784 | } |
785 | |
786 | if (!done) |
787 | p = end + 1; |
788 | } |
789 | |
790 | g_ptr_array_add (array: langs, NULL); |
791 | |
792 | g_free (mem: p_copy); |
793 | |
794 | return (PangoLanguage **) g_ptr_array_free (array: langs, FALSE); |
795 | } |
796 | |
797 | G_LOCK_DEFINE_STATIC (languages); |
798 | static gboolean initialized = FALSE; /* MT-safe */ |
799 | static PangoLanguage * const * languages = NULL; /* MT-safe */ |
800 | static GHashTable *hash = NULL; /* MT-safe */ |
801 | |
802 | static PangoLanguage * |
803 | _pango_script_get_default_language (PangoScript script) |
804 | { |
805 | PangoLanguage *result, * const * p; |
806 | |
807 | G_LOCK (languages); |
808 | |
809 | if (G_UNLIKELY (!initialized)) |
810 | { |
811 | languages = parse_default_languages (); |
812 | |
813 | if (languages) |
814 | hash = g_hash_table_new (NULL, NULL); |
815 | |
816 | initialized = TRUE; |
817 | } |
818 | |
819 | if (!languages) |
820 | { |
821 | result = NULL; |
822 | goto out; |
823 | } |
824 | |
825 | if (g_hash_table_lookup_extended (hash_table: hash, GINT_TO_POINTER (script), NULL, value: (gpointer *) (gpointer) &result)) |
826 | goto out; |
827 | |
828 | for (p = languages; *p; p++) |
829 | if (pango_language_includes_script (language: *p, script)) |
830 | break; |
831 | result = *p; |
832 | |
833 | g_hash_table_insert (hash_table: hash, GINT_TO_POINTER (script), value: result); |
834 | |
835 | out: |
836 | G_UNLOCK (languages); |
837 | |
838 | return result; |
839 | } |
840 | |
841 | /** |
842 | * pango_language_get_preferred: |
843 | * |
844 | * Returns the list of languages that the user prefers. |
845 | * |
846 | * The list is specified by the `PANGO_LANGUAGE` or `LANGUAGE` |
847 | * environment variables, in order of preference. Note that this |
848 | * list does not necessarily include the language returned by |
849 | * [func@Pango.Language.get_default]. |
850 | * |
851 | * When choosing language-specific resources, such as the sample |
852 | * text returned by [method@Pango.Language.get_sample_string], |
853 | * you should first try the default language, followed by the |
854 | * languages returned by this function. |
855 | * |
856 | * Returns: (transfer none) (nullable): a %NULL-terminated array |
857 | * of `PangoLanguage`* |
858 | * |
859 | * Since: 1.48 |
860 | */ |
861 | PangoLanguage ** |
862 | pango_language_get_preferred (void) |
863 | { |
864 | /* We call this just for its side-effect of initializing languages */ |
865 | _pango_script_get_default_language (script: PANGO_SCRIPT_COMMON); |
866 | |
867 | return (PangoLanguage **) languages; |
868 | } |
869 | |
870 | /** |
871 | * pango_script_get_sample_language: |
872 | * @script: a `PangoScript` |
873 | * |
874 | * Finds a language tag that is reasonably representative of @script. |
875 | * |
876 | * The language will usually be the most widely spoken or used language |
877 | * written in that script: for instance, the sample language for |
878 | * %PANGO_SCRIPT_CYRILLIC is ru (Russian), the sample language for |
879 | * %PANGO_SCRIPT_ARABIC is ar. |
880 | * |
881 | * For some scripts, no sample language will be returned because |
882 | * there is no language that is sufficiently representative. The |
883 | * best example of this is %PANGO_SCRIPT_HAN, where various different |
884 | * variants of written Chinese, Japanese, and Korean all use |
885 | * significantly different sets of Han characters and forms |
886 | * of shared characters. No sample language can be provided |
887 | * for many historical scripts as well. |
888 | * |
889 | * As of 1.18, this function checks the environment variables |
890 | * `PANGO_LANGUAGE` and `LANGUAGE` (checked in that order) first. |
891 | * If one of them is set, it is parsed as a list of language tags |
892 | * separated by colons or other separators. This function |
893 | * will return the first language in the parsed list that Pango |
894 | * believes may use @script for writing. This last predicate |
895 | * is tested using [method@Pango.Language.includes_script]. This can |
896 | * be used to control Pango's font selection for non-primary |
897 | * languages. For example, a `PANGO_LANGUAGE` environment variable |
898 | * set to "en:fa" makes Pango choose fonts suitable for Persian (fa) |
899 | * instead of Arabic (ar) when a segment of Arabic text is found |
900 | * in an otherwise non-Arabic text. The same trick can be used to |
901 | * choose a default language for %PANGO_SCRIPT_HAN when setting |
902 | * context language is not feasible. |
903 | * |
904 | * Return value: (nullable): a `PangoLanguage` that is representative |
905 | * of the script |
906 | * |
907 | * Since: 1.4 |
908 | */ |
909 | PangoLanguage * |
910 | pango_script_get_sample_language (PangoScript script) |
911 | { |
912 | /* Note that in the following, we want |
913 | * pango_language_includes_script() for the sample language |
914 | * to include the script, so alternate orthographies |
915 | * (Shavian for English, Osmanya for Somali, etc), typically |
916 | * have no sample language |
917 | */ |
918 | static const char sample_languages[][4] = { |
919 | "" , /* PANGO_SCRIPT_COMMON */ |
920 | "" , /* PANGO_SCRIPT_INHERITED */ |
921 | "ar" , /* PANGO_SCRIPT_ARABIC */ |
922 | "hy" , /* PANGO_SCRIPT_ARMENIAN */ |
923 | "bn" , /* PANGO_SCRIPT_BENGALI */ |
924 | /* Used primarily in Taiwan, but not part of the standard |
925 | * zh-tw orthography */ |
926 | "" , /* PANGO_SCRIPT_BOPOMOFO */ |
927 | "chr" , /* PANGO_SCRIPT_CHEROKEE */ |
928 | "cop" , /* PANGO_SCRIPT_COPTIC */ |
929 | "ru" , /* PANGO_SCRIPT_CYRILLIC */ |
930 | /* Deseret was used to write English */ |
931 | "" , /* PANGO_SCRIPT_DESERET */ |
932 | "hi" , /* PANGO_SCRIPT_DEVANAGARI */ |
933 | "am" , /* PANGO_SCRIPT_ETHIOPIC */ |
934 | "ka" , /* PANGO_SCRIPT_GEORGIAN */ |
935 | "" , /* PANGO_SCRIPT_GOTHIC */ |
936 | "el" , /* PANGO_SCRIPT_GREEK */ |
937 | "gu" , /* PANGO_SCRIPT_GUJARATI */ |
938 | "pa" , /* PANGO_SCRIPT_GURMUKHI */ |
939 | "" , /* PANGO_SCRIPT_HAN */ |
940 | "ko" , /* PANGO_SCRIPT_HANGUL */ |
941 | "he" , /* PANGO_SCRIPT_HEBREW */ |
942 | "ja" , /* PANGO_SCRIPT_HIRAGANA */ |
943 | "kn" , /* PANGO_SCRIPT_KANNADA */ |
944 | "ja" , /* PANGO_SCRIPT_KATAKANA */ |
945 | "km" , /* PANGO_SCRIPT_KHMER */ |
946 | "lo" , /* PANGO_SCRIPT_LAO */ |
947 | "en" , /* PANGO_SCRIPT_LATIN */ |
948 | "ml" , /* PANGO_SCRIPT_MALAYALAM */ |
949 | "mn" , /* PANGO_SCRIPT_MONGOLIAN */ |
950 | "my" , /* PANGO_SCRIPT_MYANMAR */ |
951 | /* Ogham was used to write old Irish */ |
952 | "" , /* PANGO_SCRIPT_OGHAM */ |
953 | "" , /* PANGO_SCRIPT_OLD_ITALIC */ |
954 | "or" , /* PANGO_SCRIPT_ORIYA */ |
955 | "" , /* PANGO_SCRIPT_RUNIC */ |
956 | "si" , /* PANGO_SCRIPT_SINHALA */ |
957 | "syr" , /* PANGO_SCRIPT_SYRIAC */ |
958 | "ta" , /* PANGO_SCRIPT_TAMIL */ |
959 | "te" , /* PANGO_SCRIPT_TELUGU */ |
960 | "dv" , /* PANGO_SCRIPT_THAANA */ |
961 | "th" , /* PANGO_SCRIPT_THAI */ |
962 | "bo" , /* PANGO_SCRIPT_TIBETAN */ |
963 | "iu" , /* PANGO_SCRIPT_CANADIAN_ABORIGINAL */ |
964 | "" , /* PANGO_SCRIPT_YI */ |
965 | "tl" , /* PANGO_SCRIPT_TAGALOG */ |
966 | /* Phillipino languages/scripts */ |
967 | "hnn" , /* PANGO_SCRIPT_HANUNOO */ |
968 | "bku" , /* PANGO_SCRIPT_BUHID */ |
969 | "tbw" , /* PANGO_SCRIPT_TAGBANWA */ |
970 | |
971 | "" , /* PANGO_SCRIPT_BRAILLE */ |
972 | "" , /* PANGO_SCRIPT_CYPRIOT */ |
973 | "" , /* PANGO_SCRIPT_LIMBU */ |
974 | /* Used for Somali (so) in the past */ |
975 | "" , /* PANGO_SCRIPT_OSMANYA */ |
976 | /* The Shavian alphabet was designed for English */ |
977 | "" , /* PANGO_SCRIPT_SHAVIAN */ |
978 | "" , /* PANGO_SCRIPT_LINEAR_B */ |
979 | "" , /* PANGO_SCRIPT_TAI_LE */ |
980 | "uga" , /* PANGO_SCRIPT_UGARITIC */ |
981 | |
982 | "" , /* PANGO_SCRIPT_NEW_TAI_LUE */ |
983 | "bug" , /* PANGO_SCRIPT_BUGINESE */ |
984 | /* The original script for Old Church Slavonic (chu), later |
985 | * written with Cyrillic */ |
986 | "" , /* PANGO_SCRIPT_GLAGOLITIC */ |
987 | /* Used for for Berber (ber), but Arabic script is more common */ |
988 | "" , /* PANGO_SCRIPT_TIFINAGH */ |
989 | "syl" , /* PANGO_SCRIPT_SYLOTI_NAGRI */ |
990 | "peo" , /* PANGO_SCRIPT_OLD_PERSIAN */ |
991 | "" , /* PANGO_SCRIPT_KHAROSHTHI */ |
992 | |
993 | "" , /* PANGO_SCRIPT_UNKNOWN */ |
994 | "" , /* PANGO_SCRIPT_BALINESE */ |
995 | "" , /* PANGO_SCRIPT_CUNEIFORM */ |
996 | "" , /* PANGO_SCRIPT_PHOENICIAN */ |
997 | "" , /* PANGO_SCRIPT_PHAGS_PA */ |
998 | "nqo" , /* PANGO_SCRIPT_NKO */ |
999 | |
1000 | /* Unicode-5.1 additions */ |
1001 | "" , /* PANGO_SCRIPT_KAYAH_LI */ |
1002 | "" , /* PANGO_SCRIPT_LEPCHA */ |
1003 | "" , /* PANGO_SCRIPT_REJANG */ |
1004 | "" , /* PANGO_SCRIPT_SUNDANESE */ |
1005 | "" , /* PANGO_SCRIPT_SAURASHTRA */ |
1006 | "" , /* PANGO_SCRIPT_CHAM */ |
1007 | "" , /* PANGO_SCRIPT_OL_CHIKI */ |
1008 | "" , /* PANGO_SCRIPT_VAI */ |
1009 | "" , /* PANGO_SCRIPT_CARIAN */ |
1010 | "" , /* PANGO_SCRIPT_LYCIAN */ |
1011 | "" , /* PANGO_SCRIPT_LYDIAN */ |
1012 | |
1013 | /* Unicode-6.0 additions */ |
1014 | "" , /* PANGO_SCRIPT_BATAK */ |
1015 | "" , /* PANGO_SCRIPT_BRAHMI */ |
1016 | "" , /* PANGO_SCRIPT_MANDAIC */ |
1017 | |
1018 | /* Unicode-6.1 additions */ |
1019 | "" , /* PANGO_SCRIPT_CHAKMA */ |
1020 | "" , /* PANGO_SCRIPT_MEROITIC_CURSIVE */ |
1021 | "" , /* PANGO_SCRIPT_MEROITIC_HIEROGLYPHS */ |
1022 | "" , /* PANGO_SCRIPT_MIAO */ |
1023 | "" , /* PANGO_SCRIPT_SHARADA */ |
1024 | "" , /* PANGO_SCRIPT_SORA_SOMPENG */ |
1025 | "" , /* PANGO_SCRIPT_TAKRI */ |
1026 | }; |
1027 | const char *sample_language; |
1028 | PangoLanguage *result; |
1029 | |
1030 | g_return_val_if_fail (script >= 0, NULL); |
1031 | |
1032 | if ((guint)script >= G_N_ELEMENTS (sample_languages)) |
1033 | return NULL; |
1034 | |
1035 | result = _pango_script_get_default_language (script); |
1036 | if (result) |
1037 | return result; |
1038 | |
1039 | sample_language = sample_languages[script]; |
1040 | |
1041 | if (!sample_language[0]) |
1042 | return NULL; |
1043 | else |
1044 | return pango_language_from_string (language: sample_language); |
1045 | } |
1046 | |