1/* Pango
2 * pango-language.c: Language handling routines
3 *
4 * Copyright (C) 2000 Red Hat Software
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
20 */
21
22#include "config.h"
23#include <errno.h>
24#include <string.h>
25#include <stdlib.h>
26#include <math.h>
27#include <locale.h>
28
29#include "pango-language.h"
30#include "pango-impl-utils.h"
31
32#ifdef HAVE_CORE_TEXT
33#include <CoreFoundation/CoreFoundation.h>
34#endif /* HAVE_CORE_TEXT */
35
36
/* We embed a private struct right *before* the memory that a
 * PangoLanguage * points to.
 */

typedef struct {
  /* Per-language lookup caches, filled in lazily by
   * find_best_lang_match_cached().  (gconstpointer) -1 means "not looked
   * up yet"; any other value, including NULL, is the cached answer. */
  gconstpointer lang_info;
  gconstpointer script_for_lang;

  int magic; /* Used for verification */
} PangoLanguagePrivate;

/* Marker stored in PangoLanguagePrivate.magic; pango_language_get_private()
 * checks it to detect pointers that were not produced by
 * pango_language_from_string(). */
#define PANGO_LANGUAGE_PRIVATE_MAGIC 0x0BE4DAD0
49
50static void
51pango_language_private_init (PangoLanguagePrivate *priv)
52{
53 priv->magic = PANGO_LANGUAGE_PRIVATE_MAGIC;
54
55 priv->lang_info = (gconstpointer) -1;
56 priv->script_for_lang = (gconstpointer) -1;
57}
58
59static PangoLanguagePrivate * pango_language_get_private (PangoLanguage *language) G_GNUC_CONST;
60
61static PangoLanguagePrivate *
62pango_language_get_private (PangoLanguage *language)
63{
64 PangoLanguagePrivate *priv;
65
66 if (!language)
67 return NULL;
68
69 priv = (PangoLanguagePrivate *)(void *)((char *)language - sizeof (PangoLanguagePrivate));
70
71 if (G_UNLIKELY (priv->magic != PANGO_LANGUAGE_PRIVATE_MAGIC))
72 {
73 g_critical ("Invalid PangoLanguage. Did you pass in a straight string instead of calling pango_language_from_string()?");
74 return NULL;
75 }
76
77 return priv;
78}
79
80
81
/* Characters accepted as separators between entries of a language list;
 * passed to strpbrk() when scanning range lists and the PANGO_LANGUAGE /
 * LANGUAGE environment variables. */
#define LANGUAGE_SEPARATORS ";:, \t"
83
/* Canonicalization table, indexed by unsigned byte value.
 *
 * ASCII letters map to their lowercase form, digits and '-' map to
 * themselves, and '_' and '@' map to '-'.  Every other byte (including
 * NUL and everything from 0x80 up, which is implicitly zero-initialized)
 * maps to 0; the users of this table treat 0 as "end of tag".
 */
static const char canon_map[256] = {
  /* 0x00 - 0x0f */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x10 - 0x1f */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
  /* 0x20 - 0x2f: only '-' survives */
  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   '-', 0,   0,
  /* 0x30 - 0x3f: digits */
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 0,   0,   0,   0,   0,   0,
  /* 0x40 - 0x4f: '@' becomes '-', 'A'..'O' are lowercased */
  '-', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x50 - 0x5f: 'P'..'Z' are lowercased, '_' becomes '-' */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   0,   0,   0,   '-',
  /* 0x60 - 0x6f: 'a'..'o' */
  0,   'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
  /* 0x70 - 0x7f: 'p'..'z' */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 0,   0,   0,   0,   0
};
94
95static gboolean
96lang_equal (gconstpointer v1,
97 gconstpointer v2)
98{
99 const guchar *p1 = v1;
100 const guchar *p2 = v2;
101
102 while (canon_map[*p1] && canon_map[*p1] == canon_map[*p2])
103 {
104 p1++, p2++;
105 }
106
107 return (canon_map[*p1] == canon_map[*p2]);
108}
109
110static guint
111lang_hash (gconstpointer key)
112{
113 const guchar *p = key;
114 guint h = 0;
115 while (canon_map[*p])
116 {
117 h = (h << 5) - h + canon_map[*p];
118 p++;
119 }
120
121 return h;
122}
123
124static PangoLanguage *
125pango_language_copy (PangoLanguage *language)
126{
127 return language; /* language tags are const */
128}
129
130static void
131pango_language_free (PangoLanguage *language G_GNUC_UNUSED)
132{
133 return; /* nothing */
134}
135
/**
 * PangoLanguage:
 *
 * The `PangoLanguage` structure is used to
 * represent a language.
 *
 * `PangoLanguage` pointers can be efficiently
 * copied and compared with each other.
 */
/* Registers PangoLanguage as a boxed type, with pango_language_copy() and
 * pango_language_free() as its copy and free functions. */
G_DEFINE_BOXED_TYPE (PangoLanguage, pango_language,
                     pango_language_copy,
                     pango_language_free);
148
149/**
150 * _pango_get_lc_ctype:
151 *
152 * Return the Unix-style locale string for the language currently in
153 * effect. On Unix systems, this is the return value from
154 * `setlocale (LC_CTYPE, NULL)`, and the user can affect this through
155 * the environment variables LC_ALL, LC_CTYPE or LANG (checked
156 * in that order). The locale strings typically is in the form lang_COUNTRY,
157 * where lang is an ISO-639 language code, and COUNTRY is an ISO-3166 country
158 * code. For instance, sv_FI for Swedish as written in Finland or pt_BR for
159 * Portuguese as written in Brazil.
160 *
161 * On Windows, the C library doesn't use any such environment
162 * variables, and setting them won't affect the behavior of functions
163 * like ctime(). The user sets the locale through the Regional Options
164 * in the Control Panel. The C library (in the setlocale() function)
165 * does not use country and language codes, but country and language
166 * names spelled out in English.
167 * However, this function does check the above environment
168 * variables, and does return a Unix-style locale string based on
169 * either said environment variables or the thread's current locale.
170 *
171 * Return value: a dynamically allocated string, free with g_free().
172 */
173static gchar *
174_pango_get_lc_ctype (void)
175{
176#ifdef G_OS_WIN32
177 /* Somebody might try to set the locale for this process using the
178 * LANG or LC_ environment variables. The Microsoft C library
179 * doesn't know anything about them. You set the locale in the
180 * Control Panel. Setting these env vars won't have any affect on
181 * locale-dependent C library functions like ctime(). But just for
182 * kicks, do obey LC_ALL, LC_CTYPE and LANG in Pango. (This also makes
183 * it easier to test GTK and Pango in various default languages, you
184 * don't have to clickety-click in the Control Panel, you can simply
185 * start the program with LC_ALL=something on the command line.)
186 */
187
188 gchar *p;
189
190 p = getenv ("LC_ALL");
191 if (p != NULL)
192 return g_strdup (p);
193
194 p = getenv ("LC_CTYPE");
195 if (p != NULL)
196 return g_strdup (p);
197
198 p = getenv ("LANG");
199 if (p != NULL)
200 return g_strdup (p);
201
202 return g_win32_getlocale ();
203#elif defined(HAVE_CORE_TEXT)
204 CFArrayRef languages;
205 CFStringRef language;
206 gchar ret[16];
207 gchar *p;
208
209 /* Take the same approach as done for Windows above. First we check
210 * if somebody tried to set the locale through environment variables.
211 */
212 p = getenv ("LC_ALL");
213 if (p != NULL)
214 return g_strdup (p);
215
216 p = getenv ("LC_CTYPE");
217 if (p != NULL)
218 return g_strdup (p);
219
220 p = getenv ("LANG");
221 if (p != NULL)
222 return g_strdup (p);
223
224 /* If the environment variables are not set, determine the locale
225 * through the platform-native API.
226 */
227 languages = CFLocaleCopyPreferredLanguages ();
228 language = CFArrayGetValueAtIndex (languages, 0);
229
230 if (!CFStringGetCString (language, ret, 16, kCFStringEncodingUTF8))
231 {
232 CFRelease (languages);
233 return g_strdup (setlocale (LC_CTYPE, NULL));
234 }
235
236 CFRelease (languages);
237
238 return g_strdup (ret);
239#else
240 {
241 gchar *lc_ctype = setlocale (LC_CTYPE, NULL);
242
243 if (lc_ctype)
244 return g_strdup (str: lc_ctype);
245 else
246 return g_strdup (str: "C");
247 }
248#endif
249}
250
251/**
252 * pango_language_get_default:
253 *
254 * Returns the `PangoLanguage` for the current locale of the process.
255 *
256 * On Unix systems, this is the return value is derived from
257 * `setlocale (LC_CTYPE, NULL)`, and the user can
258 * affect this through the environment variables LC_ALL, LC_CTYPE or
259 * LANG (checked in that order). The locale string typically is in
260 * the form lang_COUNTRY, where lang is an ISO-639 language code, and
261 * COUNTRY is an ISO-3166 country code. For instance, sv_FI for
262 * Swedish as written in Finland or pt_BR for Portuguese as written in
263 * Brazil.
264 *
265 * On Windows, the C library does not use any such environment
266 * variables, and setting them won't affect the behavior of functions
267 * like ctime(). The user sets the locale through the Regional Options
268 * in the Control Panel. The C library (in the setlocale() function)
269 * does not use country and language codes, but country and language
270 * names spelled out in English.
271 * However, this function does check the above environment
272 * variables, and does return a Unix-style locale string based on
273 * either said environment variables or the thread's current locale.
274 *
275 * Your application should call `setlocale(LC_ALL, "")` for the user
276 * settings to take effect. GTK does this in its initialization
277 * functions automatically (by calling gtk_set_locale()).
278 * See the setlocale() manpage for more details.
279 *
280 * Note that the default language can change over the life of an application.
281 *
282 * Also note that this function will not do the right thing if you
283 * use per-thread locales with uselocale(). In that case, you should
284 * just call pango_language_from_string() yourself.
285 *
286 * Return value: (transfer none): the default language as a `PangoLanguage`
287 *
288 * Since: 1.16
289 **/
290PangoLanguage *
291pango_language_get_default (void)
292{
293 static PangoLanguage *result = NULL; /* MT-safe */
294
295 if (g_once_init_enter (&result))
296 {
297 gchar *lc_ctype;
298 PangoLanguage *lang;
299
300 lc_ctype = _pango_get_lc_ctype ();
301 lang = pango_language_from_string (language: lc_ctype);
302 g_free (mem: lc_ctype);
303
304 g_once_init_leave (&result, lang);
305 }
306
307 return result;
308}
309
310/**
311 * pango_language_from_string:
312 * @language: (nullable): a string representing a language tag
313 *
314 * Convert a language tag to a `PangoLanguage`.
315 *
316 * The language tag must be in a RFC-3066 format. `PangoLanguage` pointers
317 * can be efficiently copied (copy the pointer) and compared with other
318 * language tags (compare the pointer.)
319 *
320 * This function first canonicalizes the string by converting it to
321 * lowercase, mapping '_' to '-', and stripping all characters other
322 * than letters and '-'.
323 *
324 * Use [func@Pango.Language.get_default] if you want to get the
325 * `PangoLanguage` for the current locale of the process.
326 *
327 * Return value: (transfer none) (nullable): a `PangoLanguage`
328 */
329PangoLanguage *
330pango_language_from_string (const char *language)
331{
332 G_LOCK_DEFINE_STATIC (lang_from_string);
333 static GHashTable *hash = NULL; /* MT-safe */
334 PangoLanguagePrivate *priv;
335 char *result;
336 int len;
337 char *p;
338
339 if (language == NULL)
340 return NULL;
341
342 G_LOCK (lang_from_string);
343
344 if (G_UNLIKELY (!hash))
345 hash = g_hash_table_new (hash_func: lang_hash, key_equal_func: lang_equal);
346 else
347 {
348 result = g_hash_table_lookup (hash_table: hash, key: language);
349 if (result)
350 goto out;
351 }
352
353 len = strlen (s: language);
354 priv = g_malloc0 (n_bytes: sizeof (PangoLanguagePrivate) + len + 1);
355 g_assert (priv);
356
357 result = (char *)priv;
358 result += sizeof (PangoLanguagePrivate);
359
360 pango_language_private_init (priv);
361
362 p = result;
363 while ((*(p++) = canon_map[*(guchar *)language++]))
364 ;
365
366 g_hash_table_insert (hash_table: hash, key: result, value: result);
367
368out:
369 G_UNLOCK (lang_from_string);
370
371 return (PangoLanguage *)result;
372}
373
/**
 * pango_language_to_string:
 * @language: a language tag.
 *
 * Gets the RFC-3066 format string representing the given language tag.
 *
 * Returns: (transfer none): a string representing the language tag
 */
/* NOTE(review): the parentheses around the name below stop a function-like
 * macro of the same name from expanding at the definition, so a real,
 * linkable function is emitted.  The call in the body is then presumably
 * meant to expand to that macro (expected to come from pango-language.h)
 * rather than recurse -- confirm against the header. */
const char *
(pango_language_to_string) (PangoLanguage *language)
{
  return pango_language_to_string (language);
}
387
388/**
389 * pango_language_matches:
390 * @language: (nullable): a language tag (see [func@Pango.Language.from_string]),
391 * %NULL is allowed and matches nothing but '*'
392 * @range_list: a list of language ranges, separated by ';', ':',
393 * ',', or space characters.
394 * Each element must either be '*', or a RFC 3066 language range
395 * canonicalized as by [func@Pango.Language.from_string]
396 *
397 * Checks if a language tag matches one of the elements in a list of
398 * language ranges.
399 *
400 * A language tag is considered to match a range in the list if the
401 * range is '*', the range is exactly the tag, or the range is a prefix
402 * of the tag, and the character after it in the tag is '-'.
403 *
404 * Return value: %TRUE if a match was found
405 */
406gboolean
407pango_language_matches (PangoLanguage *language,
408 const char *range_list)
409{
410 const char *lang_str = pango_language_to_string (language);
411 const char *p = range_list;
412 gboolean done = FALSE;
413
414 while (!done)
415 {
416 const char *end = strpbrk (s: p, LANGUAGE_SEPARATORS);
417 if (!end)
418 {
419 end = p + strlen (s: p);
420 done = TRUE;
421 }
422
423 if (strncmp (s1: p, s2: "*", n: 1) == 0 ||
424 (lang_str && strncmp (s1: lang_str, s2: p, n: end - p) == 0 &&
425 (lang_str[end - p] == '\0' || lang_str[end - p] == '-')))
426 return TRUE;
427
428 if (!done)
429 p = end + 1;
430 }
431
432 return FALSE;
433}
434
435static int
436lang_compare_first_component (gconstpointer pa,
437 gconstpointer pb)
438{
439 const char *a = pa, *b = pb;
440 unsigned int da, db;
441 const char *p;
442
443 p = strstr (haystack: a, needle: "-");
444 da = p ? (unsigned int) (p - a) : strlen (s: a);
445
446 p = strstr (haystack: b, needle: "-");
447 db = p ? (unsigned int) (p - b) : strlen (s: b);
448
449 return strncmp (s1: a, s2: b, MAX (da, db));
450}
451
452/* Finds the best record for @language in an array of records.
453 * Each record should start with the string representation of the language
454 * code for the record (embedded, not a pointer), and the records must be
455 * sorted on language code.
456 */
457static gconstpointer
458find_best_lang_match (PangoLanguage *language,
459 gconstpointer records,
460 guint num_records,
461 guint record_size)
462{
463 const char *lang_str;
464 const char *record, *start, *end;
465
466 if (language == NULL)
467 return NULL;
468
469 lang_str = pango_language_to_string (language);
470
471 record = bsearch (key: lang_str,
472 base: records, nmemb: num_records, size: record_size,
473 compar: lang_compare_first_component);
474 if (!record)
475 return NULL;
476
477 start = (const char *) records;
478 end = start + num_records * record_size;
479
480 /* find the best match among all those that have the same first-component */
481
482 /* go to the final one matching in the first component */
483 while (record < end - record_size &&
484 lang_compare_first_component (pa: lang_str, pb: record + record_size) == 0)
485 record += record_size;
486
487 /* go back, find which one matches completely */
488 while (start <= record &&
489 lang_compare_first_component (pa: lang_str, pb: record) == 0)
490 {
491 if (pango_language_matches (language, range_list: record))
492 return record;
493
494 record -= record_size;
495 }
496
497 return NULL;
498}
499
500static gconstpointer
501find_best_lang_match_cached (PangoLanguage *language,
502 gconstpointer *cache,
503 gconstpointer records,
504 guint num_records,
505 guint record_size)
506{
507 gconstpointer result;
508
509 if (G_LIKELY (cache && *cache != (gconstpointer) -1))
510 return *cache;
511
512 result = find_best_lang_match (language,
513 records,
514 num_records,
515 record_size);
516
517 if (cache)
518 *cache = result;
519
520 return result;
521}
522
/* Looks up the best record for @language in the array @records, caching
 * the result in the @cache_key member of the language's private header
 * (no caching if the header cannot be obtained).
 *
 * @records must be a true array, since G_N_ELEMENTS() and sizeof are
 * applied to it.  @language is evaluated more than once.  The expansion is
 * a plain expression with no trailing semicolon; the caller supplies it.
 */
#define FIND_BEST_LANG_MATCH_CACHED(language, cache_key, records) \
	find_best_lang_match_cached ((language), \
				     pango_language_get_private (language) ? \
				       &(pango_language_get_private (language)->cache_key) : NULL, \
				     (records), \
				     G_N_ELEMENTS (records), \
				     sizeof (*(records)))
530
/* One entry of the sample-text table (lang_texts). */
typedef struct {
  char lang[6];   /* language code, embedded so the record itself can be
                   * used as the string key by find_best_lang_match() */
  guint16 offset; /* byte offset of the sample string within lang_pool.str */
} LangInfo;
535
/* Pure black magic, based on appendix of dsohowto.pdf
 *
 * "pango-language-sample-table.h" is included three times below, each time
 * with a different definition of LANGUAGE(id, source, sample):
 *   1. to declare one char array per sample inside struct _LangPoolStruct,
 *   2. to initialize those arrays with the sample strings,
 *   3. to build lang_texts, pairing each id with the byte offset of its
 *      array inside the struct.
 * Field names are generated from __LINE__ (str<line>), so passes 1 and 3
 * refer to the same field because a given LANGUAGE() use is on the same
 * header line in both passes.
 */
/* Two-level expansion so that __LINE__ is replaced by its number before
 * it is pasted onto "str". */
#define POOLSTRFIELD(line) POOLSTRFIELD1(line)
#define POOLSTRFIELD1(line) str##line
struct _LangPoolStruct {
  char str0[1]; /* holds the empty string at offset 0 */
#define LANGUAGE(id, source, sample) char POOLSTRFIELD(__LINE__)[sizeof(sample)];
#include "pango-language-sample-table.h"
#undef LANGUAGE
};

/* The union lets the pool be addressed as one flat char array
 * (lang_pool.str + offset) while being initialized field by field. */
static const union _LangPool {
  struct _LangPoolStruct lang_pool_struct;
  const char str[1];
} lang_pool = { {
    "",
#define LANGUAGE(id, source, sample) sample,
#include "pango-language-sample-table.h"
#undef LANGUAGE
} };
/* Language id -> offset of its sample text in lang_pool; searched with
 * bsearch() via find_best_lang_match(), which requires the entries to be
 * sorted on language code. */
static const LangInfo lang_texts[] = {
#define LANGUAGE(id, source, sample) {G_STRINGIFY(id), G_STRUCT_OFFSET(struct _LangPoolStruct, POOLSTRFIELD(__LINE__))},
#include "pango-language-sample-table.h"
#undef LANGUAGE
  /* One extra entry with no final comma, to make it C89-happy */
  {"~~", 0}
};
562
563/**
564 * pango_language_get_sample_string:
565 * @language: (nullable): a `PangoLanguage`
566 *
567 * Get a string that is representative of the characters needed to
568 * render a particular language.
569 *
570 * The sample text may be a pangram, but is not necessarily. It is chosen
571 * to be demonstrative of normal text in the language, as well as exposing
572 * font feature requirements unique to the language. It is suitable for use
573 * as sample text in a font selection dialog.
574 *
575 * If @language is %NULL, the default language as found by
576 * [func@Pango.Language.get_default] is used.
577 *
578 * If Pango does not have a sample string for @language, the classic
579 * "The quick brown fox..." is returned. This can be detected by
580 * comparing the returned pointer value to that returned for (non-existent)
581 * language code "xx". That is, compare to:
582 *
583 * ```
584 * pango_language_get_sample_string (pango_language_from_string ("xx"))
585 * ```
586 *
587 * Return value: (transfer none): the sample string
588 */
589const char *
590pango_language_get_sample_string (PangoLanguage *language)
591{
592 const LangInfo *lang_info;
593
594 if (!language)
595 language = pango_language_get_default ();
596
597 lang_info = FIND_BEST_LANG_MATCH_CACHED (language,
598 lang_info,
599 lang_texts);
600
601 if (lang_info)
602 return lang_pool.str + lang_info->offset;
603
604 return "The quick brown fox jumps over the lazy dog.";
605}
606
607
608
609
610/*
611 * From language to script
612 */
613
614
615#include "pango-script-lang-table.h"
616
617/**
618 * pango_language_get_scripts:
619 * @language: (nullable): a `PangoLanguage`
620 * @num_scripts: (out caller-allocates) (optional): location to
621 * return number of scripts
622 *
623 * Determines the scripts used to to write @language.
624 *
625 * If nothing is known about the language tag @language,
626 * or if @language is %NULL, then %NULL is returned.
627 * The list of scripts returned starts with the script that the
628 * language uses most and continues to the one it uses least.
629 *
630 * The value @num_script points at will be set to the number
631 * of scripts in the returned array (or zero if %NULL is returned).
632 *
633 * Most languages use only one script for writing, but there are
634 * some that use two (Latin and Cyrillic for example), and a few
635 * use three (Japanese for example). Applications should not make
636 * any assumptions on the maximum number of scripts returned
637 * though, except that it is positive if the return value is not
638 * %NULL, and it is a small number.
639 *
640 * The [method@Pango.Language.includes_script] function uses this
641 * function internally.
642 *
643 * Note: while the return value is declared as `PangoScript`, the
644 * returned values are from the `GUnicodeScript` enumeration, which
645 * may have more values. Callers need to handle unknown values.
646 *
647 * Return value: (transfer none) (array length=num_scripts) (nullable):
648 * An array of `PangoScript` values, with the number of entries in
649 * the array stored in @num_scripts, or %NULL if Pango does not have
650 * any information about this particular language tag (also the case
651 * if @language is %NULL).
652 *
653 * Since: 1.22
654 */
655const PangoScript *
656pango_language_get_scripts (PangoLanguage *language,
657 int *num_scripts)
658{
659 const PangoScriptForLang *script_for_lang;
660 unsigned int j;
661
662 script_for_lang = FIND_BEST_LANG_MATCH_CACHED (language,
663 script_for_lang,
664 pango_script_for_lang);
665
666 if (!script_for_lang || script_for_lang->scripts[0] == 0)
667 {
668 if (num_scripts)
669 *num_scripts = 0;
670
671 return NULL;
672 }
673
674 if (num_scripts)
675 {
676 for (j = 0; j < G_N_ELEMENTS (script_for_lang->scripts); j++)
677 if (script_for_lang->scripts[j] == 0)
678 break;
679
680 g_assert (j > 0);
681
682 *num_scripts = j;
683 }
684
685 return (const PangoScript *) script_for_lang->scripts;
686}
687
688/**
689 * pango_language_includes_script:
690 * @language: (nullable): a `PangoLanguage`
691 * @script: a `PangoScript`
692 *
693 * Determines if @script is one of the scripts used to
694 * write @language.
695 *
696 * The returned value is conservative; if nothing is known about
697 * the language tag @language, %TRUE will be returned, since, as
698 * far as Pango knows, @script might be used to write @language.
699 *
700 * This routine is used in Pango's itemization process when
701 * determining if a supplied language tag is relevant to
702 * a particular section of text. It probably is not useful
703 * for applications in most circumstances.
704 *
705 * This function uses [method@Pango.Language.get_scripts] internally.
706 *
707 * Return value: %TRUE if @script is one of the scripts used
708 * to write @language or if nothing is known about @language
709 * (including the case that @language is %NULL), %FALSE otherwise.
710 *
711 * Since: 1.4
712 */
713gboolean
714pango_language_includes_script (PangoLanguage *language,
715 PangoScript script)
716{
717 const PangoScript *scripts;
718 int num_scripts, j;
719
720/* copied from the one in pango-script.c */
721#define REAL_SCRIPT(script) \
722 ((script) > PANGO_SCRIPT_INHERITED && (script) != PANGO_SCRIPT_UNKNOWN)
723
724 if (!REAL_SCRIPT (script))
725 return TRUE;
726
727#undef REAL_SCRIPT
728
729 scripts = pango_language_get_scripts (language, num_scripts: &num_scripts);
730 if (!scripts)
731 return TRUE;
732
733 for (j = 0; j < num_scripts; j++)
734 if (scripts[j] == script)
735 return TRUE;
736
737 return FALSE;
738}
739
740
741
742
743/*
744 * From script to language
745 */
746
747
748static PangoLanguage **
749parse_default_languages (void)
750{
751 char *p, *p_copy;
752 gboolean done = FALSE;
753 GPtrArray *langs;
754
755 p = getenv (name: "PANGO_LANGUAGE");
756
757 if (p == NULL)
758 p = getenv (name: "LANGUAGE");
759
760 if (p == NULL)
761 return NULL;
762
763 p_copy = p = g_strdup (str: p);
764
765 langs = g_ptr_array_new ();
766
767 while (!done)
768 {
769 char *end = strpbrk (s: p, LANGUAGE_SEPARATORS);
770 if (!end)
771 {
772 end = p + strlen (s: p);
773 done = TRUE;
774 }
775 else
776 *end = '\0';
777
778 /* skip empty languages, and skip the language 'C' */
779 if (p != end && !(p + 1 == end && *p == 'C'))
780 {
781 PangoLanguage *l = pango_language_from_string (language: p);
782
783 g_ptr_array_add (array: langs, data: l);
784 }
785
786 if (!done)
787 p = end + 1;
788 }
789
790 g_ptr_array_add (array: langs, NULL);
791
792 g_free (mem: p_copy);
793
794 return (PangoLanguage **) g_ptr_array_free (array: langs, FALSE);
795}
796
/* State behind _pango_script_get_default_language(); that function
 * initializes and updates it while holding the "languages" lock. */
G_LOCK_DEFINE_STATIC (languages);
/* Set once parse_default_languages() has been run. */
static gboolean initialized = FALSE; /* MT-safe */
/* NULL-terminated list from parse_default_languages(), or NULL. */
static PangoLanguage * const * languages = NULL; /* MT-safe */
/* Cache: script (as GINT_TO_POINTER) -> chosen PangoLanguage, possibly NULL.
 * Only created when languages is non-NULL. */
static GHashTable *hash = NULL; /* MT-safe */
801
802static PangoLanguage *
803_pango_script_get_default_language (PangoScript script)
804{
805 PangoLanguage *result, * const * p;
806
807 G_LOCK (languages);
808
809 if (G_UNLIKELY (!initialized))
810 {
811 languages = parse_default_languages ();
812
813 if (languages)
814 hash = g_hash_table_new (NULL, NULL);
815
816 initialized = TRUE;
817 }
818
819 if (!languages)
820 {
821 result = NULL;
822 goto out;
823 }
824
825 if (g_hash_table_lookup_extended (hash_table: hash, GINT_TO_POINTER (script), NULL, value: (gpointer *) (gpointer) &result))
826 goto out;
827
828 for (p = languages; *p; p++)
829 if (pango_language_includes_script (language: *p, script))
830 break;
831 result = *p;
832
833 g_hash_table_insert (hash_table: hash, GINT_TO_POINTER (script), value: result);
834
835out:
836 G_UNLOCK (languages);
837
838 return result;
839}
840
841/**
842 * pango_language_get_preferred:
843 *
844 * Returns the list of languages that the user prefers.
845 *
846 * The list is specified by the `PANGO_LANGUAGE` or `LANGUAGE`
847 * environment variables, in order of preference. Note that this
848 * list does not necessarily include the language returned by
849 * [func@Pango.Language.get_default].
850 *
851 * When choosing language-specific resources, such as the sample
852 * text returned by [method@Pango.Language.get_sample_string],
853 * you should first try the default language, followed by the
854 * languages returned by this function.
855 *
856 * Returns: (transfer none) (nullable): a %NULL-terminated array
857 * of `PangoLanguage`*
858 *
859 * Since: 1.48
860 */
861PangoLanguage **
862pango_language_get_preferred (void)
863{
864 /* We call this just for its side-effect of initializing languages */
865 _pango_script_get_default_language (script: PANGO_SCRIPT_COMMON);
866
867 return (PangoLanguage **) languages;
868}
869
870/**
871 * pango_script_get_sample_language:
872 * @script: a `PangoScript`
873 *
874 * Finds a language tag that is reasonably representative of @script.
875 *
876 * The language will usually be the most widely spoken or used language
877 * written in that script: for instance, the sample language for
878 * %PANGO_SCRIPT_CYRILLIC is ru (Russian), the sample language for
879 * %PANGO_SCRIPT_ARABIC is ar.
880 *
881 * For some scripts, no sample language will be returned because
882 * there is no language that is sufficiently representative. The
883 * best example of this is %PANGO_SCRIPT_HAN, where various different
884 * variants of written Chinese, Japanese, and Korean all use
885 * significantly different sets of Han characters and forms
886 * of shared characters. No sample language can be provided
887 * for many historical scripts as well.
888 *
889 * As of 1.18, this function checks the environment variables
890 * `PANGO_LANGUAGE` and `LANGUAGE` (checked in that order) first.
891 * If one of them is set, it is parsed as a list of language tags
892 * separated by colons or other separators. This function
893 * will return the first language in the parsed list that Pango
894 * believes may use @script for writing. This last predicate
895 * is tested using [method@Pango.Language.includes_script]. This can
896 * be used to control Pango's font selection for non-primary
897 * languages. For example, a `PANGO_LANGUAGE` enviroment variable
898 * set to "en:fa" makes Pango choose fonts suitable for Persian (fa)
899 * instead of Arabic (ar) when a segment of Arabic text is found
900 * in an otherwise non-Arabic text. The same trick can be used to
901 * choose a default language for %PANGO_SCRIPT_HAN when setting
902 * context language is not feasible.
903 *
904 * Return value: (nullable): a `PangoLanguage` that is representative
905 * of the script
906 *
907 * Since: 1.4
908 */
909PangoLanguage *
910pango_script_get_sample_language (PangoScript script)
911{
912 /* Note that in the following, we want
913 * pango_language_includes_script() for the sample language
914 * to include the script, so alternate orthographies
915 * (Shavian for English, Osmanya for Somali, etc), typically
916 * have no sample language
917 */
918 static const char sample_languages[][4] = {
919 "", /* PANGO_SCRIPT_COMMON */
920 "", /* PANGO_SCRIPT_INHERITED */
921 "ar", /* PANGO_SCRIPT_ARABIC */
922 "hy", /* PANGO_SCRIPT_ARMENIAN */
923 "bn", /* PANGO_SCRIPT_BENGALI */
924 /* Used primarily in Taiwan, but not part of the standard
925 * zh-tw orthography */
926 "", /* PANGO_SCRIPT_BOPOMOFO */
927 "chr", /* PANGO_SCRIPT_CHEROKEE */
928 "cop", /* PANGO_SCRIPT_COPTIC */
929 "ru", /* PANGO_SCRIPT_CYRILLIC */
930 /* Deseret was used to write English */
931 "", /* PANGO_SCRIPT_DESERET */
932 "hi", /* PANGO_SCRIPT_DEVANAGARI */
933 "am", /* PANGO_SCRIPT_ETHIOPIC */
934 "ka", /* PANGO_SCRIPT_GEORGIAN */
935 "", /* PANGO_SCRIPT_GOTHIC */
936 "el", /* PANGO_SCRIPT_GREEK */
937 "gu", /* PANGO_SCRIPT_GUJARATI */
938 "pa", /* PANGO_SCRIPT_GURMUKHI */
939 "", /* PANGO_SCRIPT_HAN */
940 "ko", /* PANGO_SCRIPT_HANGUL */
941 "he", /* PANGO_SCRIPT_HEBREW */
942 "ja", /* PANGO_SCRIPT_HIRAGANA */
943 "kn", /* PANGO_SCRIPT_KANNADA */
944 "ja", /* PANGO_SCRIPT_KATAKANA */
945 "km", /* PANGO_SCRIPT_KHMER */
946 "lo", /* PANGO_SCRIPT_LAO */
947 "en", /* PANGO_SCRIPT_LATIN */
948 "ml", /* PANGO_SCRIPT_MALAYALAM */
949 "mn", /* PANGO_SCRIPT_MONGOLIAN */
950 "my", /* PANGO_SCRIPT_MYANMAR */
951 /* Ogham was used to write old Irish */
952 "", /* PANGO_SCRIPT_OGHAM */
953 "", /* PANGO_SCRIPT_OLD_ITALIC */
954 "or", /* PANGO_SCRIPT_ORIYA */
955 "", /* PANGO_SCRIPT_RUNIC */
956 "si", /* PANGO_SCRIPT_SINHALA */
957 "syr", /* PANGO_SCRIPT_SYRIAC */
958 "ta", /* PANGO_SCRIPT_TAMIL */
959 "te", /* PANGO_SCRIPT_TELUGU */
960 "dv", /* PANGO_SCRIPT_THAANA */
961 "th", /* PANGO_SCRIPT_THAI */
962 "bo", /* PANGO_SCRIPT_TIBETAN */
963 "iu", /* PANGO_SCRIPT_CANADIAN_ABORIGINAL */
964 "", /* PANGO_SCRIPT_YI */
965 "tl", /* PANGO_SCRIPT_TAGALOG */
966 /* Phillipino languages/scripts */
967 "hnn", /* PANGO_SCRIPT_HANUNOO */
968 "bku", /* PANGO_SCRIPT_BUHID */
969 "tbw", /* PANGO_SCRIPT_TAGBANWA */
970
971 "", /* PANGO_SCRIPT_BRAILLE */
972 "", /* PANGO_SCRIPT_CYPRIOT */
973 "", /* PANGO_SCRIPT_LIMBU */
974 /* Used for Somali (so) in the past */
975 "", /* PANGO_SCRIPT_OSMANYA */
976 /* The Shavian alphabet was designed for English */
977 "", /* PANGO_SCRIPT_SHAVIAN */
978 "", /* PANGO_SCRIPT_LINEAR_B */
979 "", /* PANGO_SCRIPT_TAI_LE */
980 "uga", /* PANGO_SCRIPT_UGARITIC */
981
982 "", /* PANGO_SCRIPT_NEW_TAI_LUE */
983 "bug", /* PANGO_SCRIPT_BUGINESE */
984 /* The original script for Old Church Slavonic (chu), later
985 * written with Cyrillic */
986 "", /* PANGO_SCRIPT_GLAGOLITIC */
987 /* Used for for Berber (ber), but Arabic script is more common */
988 "", /* PANGO_SCRIPT_TIFINAGH */
989 "syl", /* PANGO_SCRIPT_SYLOTI_NAGRI */
990 "peo", /* PANGO_SCRIPT_OLD_PERSIAN */
991 "", /* PANGO_SCRIPT_KHAROSHTHI */
992
993 "", /* PANGO_SCRIPT_UNKNOWN */
994 "", /* PANGO_SCRIPT_BALINESE */
995 "", /* PANGO_SCRIPT_CUNEIFORM */
996 "", /* PANGO_SCRIPT_PHOENICIAN */
997 "", /* PANGO_SCRIPT_PHAGS_PA */
998 "nqo", /* PANGO_SCRIPT_NKO */
999
1000 /* Unicode-5.1 additions */
1001 "", /* PANGO_SCRIPT_KAYAH_LI */
1002 "", /* PANGO_SCRIPT_LEPCHA */
1003 "", /* PANGO_SCRIPT_REJANG */
1004 "", /* PANGO_SCRIPT_SUNDANESE */
1005 "", /* PANGO_SCRIPT_SAURASHTRA */
1006 "", /* PANGO_SCRIPT_CHAM */
1007 "", /* PANGO_SCRIPT_OL_CHIKI */
1008 "", /* PANGO_SCRIPT_VAI */
1009 "", /* PANGO_SCRIPT_CARIAN */
1010 "", /* PANGO_SCRIPT_LYCIAN */
1011 "", /* PANGO_SCRIPT_LYDIAN */
1012
1013 /* Unicode-6.0 additions */
1014 "", /* PANGO_SCRIPT_BATAK */
1015 "", /* PANGO_SCRIPT_BRAHMI */
1016 "", /* PANGO_SCRIPT_MANDAIC */
1017
1018 /* Unicode-6.1 additions */
1019 "", /* PANGO_SCRIPT_CHAKMA */
1020 "", /* PANGO_SCRIPT_MEROITIC_CURSIVE */
1021 "", /* PANGO_SCRIPT_MEROITIC_HIEROGLYPHS */
1022 "", /* PANGO_SCRIPT_MIAO */
1023 "", /* PANGO_SCRIPT_SHARADA */
1024 "", /* PANGO_SCRIPT_SORA_SOMPENG */
1025 "", /* PANGO_SCRIPT_TAKRI */
1026 };
1027 const char *sample_language;
1028 PangoLanguage *result;
1029
1030 g_return_val_if_fail (script >= 0, NULL);
1031
1032 if ((guint)script >= G_N_ELEMENTS (sample_languages))
1033 return NULL;
1034
1035 result = _pango_script_get_default_language (script);
1036 if (result)
1037 return result;
1038
1039 sample_language = sample_languages[script];
1040
1041 if (!sample_language[0])
1042 return NULL;
1043 else
1044 return pango_language_from_string (language: sample_language);
1045}
1046

/* Source: gtk/subprojects/pango/pango/pango-language.c */