1/* GTK - The GIMP Toolkit
2 * Copyright (C) 2015 Takao Fujiwara <takao.fujiwara1@gmail.com>
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <gdk/gdk.h>
19#include <glib.h>
20#include <glib/gprintf.h>
21#include <glib/gstdio.h>
22#include <locale.h>
23#include <stdlib.h>
24#include <string.h>
25
26#include "gtkcomposetable.h"
27#include "gtkimcontextsimple.h"
28
29
30#define GTK_COMPOSE_TABLE_MAGIC "GtkComposeTable"
31#define GTK_COMPOSE_TABLE_VERSION (3)
32
33extern const GtkComposeTable builtin_compose_table;
34
35/* Maximum length of sequences we parse */
36
37#define MAX_COMPOSE_LEN 20
38
39/* Implemented from g_str_hash() */
40static guint32
41data_hash (gconstpointer v, int length)
42{
43 const guint16 *p, *head;
44 unsigned char c;
45 guint32 h = 5381;
46
47 for (p = v, head = v; (p - head) < length; p++)
48 {
49 c = 0x00ff & (*p >> 8);
50 h = (h << 5) + h + c;
51 c = 0x00ff & *p;
52 h = (h << 5) + h + c;
53 }
54
55 return h;
56}
57
58static guint32
59sequence_hash (gconstpointer v)
60{
61 const gunichar *p = v;
62 int i;
63
64 for (i = 0; p[i]; i++) ;
65
66 return data_hash (v, length: i);
67}
68
69static gboolean
70sequence_equal (gconstpointer v1,
71 gconstpointer v2)
72{
73 const gunichar *p1 = v1;
74 const gunichar *p2 = v2;
75 int i;
76
77 for (i = 0; p1[i] && p2[i] && p1[i] == p2[i]; i++) ;
78
79 return p1[i] == p2[i];
80}
81
82typedef struct {
83 GHashTable *sequences;
84 GList *files;
85 const char *compose_file;
86 gboolean found_include;
87} GtkComposeParser;
88
89static GtkComposeParser *
90parser_new (void)
91{
92 GtkComposeParser *parser;
93
94 parser = g_new (GtkComposeParser, 1);
95
96 parser->sequences = g_hash_table_new_full (hash_func: sequence_hash, key_equal_func: sequence_equal, key_destroy_func: g_free, value_destroy_func: g_free);
97 parser->files = NULL;
98 parser->compose_file = NULL;
99 parser->found_include = FALSE;
100
101 return parser;
102}
103
104static void
105parser_free (GtkComposeParser *parser)
106{
107 g_hash_table_unref (hash_table: parser->sequences);
108 g_list_free_full (list: parser->files, free_func: g_free);
109 g_free (mem: parser);
110}
111
112static gboolean
113is_codepoint (const char *str)
114{
115 int i;
116
117 /* 'U' is not code point but 'U00C0' is code point */
118 if (str[0] == '\0' || str[0] != 'U' || str[1] == '\0')
119 return FALSE;
120
121 for (i = 1; str[i] != '\0'; i++)
122 {
123 if (!g_ascii_isxdigit (str[i]))
124 return FALSE;
125 }
126
127 return TRUE;
128}
129
130static char *
131parse_compose_value (const char *val,
132 const char *line)
133{
134 const char *p;
135 GString *value;
136 gunichar ch;
137 char *endp;
138
139 value = g_string_new (init: "");
140
141 if (val[0] != '"')
142 {
143 g_warning ("Only strings supported after ':': %s: %s", val, line);
144 goto fail;
145 }
146
147 p = val + 1;
148 while (*p)
149 {
150 if (*p == '\"')
151 {
152 return g_string_free (string: value, FALSE);
153 }
154
155 if (p[1] == '\0')
156 {
157 g_warning ("Missing closing '\"': %s: %s", val, line);
158 goto fail;
159 }
160 else if (*p == '\\')
161 {
162 if (p[1] == '"')
163 {
164 g_string_append_c (value, '"');
165 p += 2;
166 }
167 else if (p[1] == '\\')
168 {
169 g_string_append_c (value, '\\');
170 p += 2;
171 }
172 else if (p[1] >= '0' && p[1] < '8')
173 {
174 ch = g_ascii_strtoll (nptr: p + 1, endptr: &endp, base: 8);
175 if (ch == 0)
176 {
177 g_warning ("Invalid escape sequence: %s: %s", val, line);
178 goto fail;
179 }
180 g_string_append_unichar (string: value, wc: ch);
181 p = endp;
182 }
183 else if (p[1] == 'x' || p[1] == 'X')
184 {
185 ch = g_ascii_strtoll (nptr: p + 2, endptr: &endp, base: 16);
186 if (ch == 0)
187 {
188 g_warning ("Invalid escape sequence: %s: %s", val, line);
189 goto fail;
190 }
191 g_string_append_unichar (string: value, wc: ch);
192 p = endp;
193 }
194 else
195 {
196 g_warning ("Invalid escape sequence: %s: %s", val, line);
197 goto fail;
198 }
199 }
200 else
201 {
202 ch = g_utf8_get_char (p);
203 g_string_append_unichar (string: value, wc: ch);
204 p = g_utf8_next_char (p);
205 }
206 }
207
208fail:
209 g_string_free (string: value, TRUE);
210 return NULL;
211}
212
213static gunichar *
214parse_compose_sequence (const char *seq,
215 const char *line)
216{
217 char **words = g_strsplit (string: seq, delimiter: "<", max_tokens: -1);
218 int i;
219 int n = 0;
220 gunichar *sequence = NULL;
221
222 if (g_strv_length (str_array: words) < 2)
223 {
224 g_warning ("key sequence format is <a> <b>...: %s", line);
225 goto fail;
226 }
227
228 for (i = 1; words[i] != NULL; i++)
229 {
230 char *start = words[i];
231 char *end = strchr (s: words[i], c: '>');
232 char *match;
233 gunichar codepoint;
234
235 if (words[i][0] == '\0')
236 continue;
237
238 if (start == NULL || end == NULL || end <= start)
239 {
240 g_warning ("key sequence format is <a> <b>...: %s", line);
241 goto fail;
242 }
243
244 match = g_strndup (str: start, n: end - start);
245
246 sequence = g_realloc (mem: sequence, n_bytes: sizeof (gunichar) * (n + 2));
247
248 if (is_codepoint (str: match))
249 {
250 codepoint = (gunichar) g_ascii_strtoll (nptr: match + 1, NULL, base: 16);
251 sequence[n] = codepoint;
252 sequence[n + 1] = 0;
253 }
254 else
255 {
256 codepoint = (gunichar) gdk_keyval_from_name (keyval_name: match);
257 sequence[n] = codepoint;
258 sequence[n + 1] = 0;
259 }
260
261 if (codepoint == GDK_KEY_VoidSymbol)
262 g_warning ("Could not get code point of keysym %s", match);
263 g_free (mem: match);
264 n++;
265 }
266
267 if (0 == n || n > MAX_COMPOSE_LEN)
268 {
269 g_warning ("Suspicious compose sequence length (%d). Are you sure this is right?: %s",
270 n, line);
271 goto fail;
272 }
273
274 g_strfreev (str_array: words);
275
276 return sequence;
277
278fail:
279 g_strfreev (str_array: words);
280 g_free (mem: sequence);
281 return NULL;
282}
283
284static void parser_parse_file (GtkComposeParser *parser,
285 const char *path);
286
287/* Substitute %H, %L and %S */
288static char *
289handle_substitutions (const char *start,
290 int length)
291{
292 GString *s;
293 const char *locale_name;
294 const char *p;
295
296 s = g_string_new (init: "");
297
298 locale_name = getenv (name: "LANG");
299
300 for (p = start; *p && p < start + length; p++)
301 {
302 if (*p != '%')
303 {
304 g_string_append_c (s, *p);
305 }
306 else
307 {
308 switch (p[1])
309 {
310 case 'H':
311 p++;
312 g_string_append (string: s, val: g_get_home_dir ());
313 break;
314 case 'L':
315 p++;
316 g_string_append_printf (string: s, format: "/usr/share/X11/locale/%s/Compose", locale_name);
317 break;
318 case 'S':
319 p++;
320 g_string_append (string: s, val: "/usr/share/X11/locale");
321 break;
322 default: ;
323 /* do nothing, next iteration handles p[1] */
324 }
325 }
326 }
327
328 return g_string_free (string: s, FALSE);
329}
330
331static void
332add_sequence (gunichar *sequence,
333 int len,
334 const char *value,
335 gpointer data)
336{
337 GtkComposeParser *parser = data;
338 gunichar *seq;
339
340 seq = g_new (gunichar, len + 1);
341 memcpy (dest: seq, src: sequence, n: (len + 1) * sizeof (gunichar));
342
343 g_hash_table_replace (hash_table: parser->sequences, key: seq, value: g_strdup (str: value));
344}
345
346static void
347parser_add_default_sequences (GtkComposeParser *parser)
348{
349 const GtkComposeTable *table = &builtin_compose_table;
350
351 gtk_compose_table_foreach (table, callback: add_sequence, data: parser);
352}
353
354static void
355parser_handle_include (GtkComposeParser *parser,
356 const char *line)
357{
358 const char *p;
359 const char *start, *end;
360 char *path;
361
362 parser->found_include = TRUE;
363
364 p = line + strlen (s: "include ");
365
366 while (g_ascii_isspace (*p))
367 p++;
368
369 if (*p != '"')
370 goto error;
371
372 p++;
373
374 start = p;
375
376 while (*p && *p != '"')
377 p++;
378
379 if (*p != '"')
380 goto error;
381
382 end = p;
383
384 p++;
385
386 while (g_ascii_isspace (*p))
387 p++;
388
389 if (*p && *p != '#')
390 goto error;
391
392 if (end - start == 2 &&
393 strncmp (s1: "%L", s2: start, n: end - start) == 0)
394 {
395 parser_add_default_sequences (parser);
396 }
397 else
398 {
399 path = handle_substitutions (start, length: end - start);
400 parser_parse_file (parser, path);
401 g_free (mem: path);
402 }
403
404 return;
405
406error:
407 g_warning ("Could not parse include: %s", line);
408}
409
410static void
411parser_parse_line (GtkComposeParser *parser,
412 const char *line)
413{
414 char **components = NULL;
415 gunichar *sequence = NULL;
416 char *value = NULL;
417
418 if (line[0] == '\0' || line[0] == '#')
419 return;
420
421 if (g_str_has_prefix (str: line, prefix: "include "))
422 {
423 parser_handle_include (parser, line);
424 return;
425 }
426
427 components = g_strsplit (string: line, delimiter: ":", max_tokens: 2);
428
429 if (components[1] == NULL)
430 {
431 g_warning ("No delimiter ':': %s", line);
432 goto fail;
433 }
434
435 sequence = parse_compose_sequence (g_strstrip (components[0]), line);
436 if (sequence == NULL)
437 goto fail;
438
439 value = parse_compose_value (g_strstrip (components[1]), line);
440 if (value == NULL)
441 goto fail;
442
443 g_strfreev (str_array: components);
444
445 g_hash_table_replace (hash_table: parser->sequences, key: sequence, value);
446
447 return;
448
449fail:
450 g_strfreev (str_array: components);
451 g_free (mem: sequence);
452 g_free (mem: value);
453}
454
455static void
456parser_read_file (GtkComposeParser *parser,
457 const char *compose_file)
458{
459 char *contents = NULL;
460 char **lines = NULL;
461 gsize length = 0;
462 GError *error = NULL;
463
464 if (!g_file_get_contents (filename: compose_file, contents: &contents, length: &length, error: &error))
465 {
466 g_warning ("%s", error->message);
467 g_error_free (error);
468 return;
469 }
470
471 lines = g_strsplit (string: contents, delimiter: "\n", max_tokens: -1);
472 for (int i = 0; lines[i] != NULL; i++)
473 parser_parse_line (parser, line: lines[i]);
474
475 g_strfreev (str_array: lines);
476 g_free (mem: contents);
477}
478
479/* Remove sequences that can be handled algorithmically,
480 * sequences with non-BMP keys, and sequences that produce
481 * empty strings.
482 */
483static void
484parser_remove_duplicates (GtkComposeParser *parser)
485{
486 GHashTableIter iter;
487 gunichar *sequence;
488 char *value;
489 GString *output;
490
491 output = g_string_new (init: "");
492
493 g_hash_table_iter_init (iter: &iter, hash_table: parser->sequences);
494 while (g_hash_table_iter_next (iter: &iter, key: (gpointer *)&sequence, value: (gpointer *)&value))
495 {
496 static guint keysyms[MAX_COMPOSE_LEN + 1];
497 int i;
498 int n_compose = 0;
499 gboolean remove_sequence = FALSE;
500
501 if (value[0] == '\0')
502 {
503 remove_sequence = TRUE;
504 goto next;
505 }
506
507 if (sequence[1] == 0)
508 {
509 remove_sequence = TRUE;
510 goto next;
511 }
512
513 for (i = 0; i < MAX_COMPOSE_LEN + 1; i++)
514 keysyms[i] = 0;
515
516 for (i = 0; i < MAX_COMPOSE_LEN + 1; i++)
517 {
518 guint codepoint = sequence[i];
519 keysyms[i] = codepoint;
520
521 if (codepoint == 0)
522 break;
523
524 if (codepoint > 0xffff)
525 {
526 remove_sequence = TRUE;
527 goto next;
528 }
529
530 n_compose++;
531 }
532
533 if (gtk_check_algorithmically (compose_buffer: keysyms, n_compose, output))
534 {
535 if (strcmp (s1: value, s2: output->str) == 0)
536 remove_sequence = TRUE;
537 }
538
539next:
540 if (remove_sequence)
541 g_hash_table_iter_remove (iter: &iter);
542 }
543
544 g_string_free (string: output, TRUE);
545}
546
547static void
548parser_compute_max_compose_len (GtkComposeParser *parser,
549 int *max_compose_len,
550 int *n_first,
551 int *size)
552{
553 GHashTableIter iter;
554 gunichar *sequence;
555 char *value;
556 int max = 0;
557 int count = 0;
558 GHashTable *first;
559
560 first = g_hash_table_new (NULL, NULL);
561
562 g_hash_table_iter_init (iter: &iter, hash_table: parser->sequences);
563 while (g_hash_table_iter_next (iter: &iter, key: (gpointer *)&sequence, value: (gpointer *)&value))
564 {
565 g_hash_table_add (hash_table: first, GUINT_TO_POINTER (sequence[0]));
566
567 for (int i = 0; i < MAX_COMPOSE_LEN + 1; i++)
568 {
569 if (sequence[i] == 0)
570 {
571 count += i;
572 if (max < i)
573 max = i;
574 break;
575 }
576 }
577 }
578
579 *max_compose_len = max;
580 *n_first = g_hash_table_size (hash_table: first);
581 *size = count;
582
583 g_hash_table_unref (hash_table: first);
584}
585
586static inline int
587sequence_length (gpointer a)
588{
589 gunichar *seq = a;
590 int i;
591
592 for (i = 0; seq[i]; i++) ;
593
594 return i;
595}
596
597static int
598sequence_compare (gpointer a,
599 gpointer b,
600 gpointer data)
601{
602 gunichar *seq_a = a;
603 gunichar *seq_b = b;
604 int i;
605 gunichar code_a, code_b;
606 int len_a, len_b;
607
608 code_a = seq_a[0];
609 code_b = seq_b[0];
610
611 if (code_a != code_b)
612 return code_a - code_b;
613
614 len_a = sequence_length (a);
615 len_b = sequence_length (a: b);
616
617 if (len_a != len_b)
618 return len_a - len_b;
619
620 for (i = 1; i < len_a; i++)
621 {
622 code_a = seq_a[i];
623 code_b = seq_b[i];
624
625 if (code_a != code_b)
626 return code_a - code_b;
627 }
628
629 return 0;
630}
631
632guint32
633gtk_compose_table_data_hash (const guint16 *data,
634 int max_seq_len,
635 int n_seqs)
636{
637 gsize n_index_stride;
638 gsize length;
639
640 n_index_stride = max_seq_len + 2;
641 if (!g_size_checked_mul (&length, n_index_stride, n_seqs))
642 {
643 g_critical ("Overflow in the compose sequences");
644 return 0;
645 }
646
647 return data_hash (v: data, length);
648}
649
650static char *
651gtk_compose_hash_get_cache_path (guint32 hash)
652{
653 char *basename = NULL;
654 char *dir = NULL;
655 char *path = NULL;
656
657 basename = g_strdup_printf (format: "%08x.cache", hash);
658
659 dir = g_build_filename (first_element: g_get_user_cache_dir (), "gtk-4.0", "compose", NULL);
660 path = g_build_filename (first_element: dir, basename, NULL);
661 if (g_mkdir_with_parents (pathname: dir, mode: 0755) != 0)
662 {
663 g_warning ("Failed to mkdir %s", dir);
664 g_free (mem: path);
665 path = NULL;
666 }
667
668 g_free (mem: dir);
669 g_free (mem: basename);
670
671 return path;
672}
673
674static char *
675gtk_compose_table_serialize (GtkComposeTable *compose_table,
676 gsize *count)
677{
678 char *p, *contents;
679 gsize header_length, total_length;
680 guint16 bytes;
681 const char *header = GTK_COMPOSE_TABLE_MAGIC;
682 const guint16 version = GTK_COMPOSE_TABLE_VERSION;
683 guint16 max_seq_len = compose_table->max_seq_len;
684 guint16 n_index_size = compose_table->n_index_size;
685 guint16 data_size = compose_table->data_size;
686 guint16 n_chars = compose_table->n_chars;
687 guint32 i;
688
689 g_return_val_if_fail (compose_table != NULL, NULL);
690 g_return_val_if_fail (max_seq_len > 0, NULL);
691 g_return_val_if_fail (n_index_size > 0, NULL);
692
693 header_length = strlen (s: header);
694 total_length = header_length + sizeof (guint16) * (5 + data_size) + n_chars;
695 if (count)
696 *count = total_length;
697
698 p = contents = g_malloc (n_bytes: total_length);
699
700 memcpy (dest: p, src: header, n: header_length);
701 p += header_length;
702
703#define APPEND_GUINT16(elt) \
704 bytes = GUINT16_TO_BE (elt); \
705 memcpy (p, &bytes, sizeof (guint16)); \
706 p += sizeof (guint16);
707
708 APPEND_GUINT16 (version);
709 APPEND_GUINT16 (max_seq_len);
710 APPEND_GUINT16 (n_index_size);
711 APPEND_GUINT16 (data_size);
712 APPEND_GUINT16 (n_chars);
713
714 for (i = 0; i < data_size; i++)
715 {
716 APPEND_GUINT16 (compose_table->data[i]);
717 }
718
719 if (compose_table->n_chars > 0)
720 memcpy (dest: p, src: compose_table->char_data, n: compose_table->n_chars);
721
722#undef APPEND_GUINT16
723
724 return contents;
725}
726
727static GtkComposeTable *
728gtk_compose_table_load_cache (const char *compose_file,
729 gboolean *found_old_cache)
730{
731 guint32 hash;
732 char *path = NULL;
733 char *contents = NULL;
734 char *p;
735 GStatBuf original_buf;
736 GStatBuf cache_buf;
737 gsize total_length;
738 GError *error = NULL;
739 guint16 bytes;
740 guint16 version;
741 guint16 max_seq_len;
742 guint16 n_index_size;
743 guint16 data_size;
744 guint16 n_chars;
745 guint32 i;
746 guint16 *data = NULL;
747 char *char_data = NULL;
748 GtkComposeTable *retval;
749
750 *found_old_cache = FALSE;
751
752 hash = g_str_hash (v: compose_file);
753 if ((path = gtk_compose_hash_get_cache_path (hash)) == NULL)
754 return NULL;
755 if (!g_file_test (filename: path, test: G_FILE_TEST_EXISTS))
756 goto out_load_cache;
757
758 g_stat (file: path, buf: &cache_buf);
759 g_lstat (file: compose_file, buf: &original_buf);
760 if (original_buf.st_mtime > cache_buf.st_mtime)
761 goto out_load_cache;
762 g_stat (file: compose_file, buf: &original_buf);
763 if (original_buf.st_mtime > cache_buf.st_mtime)
764 goto out_load_cache;
765 if (!g_file_get_contents (filename: path, contents: &contents, length: &total_length, error: &error))
766 {
767 g_warning ("Failed to get cache content %s: %s", path, error->message);
768 g_error_free (error);
769 goto out_load_cache;
770 }
771
772#define GET_GUINT16(elt) \
773 memcpy (&bytes, p, sizeof (guint16)); \
774 elt = GUINT16_FROM_BE (bytes); \
775 p += sizeof (guint16);
776
777 p = contents;
778 if (g_ascii_strncasecmp (s1: p, GTK_COMPOSE_TABLE_MAGIC,
779 n: strlen (GTK_COMPOSE_TABLE_MAGIC)) != 0)
780 {
781 g_warning ("The file is not a GtkComposeTable cache file %s", path);
782 goto out_load_cache;
783 }
784
785 p += strlen (GTK_COMPOSE_TABLE_MAGIC);
786 if (p - contents > total_length)
787 {
788 g_warning ("Broken cache content %s at head", path);
789 goto out_load_cache;
790 }
791
792 GET_GUINT16 (version);
793 if (version != GTK_COMPOSE_TABLE_VERSION)
794 {
795 if (version < GTK_COMPOSE_TABLE_VERSION)
796 *found_old_cache = TRUE;
797 goto out_load_cache;
798 }
799
800 GET_GUINT16 (max_seq_len);
801 GET_GUINT16 (n_index_size);
802 GET_GUINT16 (data_size);
803 GET_GUINT16 (n_chars);
804
805 if (max_seq_len == 0 || data_size == 0)
806 {
807 g_warning ("cache size is not correct %d %d", max_seq_len, data_size);
808 goto out_load_cache;
809 }
810
811 data = g_new0 (guint16, data_size);
812
813 for (i = 0; i < data_size; i++)
814 {
815 GET_GUINT16 (data[i]);
816 }
817
818 if (n_chars > 0)
819 {
820 char_data = g_new (char, n_chars + 1);
821 memcpy (dest: char_data, src: p, n: n_chars);
822 char_data[n_chars] = '\0';
823 }
824
825 retval = g_new0 (GtkComposeTable, 1);
826 retval->data = data;
827 retval->max_seq_len = max_seq_len;
828 retval->n_index_size = n_index_size;
829 retval->data_size = data_size;
830 retval->char_data = char_data;
831 retval->n_chars = n_chars;
832 retval->id = hash;
833
834 g_free (mem: contents);
835 g_free (mem: path);
836
837 return retval;
838
839#undef GET_GUINT16
840
841out_load_cache:
842 g_free (mem: data);
843 g_free (mem: char_data);
844 g_free (mem: contents);
845 g_free (mem: path);
846 return NULL;
847}
848
849static void
850gtk_compose_table_save_cache (GtkComposeTable *compose_table)
851{
852 char *path = NULL;
853 char *contents = NULL;
854 GError *error = NULL;
855 gsize length = 0;
856
857 if ((path = gtk_compose_hash_get_cache_path (hash: compose_table->id)) == NULL)
858 return;
859
860 contents = gtk_compose_table_serialize (compose_table, count: &length);
861 if (contents == NULL)
862 {
863 g_warning ("Failed to serialize compose table %s", path);
864 goto out_save_cache;
865 }
866 if (!g_file_set_contents (filename: path, contents, length, error: &error))
867 {
868 g_warning ("Failed to save compose table %s: %s", path, error->message);
869 g_error_free (error);
870 goto out_save_cache;
871 }
872
873out_save_cache:
874 g_free (mem: contents);
875 g_free (mem: path);
876}
877
878static GtkComposeTable *
879parser_get_compose_table (GtkComposeParser *parser)
880{
881 guint16 *data;
882 GtkComposeTable *table;
883 guint16 encoded_value;
884 GString *char_data;
885 int max_compose_len;
886 GList *sequences;
887 GList *list;
888 int i;
889 int size;
890 int n_first;
891 int first_pos;
892 int rest_pos;
893 int index_rowstride;
894 int n_sequences;
895 gunichar current_first;
896
897 parser_remove_duplicates (parser);
898
899 if (g_hash_table_size (hash_table: parser->sequences) == 0)
900 return NULL;
901
902 parser_compute_max_compose_len (parser, max_compose_len: &max_compose_len, n_first: &n_first, size: &size);
903
904 sequences = g_hash_table_get_keys (hash_table: parser->sequences);
905
906 sequences = g_list_sort_with_data (list: sequences,
907 compare_func: (GCompareDataFunc) sequence_compare,
908 NULL);
909
910 index_rowstride = max_compose_len + 1;
911 data = g_new0 (guint16, n_first * index_rowstride + size);
912
913 char_data = g_string_new (init: "");
914
915 n_sequences = 0;
916 current_first = 0;
917 first_pos = 0;
918 rest_pos = n_first * index_rowstride;
919
920 for (list = sequences; list != NULL; list = list->next)
921 {
922 gunichar *sequence = list->data;
923 char *value = g_hash_table_lookup (hash_table: parser->sequences, key: sequence);
924 int len = sequence_length (a: sequence);
925
926 g_assert (2 <= len && len <= max_compose_len);
927
928 /* Encode the value. If the value is a single
929 * character with a value smaller than 1 << 15,
930 * we just use it directly.
931 * Otherwise, we store the value as string and
932 * put the offset into the table, with the high
933 * bit set.
934 */
935 if (g_utf8_strlen (p: value, max: -1) == 1 &&
936 g_utf8_get_char (p: value) < 0x8000)
937 {
938 encoded_value = (guint16) g_utf8_get_char (p: value);
939 }
940 else
941 {
942 if (char_data->len > 0)
943 g_string_append_c (char_data, 0);
944
945 g_assert (char_data->len < 0x8000);
946
947 encoded_value = (guint16) (char_data->len | 0x8000);
948 g_string_append (string: char_data, val: value);
949 }
950
951 if (sequence[0] != current_first)
952 {
953 g_assert (sequence[0] <= 0xffff);
954 if (current_first != 0)
955 first_pos += index_rowstride;
956 current_first = (guint16)sequence[0];
957
958 data[first_pos] = (guint16)sequence[0];
959 for (i = 1; i < index_rowstride; i++)
960 data[first_pos + i] = rest_pos;
961 }
962
963 for (i = 1; i < len; i++)
964 {
965 g_assert (sequence[i] != 0);
966 g_assert (sequence[i] <= 0xffff);
967 data[rest_pos + i - 1] = (guint16) sequence[i];
968 }
969
970 g_assert (encoded_value != 0);
971 data[rest_pos + len - 1] = encoded_value;
972
973 n_sequences++;
974
975 rest_pos += len;
976
977 for (i = len; i <= max_compose_len; i++)
978 data[first_pos + i] = rest_pos;
979
980 for (i = 1; i < max_compose_len; i++)
981 g_assert (data[first_pos + i] <= data[first_pos + i + 1]);
982 }
983
984 g_assert (first_pos + index_rowstride == n_first * index_rowstride);
985 g_assert (rest_pos == n_first * index_rowstride + size);
986
987 if (char_data->len > 0)
988 g_string_append_c (char_data, 0);
989
990 table = g_new0 (GtkComposeTable, 1);
991 table->data = data;
992 table->data_size = n_first * index_rowstride + size;
993 table->max_seq_len = max_compose_len;
994 table->n_index_size = n_first;
995 table->n_chars = char_data->len;
996 table->char_data = g_string_free (string: char_data, FALSE);
997 table->n_sequences = n_sequences;
998 table->id = g_str_hash (v: parser->compose_file);
999
1000 g_list_free (list: sequences);
1001
1002 return table;
1003}
1004
1005static char *
1006canonicalize_filename (const char *parent_path,
1007 const char *path)
1008{
1009 GFile *file;
1010 char *retval;
1011
1012 if (path[0] != '/' && parent_path)
1013 {
1014 GFile *orig = g_file_new_for_path (path: parent_path);
1015 GFile *parent = g_file_get_parent (file: orig);
1016 file = g_file_resolve_relative_path (file: parent, relative_path: path);
1017 g_object_unref (object: parent);
1018 g_object_unref (object: orig);
1019 }
1020 else
1021 {
1022 file = g_file_new_for_path (path);
1023 }
1024
1025 retval = g_file_get_path (file);
1026
1027 g_object_unref (object: file);
1028
1029 return retval;
1030}
1031
1032static void
1033parser_parse_file (GtkComposeParser *parser,
1034 const char *compose_file)
1035{
1036 char *path;
1037
1038 // stash the name for the table hash
1039 if (parser->compose_file == NULL)
1040 parser->compose_file = compose_file;
1041
1042 path = canonicalize_filename (parent_path: parser->compose_file, path: compose_file);
1043
1044 if (g_list_find_custom (list: parser->files, data: path, func: (GCompareFunc)strcmp))
1045 {
1046 g_warning ("include cycle detected: %s", compose_file);
1047 g_free (mem: path);
1048 return;
1049 }
1050
1051 parser->files = g_list_prepend (list: parser->files, data: path);
1052
1053 parser_read_file (parser, compose_file: path);
1054
1055 parser->files = g_list_remove (list: parser->files, data: path);
1056}
1057
1058GtkComposeTable *
1059gtk_compose_table_parse (const char *compose_file,
1060 gboolean *found_include)
1061{
1062 GtkComposeParser *parser;
1063 GtkComposeTable *compose_table;
1064
1065 parser = parser_new ();
1066 parser_parse_file (parser, compose_file);
1067 compose_table = parser_get_compose_table (parser);
1068 if (found_include)
1069 *found_include = parser->found_include;
1070 parser_free (parser);
1071
1072 return compose_table;
1073}
1074
1075static gboolean
1076rewrite_compose_file (const char *compose_file)
1077{
1078 static const char *prefix =
1079 "# GTK has rewritten this file to add the line:\n"
1080 "\n"
1081 "include \"%L\"\n"
1082 "\n"
1083 "# This is necessary to add your own Compose sequences\n"
1084 "# in addition to the builtin sequences of GTK. If this\n"
1085 "# is not what you want, just remove that line.\n"
1086 "#\n"
1087 "# A backup of the previous file contents has been made.\n"
1088 "\n"
1089 "\n";
1090
1091 char *path = NULL;
1092 char *content = NULL;
1093 gsize content_len;
1094 GFile *file = NULL;
1095 GOutputStream *stream = NULL;
1096 gboolean ret = FALSE;
1097
1098 path = canonicalize_filename (NULL, path: compose_file);
1099
1100 if (!g_file_get_contents (filename: path, contents: &content, length: &content_len, NULL))
1101 goto out;
1102
1103 file = g_file_new_for_path (path);
1104 stream = G_OUTPUT_STREAM (g_file_replace (file, NULL, TRUE, 0, NULL, NULL));
1105
1106 if (stream == NULL)
1107 goto out;
1108
1109 if (!g_output_stream_write (stream, buffer: prefix, count: strlen (s: prefix), NULL, NULL))
1110 goto out;
1111
1112 if (!g_output_stream_write (stream, buffer: content, count: content_len, NULL, NULL))
1113 goto out;
1114
1115 if (!g_output_stream_close (stream, NULL, NULL))
1116 goto out;
1117
1118 ret = TRUE;
1119
1120out:
1121 g_clear_object (&stream);
1122 g_clear_object (&file);
1123 g_clear_pointer (&path, g_free);
1124 g_clear_pointer (&content, g_free);
1125
1126 return ret;
1127}
1128
1129GtkComposeTable *
1130gtk_compose_table_new_with_file (const char *compose_file)
1131{
1132 GtkComposeTable *compose_table;
1133 gboolean found_old_cache = FALSE;
1134 gboolean found_include = FALSE;
1135
1136 g_assert (compose_file != NULL);
1137
1138 compose_table = gtk_compose_table_load_cache (compose_file, found_old_cache: &found_old_cache);
1139 if (compose_table != NULL)
1140 return compose_table;
1141
1142parse:
1143 compose_table = gtk_compose_table_parse (compose_file, found_include: &found_include);
1144
1145 /* This is where we apply heuristics to avoid breaking users existing configurations
1146 * with the change to not always add the default sequences.
1147 *
1148 * If we find a cache that was generated before 4.4, and the Compose file
1149 * does not have an include, and doesn't contain so many sequences that it
1150 * is probably a copy of the system one, we take steps to keep things working,
1151 * and thell the user about it.
1152 */
1153 if (found_old_cache && !found_include && compose_table->n_sequences < 100)
1154 {
1155 if (rewrite_compose_file (compose_file))
1156 {
1157 g_warning ("\nSince GTK 4.4, Compose files replace the builtin\n"
1158 "compose sequences. To keep them and add your own\n"
1159 "sequences on top, the line:\n"
1160 "\n"
1161 " include \"%%L\"\n"
1162 "\n"
1163 "has been added to the Compose file\n%s.\n", compose_file);
1164 goto parse;
1165 }
1166 else
1167 {
1168 g_warning ("\nSince GTK 4.4, Compose files replace the builtin\n"
1169 "compose sequences. To keep them and add your own\n"
1170 "sequences on top, you need to add the line:\n"
1171 "\n"
1172 " include \"%%L\"\n"
1173 "\n"
1174 "to the Compose file\n%s.\n", compose_file);
1175 }
1176 }
1177
1178 if (compose_table != NULL)
1179 gtk_compose_table_save_cache (compose_table);
1180
1181 return compose_table;
1182}
1183
1184GtkComposeTable *
1185gtk_compose_table_new_with_data (const guint16 *data,
1186 int max_seq_len,
1187 int n_seqs)
1188{
1189 GtkComposeParser *parser;
1190 GtkComposeTable *compose_table;
1191 int i;
1192
1193 parser = parser_new ();
1194
1195 for (i = 0; i < n_seqs; i++)
1196 {
1197 const guint16 *seq = data + i * (max_seq_len + 2);
1198 guint16 *sequence;
1199 gunichar ch;
1200 char buf[8] = { 0, };
1201
1202 sequence = g_new0 (guint16, max_seq_len + 1);
1203 memcpy (dest: sequence, src: seq, n: sizeof (guint16) * max_seq_len);
1204
1205 ch = ((gunichar)seq[max_seq_len]) << 16 | (gunichar)seq[max_seq_len + 1];
1206 g_unichar_to_utf8 (c: ch, outbuf: buf);
1207
1208 g_hash_table_replace (hash_table: parser->sequences, key: sequence, value: g_strdup (str: buf));
1209 }
1210
1211 compose_table = parser_get_compose_table (parser);
1212 parser_free (parser);
1213
1214 return compose_table;
1215}
1216
1217static int
1218compare_seq (const void *key, const void *value)
1219{
1220 int i = 0;
1221 const guint *keysyms = key;
1222 const guint16 *seq = value;
1223
1224 while (keysyms[i])
1225 {
1226 if (keysyms[i] < seq[i])
1227 return -1;
1228 else if (keysyms[i] > seq[i])
1229 return 1;
1230
1231 i++;
1232 }
1233
1234 return 0;
1235}
1236
1237static int
1238compare_seq_index (const void *key, const void *value)
1239{
1240 const guint *keysyms = key;
1241 const guint16 *seq = value;
1242
1243 if (keysyms[0] < seq[0])
1244 return -1;
1245 else if (keysyms[0] > seq[0])
1246 return 1;
1247
1248 return 0;
1249}
1250
1251/*
1252 * gtk_compose_table_check:
1253 * @table: the table to check
1254 * @compose_buffer: the key vals to match
1255 * @n_compose: number of non-zero key vals in @compose_buffer
1256 * @compose_finish: (out): return location for whether there may be longer matches
1257 * @compose_match: (out): return location for whether there is a match
1258 * @output: (out) (caller-allocates): return location for the match values
1259 *
1260 * Looks for matches for a key sequence in @table.
1261 *
1262 * Returns: %TRUE if there were any matches, %FALSE otherwise
1263 */
1264gboolean
1265gtk_compose_table_check (const GtkComposeTable *table,
1266 const guint *compose_buffer,
1267 int n_compose,
1268 gboolean *compose_finish,
1269 gboolean *compose_match,
1270 GString *output)
1271{
1272 int row_stride;
1273 guint16 *seq_index;
1274 guint16 *seq;
1275 int i;
1276 gboolean match;
1277 gunichar value;
1278
1279 if (compose_finish)
1280 *compose_finish = FALSE;
1281 if (compose_match)
1282 *compose_match = FALSE;
1283
1284 /* Will never match, if the sequence in the compose buffer is longer
1285 * than the sequences in the table. Further, compare_seq (key, val)
1286 * will overrun val if key is longer than val.
1287 */
1288 if (n_compose > table->max_seq_len)
1289 return FALSE;
1290
1291 seq_index = bsearch (key: compose_buffer,
1292 base: table->data,
1293 nmemb: table->n_index_size,
1294 size: sizeof (guint16) * (table->max_seq_len + 1),
1295 compar: compare_seq_index);
1296
1297 if (!seq_index)
1298 return FALSE;
1299
1300 if (n_compose == 1)
1301 return TRUE;
1302
1303 seq = NULL;
1304 match = FALSE;
1305 value = 0;
1306
1307 for (i = n_compose - 1; i < table->max_seq_len; i++)
1308 {
1309 row_stride = i + 1;
1310
1311 if (seq_index[i + 1] - seq_index[i] > 0)
1312 {
1313 seq = bsearch (key: compose_buffer + 1,
1314 base: table->data + seq_index[i],
1315 nmemb: (seq_index[i + 1] - seq_index[i]) / row_stride,
1316 size: sizeof (guint16) * row_stride,
1317 compar: compare_seq);
1318
1319 if (seq)
1320 {
1321 if (i == n_compose - 1)
1322 {
1323 value = seq[row_stride - 1];
1324
1325 if ((value & (1 << 15)) != 0)
1326 g_string_append (string: output, val: &table->char_data[value & ~(1 << 15)]);
1327 else
1328 g_string_append_unichar (string: output, wc: value);
1329 match = TRUE;
1330 }
1331 else
1332 {
1333 if (match)
1334 {
1335 if (compose_match)
1336 *compose_match = TRUE;
1337 }
1338
1339 return TRUE;
1340 }
1341 }
1342 }
1343 }
1344
1345 if (match)
1346 {
1347 if (compose_match)
1348 *compose_match = TRUE;
1349 if (compose_finish)
1350 *compose_finish = TRUE;
1351
1352 return TRUE;
1353 }
1354
1355 return FALSE;
1356}
1357
1358void
1359gtk_compose_table_get_prefix (const GtkComposeTable *table,
1360 const guint *compose_buffer,
1361 int n_compose,
1362 int *prefix)
1363{
1364 int index_stride = table->max_seq_len + 1;
1365 int p = 0;
1366
1367 for (int idx = 0; idx < table->n_index_size; idx++)
1368 {
1369 const guint16 *seq_index = table->data + (idx * index_stride);
1370
1371 if (seq_index[0] == compose_buffer[0])
1372 {
1373 p = 1;
1374
1375 for (int i = 1; i < table->max_seq_len; i++)
1376 {
1377 int len = i + 1;
1378
1379 for (int j = seq_index[i]; j < seq_index[i + 1]; j += len)
1380 {
1381 int k;
1382
1383 for (k = 0; k < MIN (len, n_compose) - 1; k++)
1384 {
1385 if (compose_buffer[k + 1] != table->data[j + k])
1386 break;
1387 }
1388 p = MAX (p, k + 1);
1389 }
1390 }
1391
1392 break;
1393 }
1394 }
1395
1396 *prefix = p;
1397}
1398
1399void
1400gtk_compose_table_foreach (const GtkComposeTable *table,
1401 GtkComposeSequenceCallback callback,
1402 gpointer data)
1403{
1404 int index_stride = table->max_seq_len + 1;
1405 gunichar *sequence;
1406 int seqno;
1407
1408 sequence = g_new0 (gunichar, table->max_seq_len + 1);
1409
1410 seqno = 0;
1411 for (int idx = 0; idx < table->n_index_size; idx++)
1412 {
1413 const guint16 *seq_index = table->data + (idx * index_stride);
1414
1415 for (int i = 1; i < table->max_seq_len; i++)
1416 {
1417 int len = i + 1;
1418
1419 g_assert (seq_index[i] <= seq_index[i + 1]);
1420 g_assert (seq_index[i + 1] <= table->data_size);
1421 g_assert ((seq_index[i + 1] - seq_index[i]) % len == 0);
1422
1423 for (int j = seq_index[i]; j < seq_index[i + 1]; j += len)
1424 {
1425 char buf[8] = { 0, };
1426 guint16 encoded_value;
1427 char *value;
1428
1429 sequence[0] = seq_index[0];
1430 for (int k = 0; k < len - 1; k++)
1431 sequence[k + 1] = (gunichar) table->data[j + k];
1432 sequence[len] = 0;
1433
1434 encoded_value = table->data[j + len - 1];
1435 g_assert (encoded_value != 0);
1436 if ((encoded_value & (1 << 15)) != 0)
1437 {
1438 int char_offset = encoded_value & ~(1 << 15);
1439 g_assert (char_offset < table->n_chars);
1440 value = &table->char_data[char_offset];
1441 }
1442 else
1443 {
1444 g_unichar_to_utf8 (c: (gunichar)encoded_value, outbuf: buf);
1445 value = buf;
1446 }
1447
1448 callback (sequence, len, value, data);
1449 seqno++;
1450 }
1451 }
1452 }
1453
1454 g_free (mem: sequence);
1455}
1456
/* Evaluates to non-zero if keysym @k is a dead key.
 *
 * The dead key keysyms are defined in ../gdk/gdkkeysyms.h; the first
 * one is GDK_KEY_dead_grave.  X.Org keeps adding dead keys, so the
 * upper limit has to be updated when new ones appear.
 *
 * Note that @k is evaluated twice.
 */
#define IS_DEAD_KEY(k) \
  (GDK_KEY_dead_grave <= (k) && (k) <= GDK_KEY_dead_greek)
1464
1465gboolean
1466gtk_check_algorithmically (const guint *compose_buffer,
1467 int n_compose,
1468 GString *output)
1469
1470{
1471 int i;
1472
1473 g_string_set_size (string: output, len: 0);
1474
1475 for (i = 0; i < n_compose && IS_DEAD_KEY (compose_buffer[i]); i++)
1476 ;
1477
1478 /* Can't combine if there's no base character: incomplete sequence */
1479 if (i == n_compose)
1480 return TRUE;
1481
1482 if (i > 0 && i == n_compose - 1)
1483 {
1484 GString *input;
1485 char *nfc;
1486 gunichar ch;
1487
1488 ch = gdk_keyval_to_unicode (keyval: compose_buffer[i]);
1489
1490 /* We don't allow combining with non-letters */
1491 if (!g_unichar_isalpha (c: ch))
1492 return FALSE;
1493
1494 input = g_string_sized_new (dfl_size: 4 * n_compose);
1495
1496 g_string_append_unichar (string: input, wc: ch);
1497
1498 i--;
1499 while (i >= 0)
1500 {
1501 switch (compose_buffer[i])
1502 {
1503#define CASE(keysym, unicode) \
1504 case GDK_KEY_dead_##keysym: g_string_append_unichar (input, unicode); break
1505
1506 CASE (grave, 0x0300);
1507 CASE (acute, 0x0301);
1508 CASE (circumflex, 0x0302);
1509 case GDK_KEY_dead_tilde:
1510 if (g_unichar_get_script (ch) == G_UNICODE_SCRIPT_GREEK)
1511 g_string_append_unichar (string: input, wc: 0x342); /* combining perispomeni */
1512 else
1513 g_string_append_unichar (string: input, wc: 0x303); /* combining tilde */
1514 break;
1515 CASE (macron, 0x0304);
1516 CASE (breve, 0x0306);
1517 CASE (abovedot, 0x0307);
1518 CASE (diaeresis, 0x0308);
1519 CASE (abovering, 0x30A);
1520 CASE (hook, 0x0309);
1521 CASE (doubleacute, 0x030B);
1522 CASE (caron, 0x030C);
1523 CASE (cedilla, 0x0327);
1524 CASE (ogonek, 0x0328); /* Legacy use for dasia, 0x314.*/
1525 CASE (iota, 0x0345);
1526 CASE (voiced_sound, 0x3099); /* Per Markus Kuhn keysyms.txt file. */
1527 CASE (semivoiced_sound, 0x309A); /* Per Markus Kuhn keysyms.txt file. */
1528 CASE (belowdot, 0x0323);
1529 CASE (horn, 0x031B); /* Legacy use for psili, 0x313 (or 0x343). */
1530 CASE (stroke, 0x335);
1531 CASE (abovecomma, 0x0313); /* Equivalent to psili */
1532 CASE (abovereversedcomma, 0x0314); /* Equivalent to dasia */
1533 CASE (doublegrave, 0x30F);
1534 CASE (belowring, 0x325);
1535 CASE (belowmacron, 0x331);
1536 CASE (belowcircumflex, 0x32D);
1537 CASE (belowtilde, 0x330);
1538 CASE (belowbreve, 0x32e);
1539 CASE (belowdiaeresis, 0x324);
1540 CASE (invertedbreve, 0x32f);
1541 CASE (belowcomma, 0x326);
1542 CASE (lowline, 0x332);
1543 CASE (aboveverticalline, 0x30D);
1544 CASE (belowverticalline, 0x329);
1545 CASE (longsolidusoverlay, 0x338);
1546 CASE (a, 0x363);
1547 CASE (A, 0x363);
1548 CASE (e, 0x364);
1549 CASE (E, 0x364);
1550 CASE (i, 0x365);
1551 CASE (I, 0x365);
1552 CASE (o, 0x366);
1553 CASE (O, 0x366);
1554 CASE (u, 0x367);
1555 CASE (U, 0x367);
1556 CASE (small_schwa, 0x1DEA);
1557 CASE (capital_schwa, 0x1DEA);
1558#undef CASE
1559 default:
1560 g_string_append_unichar (string: input, wc: gdk_keyval_to_unicode (keyval: compose_buffer[i]));
1561 }
1562 i--;
1563 }
1564
1565 nfc = g_utf8_normalize (str: input->str, len: input->len, mode: G_NORMALIZE_NFC);
1566
1567 g_string_assign (string: output, rval: nfc);
1568
1569 g_free (mem: nfc);
1570
1571 g_string_free (string: input, TRUE);
1572
1573 return TRUE;
1574 }
1575
1576 return FALSE;
1577}
1578

source code of gtk/gtk/gtkcomposetable.c