/* gtkbuilderprecompile.c
2 * Copyright (C) 2019 Red Hat,
3 * Alexander Larsson <alexander.larsson@redhat.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with this library. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include "config.h"
20
21#include <gio/gio.h>
22#include "gtkbuilderprivate.h"
23#include "gtkbuilder.h"
24#include "gtkbuildableprivate.h"
25
26/***************************************** Record a GMarkup parser call ***************************/
27
/* Kind of a recorded node.
 *
 * The numeric values are also written into the marshaled output as record
 * tags by marshal_tree() and switched on again when the data is replayed,
 * so they are part of the precompiled format.
 */
typedef enum
{
  RECORD_TYPE_ELEMENT,     /* start of an element, followed by name and attributes */
  RECORD_TYPE_END_ELEMENT, /* only used as a tag in the marshaled stream, never as a node */
  RECORD_TYPE_TEXT,        /* text content of the enclosing element */
} RecordDataType;
34
/* One unique string seen while recording (element name, attribute name,
 * attribute value or text).
 *
 * All strings are owned by the string chunk */
typedef struct {
  /* Must be first for g_slice_free_chain() */
  GList link;

  const char *string;   /* zero-terminated copy stored in the string chunk */
  int len;              /* length of string in bytes, without the terminator */
  int count;            /* number of lookups; used to sort the string table */
  int offset;           /* position of the string bytes in the marshaled string table */
  int text_offset;      /* position of the length prefix; only assigned when include_len is set */
  gboolean include_len; /* TRUE once the string was looked up with an explicit length */
} RecordDataString;
47
/* Common header of every node in the recorded tree. It is the first member
 * of RecordDataElement and RecordDataText, which lets the code cast between
 * the base and the concrete node types. */
typedef struct {
  RecordDataType type; /* selects the concrete node struct */
  GList link;          /* embedded list link; link.data points back at the node */
} RecordDataNode;
52
typedef struct RecordDataElement RecordDataElement;

/* A recorded element together with its attributes and child nodes. */
struct RecordDataElement {
  RecordDataNode base;

  RecordDataElement *parent;    /* enclosing element, NULL for the root */
  GQueue children;              /* child nodes, linked through their embedded base.link */
  int n_attributes;             /* number of attribute name/value pairs */
  gboolean preserve_whitespace; /* keep whitespace-only text inside this element */
  RecordDataString *name;       /* element name, NULL for the root */
  /* Attribute names occupy indices [0, n_attributes), the matching values
   * occupy [n_attributes, 2 * n_attributes). */
  RecordDataString *attributes[];
};
65
/* A recorded run of text inside an element. */
typedef struct {
  RecordDataNode base;

  RecordDataString *string; /* the text, shared through the string table */
} RecordDataText;
71
/* State shared by the recording GMarkup callbacks. */
typedef struct {
  GHashTable *strings;        /* set of RecordDataString, used to share identical strings */
  GStringChunk *chunks;       /* storage for the string bytes */
  GQueue string_list;         /* every RecordDataString, linked through its embedded link */
  RecordDataElement *root;    /* nameless element that holds the top-level nodes */
  RecordDataElement *current; /* element that is currently open */
} RecordData;
79
80static gpointer
81record_data_node_new (RecordDataElement *parent,
82 RecordDataType type,
83 gsize size)
84{
85 RecordDataNode *node = g_slice_alloc0 (block_size: size);
86
87 node->type = type;
88 node->link.data = node;
89
90 if (parent)
91 g_queue_push_tail_link (queue: &parent->children, link_: &node->link);
92
93 return node;
94}
95
96static gboolean
97text_is_important (const char *name)
98{
99 const char *elements[] = {
100 "property",
101 "attribute",
102 "col",
103 "action-widget",
104 "item",
105 "mime-type",
106 "pattern",
107 "suffix",
108 "mark",
109 NULL
110 };
111
112 return g_strv_contains (strv: elements, str: name);
113}
114
115static RecordDataElement *
116record_data_element_new (RecordDataElement *parent,
117 RecordDataString *name,
118 gsize n_attributes)
119{
120 RecordDataElement *element;
121
122 element = record_data_node_new (parent,
123 type: RECORD_TYPE_ELEMENT,
124 size: sizeof (RecordDataElement) +
125 sizeof (RecordDataString) * n_attributes);
126 element->parent = parent;
127 element->name = name;
128 element->preserve_whitespace = name && text_is_important (name: name->string);
129 element->n_attributes = n_attributes;
130
131 return element;
132}
133
134static void
135record_data_element_append_text (RecordDataElement *parent,
136 RecordDataString *string)
137{
138 RecordDataText *text;
139
140 text = record_data_node_new (parent,
141 type: RECORD_TYPE_TEXT,
142 size: sizeof (RecordDataText));
143 text->string = string;
144}
145
146static void
147record_data_node_free (RecordDataNode *node)
148{
149 GList *l, *next;
150 RecordDataText *text;
151 RecordDataElement *element;
152
153 switch (node->type)
154 {
155 case RECORD_TYPE_ELEMENT:
156 element = (RecordDataElement *)node;
157
158 l = element->children.head;
159 while (l)
160 {
161 next = l->next;
162 record_data_node_free (node: l->data);
163 l = next;
164 }
165
166 g_slice_free1 (block_size: sizeof (RecordDataElement) +
167 sizeof (RecordDataString) * element->n_attributes, mem_block: element);
168 break;
169 case RECORD_TYPE_TEXT:
170 text = (RecordDataText *)node;
171 g_slice_free (RecordDataText, text);
172 break;
173 case RECORD_TYPE_END_ELEMENT:
174 default:
175 g_assert_not_reached ();
176 }
177}
178
179static gboolean
180record_data_string_equal (gconstpointer _a,
181 gconstpointer _b)
182{
183 const RecordDataString *a = _a;
184 const RecordDataString *b = _b;
185
186 return a->len == b->len &&
187 memcmp (s1: a->string, s2: b->string, n: a->len) == 0;
188}
189
190/* Copied from g_bytes_hash() */
191static guint
192record_data_string_hash (gconstpointer _a)
193{
194 const RecordDataString *a = _a;
195 const signed char *p, *e;
196 guint32 h = 5381;
197
198 for (p = (signed char *)a->string, e = (signed char *)a->string + a->len; p != e; p++)
199 h = (h << 5) + h + *p;
200
201 return h;
202}
203
204static int
205record_data_string_compare (gconstpointer _a,
206 gconstpointer _b,
207 gpointer user_data)
208{
209 const RecordDataString *a = _a;
210 const RecordDataString *b = _b;
211
212 return b->count - a->count;
213}
214
215static RecordDataString *
216record_data_string_lookup (RecordData *data,
217 const char *str,
218 gssize len)
219{
220 RecordDataString *s, tmp;
221 gboolean include_len = len >= 0;
222
223 if (len < 0)
224 len = strlen (s: str);
225
226 tmp.string = str;
227 tmp.len = len;
228
229 s = g_hash_table_lookup (hash_table: data->strings, key: &tmp);
230 if (s)
231 {
232 s->count++;
233 s->include_len |= include_len;
234 return s;
235 }
236
237 s = g_slice_new (RecordDataString);
238 /* The string is zero terminated */
239 s->string = g_string_chunk_insert_len (chunk: data->chunks, string: str, len);
240 s->len = len;
241 s->count = 1;
242 s->include_len = include_len;
243 s->link.data = s;
244 s->link.next = NULL;
245 s->link.prev = NULL;
246
247 g_hash_table_add (hash_table: data->strings, key: s);
248 g_queue_push_tail_link (queue: &data->string_list, link_: &s->link);
249 return s;
250}
251
252static void
253record_start_element (GMarkupParseContext *context,
254 const char *element_name,
255 const char **names,
256 const char **values,
257 gpointer user_data,
258 GError **error)
259{
260 gsize n_attrs = g_strv_length (str_array: (char **)names);
261 RecordData *data = user_data;
262 RecordDataElement *child;
263 RecordDataString *name, **attr_names, **attr_values;
264 int i;
265
266 name = record_data_string_lookup (data, str: element_name, len: -1);
267 child = record_data_element_new (parent: data->current, name, n_attributes: n_attrs);
268 data->current = child;
269
270 attr_names = &child->attributes[0];
271 attr_values = &child->attributes[n_attrs];
272 for (i = 0; i < n_attrs; i++)
273 {
274 attr_names[i] = record_data_string_lookup (data, str: names[i], len: -1);
275 attr_values[i] = record_data_string_lookup (data, str: values[i], len: -1);
276 }
277}
278
279static void
280record_end_element (GMarkupParseContext *context,
281 const char *element_name,
282 gpointer user_data,
283 GError **error)
284{
285 RecordData *data = user_data;
286
287 data->current = data->current->parent;
288}
289
290static gboolean
291is_whitespace (const char *text,
292 gsize text_len)
293{
294 const char *end;
295 const char *p;
296
297 end = text + text_len;
298 for (p = text; p < end; p = g_utf8_next_char (p))
299 {
300 if (!g_unichar_isspace (c: g_utf8_get_char (p)))
301 return FALSE;
302 }
303
304 return TRUE;
305}
306
307static void
308record_text (GMarkupParseContext *context,
309 const char *text,
310 gsize text_len,
311 gpointer user_data,
312 GError **error)
313{
314 RecordData *data = user_data;
315 RecordDataString *string;
316
317 if (text_len == 0)
318 return;
319
320 if (!data->current->preserve_whitespace && is_whitespace (text, text_len))
321 return;
322
323 string = record_data_string_lookup (data, str: text, len: text_len);
324 record_data_element_append_text (parent: data->current, string);
325}
326
327static const GMarkupParser record_parser =
328{
329 record_start_element,
330 record_end_element,
331 record_text,
332 NULL, // passthrough, not stored
333 NULL, // error, fails immediately
334};
335
336static void
337marshal_uint32 (GString *str,
338 guint32 v)
339{
340 /*
341 We encode in a variable length format similar to
342 utf8:
343
344 v size byte 1 byte 2 byte 3 byte 4 byte 5
345 7 bit: 0xxxxxxx
346 14 bit: 10xxxxxx xxxxxxxx
347 21 bit: 110xxxxx xxxxxxxx xxxxxxxx
348 28 bit: 1110xxxx xxxxxxxx xxxxxxxx xxxxxxxx
349 32 bit: 11110000 xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxx
350 */
351
352 if (v < 128)
353 {
354 g_string_append_c (str, (guchar)v);
355 }
356 else if (v < (1<<14))
357 {
358 g_string_append_c (str, (guchar)(v >> 8) | 0x80);
359 g_string_append_c (str, (guchar)(v & 0xff));
360 }
361 else if (v < (1<<21))
362 {
363 g_string_append_c (str, (guchar)(v >> 16) | 0xc0);
364 g_string_append_c (str, (guchar)((v >> 8) & 0xff));
365 g_string_append_c (str, (guchar)(v & 0xff));
366 }
367 else if (v < (1<<28))
368 {
369 g_string_append_c (str, (guchar)(v >> 24) | 0xe0);
370 g_string_append_c (str, (guchar)((v >> 16) & 0xff));
371 g_string_append_c (str, (guchar)((v >> 8) & 0xff));
372 g_string_append_c (str, (guchar)(v & 0xff));
373 }
374 else
375 {
376 g_string_append_c (str, 0xf0);
377 g_string_append_c (str, (guchar)((v >> 24) & 0xff));
378 g_string_append_c (str, (guchar)((v >> 16) & 0xff));
379 g_string_append_c (str, (guchar)((v >> 8) & 0xff));
380 g_string_append_c (str, (guchar)(v & 0xff));
381 }
382}
383
384static int
385marshal_uint32_len (guint32 v)
386{
387 if (v < 128)
388 return 1;
389
390 if (v < (1<<14))
391 return 2;
392
393 if (v < (1<<21))
394 return 3;
395
396 if (v < (1<<28))
397 return 4;
398
399 return 5;
400}
401
402static void
403marshal_tree (GString *marshaled,
404 RecordDataNode *node)
405{
406 GList *l;
407 int i;
408 RecordDataText *text;
409 RecordDataElement *element;
410 RecordDataString **attr_names, **attr_values;
411
412 switch (node->type)
413 {
414 case RECORD_TYPE_ELEMENT:
415 element = (RecordDataElement *)node;
416
417 marshal_uint32 (str: marshaled, v: RECORD_TYPE_ELEMENT);
418 marshal_uint32 (str: marshaled, v: element->name->offset);
419 marshal_uint32 (str: marshaled, v: element->n_attributes);
420
421 attr_names = &element->attributes[0];
422 attr_values = &element->attributes[element->n_attributes];
423 for (i = 0; i < element->n_attributes; i++)
424 {
425 marshal_uint32 (str: marshaled, v: attr_names[i]->offset);
426 marshal_uint32 (str: marshaled, v: attr_values[i]->offset);
427 }
428
429 for (l = element->children.head; l != NULL; l = l->next)
430 marshal_tree (marshaled, node: l->data);
431
432 marshal_uint32 (str: marshaled, v: RECORD_TYPE_END_ELEMENT);
433 break;
434 case RECORD_TYPE_TEXT:
435 text = (RecordDataText *)node;
436 marshal_uint32 (str: marshaled, v: RECORD_TYPE_TEXT);
437 marshal_uint32 (str: marshaled, v: text->string->text_offset);
438 break;
439 case RECORD_TYPE_END_ELEMENT:
440 default:
441 g_assert_not_reached ();
442 }
443}
444
445static void
446marshal_root (GString *marshaled,
447 RecordDataNode *node)
448{
449 GList *l;
450 RecordDataElement *element = (RecordDataElement *)node;
451
452 for (l = element->children.head; l != NULL; l = l->next)
453 marshal_tree (marshaled, node: l->data);
454}
455
456/**
457 * _gtk_buildable_parser_precompile:
458 * @text: chunk of text to parse
459 * @text_len: length of @text in bytes
460 *
461 * Converts the xml format typically used by GtkBuilder to a
462 * binary form that is more efficient to parse. This is a custom
463 * format that is only supported by GtkBuilder.
464 *
465 * returns: A `GBytes` with the precompiled data
466 **/
467GBytes *
468_gtk_buildable_parser_precompile (const char *text,
469 gssize text_len,
470 GError **error)
471{
472 GMarkupParseContext *ctx;
473 RecordData data = { 0 };
474 GList *l;
475 GString *marshaled;
476 int offset;
477
478 data.strings = g_hash_table_new (hash_func: record_data_string_hash, key_equal_func: record_data_string_equal);
479 data.chunks = g_string_chunk_new (size: 512);
480 data.root = record_data_element_new (NULL, NULL, n_attributes: 0);
481 data.current = data.root;
482
483 ctx = g_markup_parse_context_new (parser: &record_parser, flags: G_MARKUP_TREAT_CDATA_AS_TEXT, user_data: &data, NULL);
484
485 if (!g_markup_parse_context_parse (context: ctx, text, text_len, error) ||
486 !g_markup_parse_context_end_parse (context: ctx, error))
487 {
488 record_data_node_free (node: &data.root->base);
489 g_string_chunk_free (chunk: data.chunks);
490 g_hash_table_destroy (hash_table: data.strings);
491 g_markup_parse_context_free (context: ctx);
492 return NULL;
493 }
494
495 g_markup_parse_context_free (context: ctx);
496
497 g_queue_sort (queue: &data.string_list, compare_func: record_data_string_compare, NULL);
498
499 offset = 0;
500 for (l = data.string_list.head; l != NULL; l = l->next)
501 {
502 RecordDataString *s = l->data;
503
504 if (s->include_len)
505 {
506 s->text_offset = offset;
507 offset += marshal_uint32_len (v: s->len);
508 }
509
510 s->offset = offset;
511 offset += s->len + 1;
512 }
513
514 marshaled = g_string_sized_new (dfl_size: 4 + offset + 32);
515 /* Magic marker */
516 g_string_append_len (string: marshaled, val: "GBU\0", len: 4);
517 marshal_uint32 (str: marshaled, v: offset);
518
519 for (l = data.string_list.head; l != NULL; l = l->next)
520 {
521 RecordDataString *s = l->data;
522
523 if (s->include_len)
524 marshal_uint32 (str: marshaled, v: s->len);
525
526 g_string_append_len (string: marshaled, val: s->string, len: s->len + 1);
527 }
528
529 marshal_root (marshaled, node: &data.root->base);
530
531 g_slice_free_chain (RecordDataString,
532 (RecordDataString *)data.string_list.head,
533 link.next);
534 record_data_node_free (node: &data.root->base);
535 g_string_chunk_free (chunk: data.chunks);
536 g_hash_table_destroy (hash_table: data.strings);
537
538 return g_string_free_to_bytes (string: marshaled);
539}
540
541/***************************************** Replay GMarkup parser callbacks ***************************/
542
543static guint32
544demarshal_uint32 (const char **tree)
545{
546 const guchar *p = (const guchar *)*tree;
547 guchar c = *p;
548 /* see marshal_uint32 for format */
549
550 if (c < 128) /* 7 bit */
551 {
552 *tree += 1;
553 return c;
554 }
555 else if ((c & 0xc0) == 0x80) /* 14 bit */
556 {
557 *tree += 2;
558 return (c & 0x3f) << 8 | p[1];
559 }
560 else if ((c & 0xe0) == 0xc0) /* 21 bit */
561 {
562 *tree += 3;
563 return (c & 0x1f) << 16 | p[1] << 8 | p[2];
564 }
565 else if ((c & 0xf0) == 0xe0) /* 28 bit */
566 {
567 *tree += 4;
568 return (c & 0xf) << 24 | p[1] << 16 | p[2] << 8 | p[3];
569 }
570 else
571 {
572 *tree += 5;
573 return p[1] << 24 | p[2] << 16 | p[3] << 8 | p[4];
574 }
575}
576
/* Reads a string table offset from *@tree, advances *@tree past it and
 * returns the string stored at that offset in @strings. */
static const char *
demarshal_string (const char **tree,
                  const char  *strings)
{
  return strings + demarshal_uint32 (tree);
}
585
586static const char *
587demarshal_text (const char **tree,
588 const char *strings,
589 guint32 *len)
590{
591 guint32 offset = demarshal_uint32 (tree);
592 const char *str = strings + offset;
593
594 *len = demarshal_uint32 (tree: &str);
595 return str;
596}
597
598static void
599propagate_error (GtkBuildableParseContext *context,
600 GError **dest,
601 GError *src)
602{
603 (*context->internal_callbacks->error) (NULL, src, context);
604 g_propagate_error (dest, src);
605}
606
607static gboolean
608replay_start_element (GtkBuildableParseContext *context,
609 const char **tree,
610 const char *strings,
611 GError **error)
612{
613 const char *element_name;
614 guint32 i, n_attrs;
615 const char **attr_names;
616 const char **attr_values;
617 GError *tmp_error = NULL;
618
619 element_name = demarshal_string (tree, strings);
620 n_attrs = demarshal_uint32 (tree);
621
622 attr_names = g_newa (const char *, n_attrs + 1);
623 attr_values = g_newa (const char *, n_attrs + 1);
624 for (i = 0; i < n_attrs; i++)
625 {
626 attr_names[i] = demarshal_string (tree, strings);
627 attr_values[i] = demarshal_string (tree, strings);
628 }
629 attr_names[i] = NULL;
630 attr_values[i] = NULL;
631
632 (* context->internal_callbacks->start_element) (NULL,
633 element_name,
634 attr_names,
635 attr_values,
636 context,
637 &tmp_error);
638
639 if (tmp_error)
640 {
641 propagate_error (context, dest: error, src: tmp_error);
642 return FALSE;
643 }
644
645 return TRUE;
646}
647
648static gboolean
649replay_end_element (GtkBuildableParseContext *context,
650 const char **tree,
651 const char *strings,
652 GError **error)
653{
654 GError *tmp_error = NULL;
655
656 (* context->internal_callbacks->end_element) (NULL,
657 gtk_buildable_parse_context_get_element (context),
658 context,
659 &tmp_error);
660 if (tmp_error)
661 {
662 propagate_error (context, dest: error, src: tmp_error);
663 return FALSE;
664 }
665
666 return TRUE;
667}
668
669static gboolean
670replay_text (GtkBuildableParseContext *context,
671 const char **tree,
672 const char *strings,
673 GError **error)
674{
675 guint32 len;
676 const char *text;
677 GError *tmp_error = NULL;
678
679 text = demarshal_text (tree, strings, len: &len);
680
681 (*context->internal_callbacks->text) (NULL,
682 text,
683 len,
684 context,
685 &tmp_error);
686
687 if (tmp_error)
688 {
689 propagate_error (context, dest: error, src: tmp_error);
690 return FALSE;
691 }
692
693 return TRUE;
694}
695
696gboolean
697_gtk_buildable_parser_is_precompiled (const char *data,
698 gssize data_len)
699{
700 return
701 data_len > 4 &&
702 data[0] == 'G' &&
703 data[1] == 'B' &&
704 data[2] == 'U' &&
705 data[3] == 0;
706}
707
708gboolean
709_gtk_buildable_parser_replay_precompiled (GtkBuildableParseContext *context,
710 const char *data,
711 gssize data_len,
712 GError **error)
713{
714 const char *data_end = data + data_len;
715 guint32 type, len;
716 const char *strings;
717 const char *tree;
718
719 data = data + 4; /* Skip header */
720
721 len = demarshal_uint32 (tree: &data);
722
723 strings = data;
724 data = data + len;
725 tree = data;
726
727 while (tree < data_end)
728 {
729 gboolean res;
730 type = demarshal_uint32 (tree: &tree);
731
732 switch (type)
733 {
734 case RECORD_TYPE_ELEMENT:
735 res = replay_start_element (context, tree: &tree, strings, error);
736 break;
737 case RECORD_TYPE_END_ELEMENT:
738 res = replay_end_element (context, tree: &tree, strings, error);
739 break;
740 case RECORD_TYPE_TEXT:
741 res = replay_text (context, tree: &tree, strings, error);
742 break;
743 default:
744 g_assert_not_reached ();
745 }
746
747 if (!res)
748 return FALSE;
749 }
750
751 return TRUE;
752}
753

source code of gtk/gtk/gtkbuilderprecompile.c