gmarkup.c source code [gtk/subprojects/glib/glib/gmarkup.c]

/* gmarkup.c - Simple XML-like parser
 *
3	* Copyright 2000, 2003 Red Hat, Inc.
4	* Copyright 2007, 2008 Ryan Lortie <desrt@desrt.ca>
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2.1 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public License
17	* along with this library; if not, see <http://www.gnu.org/licenses/>.
18	*/
19
20	#include "config.h"
21
22	#include <stdarg.h>
23	#include <string.h>
24	#include <stdio.h>
25	#include <stdlib.h>
26	#include <errno.h>
27
28	#include "gmarkup.h"
29
30	#include "gatomic.h"
31	#include "gslice.h"
32	#include "galloca.h"
33	#include "gstrfuncs.h"
34	#include "gstring.h"
35	#include "gtestutils.h"
36	#include "glibintl.h"
37	#include "gthread.h"
38
39	/**
40	* SECTION:markup
41	* @Title: Simple XML Subset Parser
42	* @Short_description: parses a subset of XML
43	* @See_also: [XML Specification](http://www.w3.org/TR/REC-xml/)
44	*
45	* The "GMarkup" parser is intended to parse a simple markup format
46	* that's a subset of XML. This is a small, efficient, easy-to-use
47	* parser. It should not be used if you expect to interoperate with
48	* other applications generating full-scale XML, and must not be used if you
49	* expect to parse untrusted input. However, it's very
50	* useful for application data files, config files, etc. where you
51	* know your application will be the only one writing the file.
52	* Full-scale XML parsers should be able to parse the subset used by
53	* GMarkup, so you can easily migrate to full-scale XML at a later
54	* time if the need arises.
55	*
56	* GMarkup is not guaranteed to signal an error on all invalid XML;
57	* the parser may accept documents that an XML parser would not.
58	* However, XML documents which are not well-formed (which is a
59	* weaker condition than being valid. See the
60	* [XML specification](http://www.w3.org/TR/REC-xml/)
61	* for definitions of these terms.) are not considered valid GMarkup
62	* documents.
63	*
64	* Simplifications to XML include:
65	*
66	* - Only UTF-8 encoding is allowed
67	*
68	* - No user-defined entities
69	*
70	* - Processing instructions, comments and the doctype declaration
71	* are "passed through" but are not interpreted in any way
72	*
73	* - No DTD or validation
74	*
75	* The markup format does support:
76	*
77	* - Elements
78	*
79	* - Attributes
80	*
 * - 5 standard entities: &amp; &lt; &gt; &quot; &apos;
82	*
83	* - Character references
84	*
85	* - Sections marked as CDATA
86	*/
87
/* Defines g_markup_error_quark(), which returns the GQuark interned for the
 * string "g-markup-error-quark".  Presumably this is what G_MARKUP_ERROR
 * (used below when building GErrors) expands to -- the macro itself lives
 * in gmarkup.h, confirm there.
 */
G_DEFINE_QUARK (g-markup-error-quark, g_markup_error)
89
/* Parser state machine positions.  The current value is stored in
 * GMarkupParseContext.state; STATE_ERROR is entered by mark_error() and
 * makes the context unusable for further parsing.
 */
typedef enum
{
  STATE_START,
  STATE_AFTER_OPEN_ANGLE,
  STATE_AFTER_CLOSE_ANGLE,
  /* the slash that obviates need for end element */
  STATE_AFTER_ELISION_SLASH,
  STATE_INSIDE_OPEN_TAG_NAME,
  STATE_INSIDE_ATTRIBUTE_NAME,
  STATE_AFTER_ATTRIBUTE_NAME,
  STATE_BETWEEN_ATTRIBUTES,
  STATE_AFTER_ATTRIBUTE_EQUALS_SIGN,
  STATE_INSIDE_ATTRIBUTE_VALUE_SQ,
  STATE_INSIDE_ATTRIBUTE_VALUE_DQ,
  STATE_INSIDE_TEXT,
  STATE_AFTER_CLOSE_TAG_SLASH,
  STATE_INSIDE_CLOSE_TAG_NAME,
  STATE_AFTER_CLOSE_TAG_NAME,
  STATE_INSIDE_PASSTHROUGH,
  STATE_ERROR
} GMarkupParseState;
110
111	typedef struct
112	{
113	const char *prev_element;
114	const GMarkupParser *prev_parser;
115	gpointer prev_user_data;
116	} GMarkupRecursionTracker;
117
118	struct _GMarkupParseContext
119	{
120	const GMarkupParser *parser;
121
122	gint ref_count; / (atomic) /
123
124	GMarkupParseFlags flags;
125
126	gint line_number;
127	gint char_number;
128
129	GMarkupParseState state;
130
131	gpointer user_data;
132	GDestroyNotify dnotify;
133
134	/ A piece of character data or an element that*
135	* hasn't "ended" yet so we haven't yet called
136	* the callback for it.
137	*/
138	GString *partial_chunk;
139	GSList *spare_chunks;
140
141	GSList *tag_stack;
142	GSList *tag_stack_gstr;
143	GSList *spare_list_nodes;
144
145	GString **attr_names;
146	GString **attr_values;
147	gint cur_attr;
148	gint alloc_attrs;
149
150	const gchar *current_text;
151	gssize current_text_len;
152	const gchar *current_text_end;
153
154	/ used to save the start of the last interesting thingy /
155	const gchar *start;
156
157	const gchar *iter;
158
159	guint document_empty : `1`;
160	guint parsing : `1`;
161	guint awaiting_pop : `1`;
162	gint balance;
163
164	/ subparser support /
165	GSList subparser_stack; /* (GMarkupRecursionTracker ) /*
166	const char *subparser_element;
167	gpointer held_user_data;
168	};
169
170	/*
171	* Helpers to reduce our allocation overhead, we have
172	* a well defined allocation lifecycle.
173	*/
174	static GSList *
175	get_list_node (GMarkupParseContext *context, gpointer data)
176	{
177	GSList *node;
178	if (context->spare_list_nodes != NULL)
179	{
180	node = context->spare_list_nodes;
181	context->spare_list_nodes = g_slist_remove_link (list: context->spare_list_nodes, link_: node);
182	}
183	else
184	node = g_slist_alloc();
185	node->data = data;
186	return node;
187	}
188
189	static void
190	free_list_node (GMarkupParseContext context, GSList node)
191	{
192	node->data = NULL;
193	context->spare_list_nodes = g_slist_concat (list1: node, list2: context->spare_list_nodes);
194	}
195
196	static inline void
197	string_blank (GString *string)
198	{
199	string->str[`0`] = `'\0'`;
200	string->len = `0`;
201	}
202
203	/**
204	* g_markup_parse_context_new:
205	* @parser: a #GMarkupParser
206	* @flags: one or more #GMarkupParseFlags
207	* @user_data: user data to pass to #GMarkupParser functions
208	* @user_data_dnotify: user data destroy notifier called when
209	* the parse context is freed
210	*
211	* Creates a new parse context. A parse context is used to parse
212	* marked-up documents. You can feed any number of documents into
213	* a context, as long as no errors occur; once an error occurs,
214	* the parse context can't continue to parse text (you have to
215	* free it and create a new parse context).
216	*
217	* Returns: a new #GMarkupParseContext
218	**/
219	GMarkupParseContext *
220	g_markup_parse_context_new (const GMarkupParser *parser,
221	GMarkupParseFlags flags,
222	gpointer user_data,
223	GDestroyNotify user_data_dnotify)
224	{
225	GMarkupParseContext *context;
226
227	g_return_val_if_fail (parser != NULL, NULL);
228
229	context = g_new (GMarkupParseContext, `1`);
230
231	context->ref_count = `1`;
232	context->parser = parser;
233	context->flags = flags;
234	context->user_data = user_data;
235	context->dnotify = user_data_dnotify;
236
237	context->line_number = `1`;
238	context->char_number = `1`;
239
240	context->partial_chunk = NULL;
241	context->spare_chunks = NULL;
242	context->spare_list_nodes = NULL;
243
244	context->state = STATE_START;
245	context->tag_stack = NULL;
246	context->tag_stack_gstr = NULL;
247	context->attr_names = NULL;
248	context->attr_values = NULL;
249	context->cur_attr = -`1`;
250	context->alloc_attrs = `0`;
251
252	context->current_text = NULL;
253	context->current_text_len = -`1`;
254	context->current_text_end = NULL;
255
256	context->start = NULL;
257	context->iter = NULL;
258
259	context->document_empty = TRUE;
260	context->parsing = FALSE;
261
262	context->awaiting_pop = FALSE;
263	context->subparser_stack = NULL;
264	context->subparser_element = NULL;
265
266	/ this is only looked at if awaiting_pop = TRUE. initialise anyway. /
267	context->held_user_data = NULL;
268
269	context->balance = `0`;
270
271	return context;
272	}
273
274	/**
275	* g_markup_parse_context_ref:
276	* @context: a #GMarkupParseContext
277	*
278	* Increases the reference count of @context.
279	*
280	* Returns: the same @context
281	*
282	* Since: 2.36
283	**/
284	GMarkupParseContext *
285	g_markup_parse_context_ref (GMarkupParseContext *context)
286	{
287	g_return_val_if_fail (context != NULL, NULL);
288	g_return_val_if_fail (context->ref_count > `0`, NULL);
289
290	g_atomic_int_inc (&context->ref_count);
291
292	return context;
293	}
294
295	/**
296	* g_markup_parse_context_unref:
297	* @context: a #GMarkupParseContext
298	*
299	* Decreases the reference count of @context. When its reference count
300	* drops to 0, it is freed.
301	*
302	* Since: 2.36
303	**/
304	void
305	g_markup_parse_context_unref (GMarkupParseContext *context)
306	{
307	g_return_if_fail (context != NULL);
308	g_return_if_fail (context->ref_count > `0`);
309
310	if (g_atomic_int_dec_and_test (&context->ref_count))
311	g_markup_parse_context_free (context);
312	}
313
314	static void
315	string_full_free (gpointer ptr)
316	{
317	g_string_free (string: ptr, TRUE);
318	}
319
320	static void clear_attributes (GMarkupParseContext *context);
321
322	/**
323	* g_markup_parse_context_free:
324	* @context: a #GMarkupParseContext
325	*
326	* Frees a #GMarkupParseContext.
327	*
328	* This function can't be called from inside one of the
329	* #GMarkupParser functions or while a subparser is pushed.
330	*/
331	void
332	g_markup_parse_context_free (GMarkupParseContext *context)
333	{
334	g_return_if_fail (context != NULL);
335	g_return_if_fail (!context->parsing);
336	g_return_if_fail (!context->subparser_stack);
337	g_return_if_fail (!context->awaiting_pop);
338
339	if (context->dnotify)
340	(* context->dnotify) (context->user_data);
341
342	clear_attributes (context);
343	g_free (mem: context->attr_names);
344	g_free (mem: context->attr_values);
345
346	g_slist_free_full (list: context->tag_stack_gstr, free_func: string_full_free);
347	g_slist_free (list: context->tag_stack);
348
349	g_slist_free_full (list: context->spare_chunks, free_func: string_full_free);
350	g_slist_free (list: context->spare_list_nodes);
351
352	if (context->partial_chunk)
353	g_string_free (string: context->partial_chunk, TRUE);
354
355	g_free (mem: context);
356	}
357
358	static void pop_subparser_stack (GMarkupParseContext *context);
359
360	static void
361	mark_error (GMarkupParseContext *context,
362	GError *error)
363	{
364	context->state = STATE_ERROR;
365
366	if (context->parser->error)
367	(*context->parser->error) (context, error, context->user_data);
368
369	/ report the error all the way up to free all the user-data /
370	while (context->subparser_stack)
371	{
372	pop_subparser_stack (context);
373	context->awaiting_pop = FALSE; / already been freed /
374
375	if (context->parser->error)
376	(*context->parser->error) (context, error, context->user_data);
377	}
378	}
379
380	static void
381	set_error (GMarkupParseContext *context,
382	GError **error,
383	GMarkupError code,
384	const gchar *format,
385	...) G_GNUC_PRINTF (`4`, `5`);
386
387	static void
388	set_error_literal (GMarkupParseContext *context,
389	GError **error,
390	GMarkupError code,
391	const gchar *message)
392	{
393	GError *tmp_error;
394
395	tmp_error = g_error_new_literal (G_MARKUP_ERROR, code, message);
396
397	g_prefix_error (err: &tmp_error,
398	_("Error on line %d char %d: "),
399	context->line_number,
400	context->char_number);
401
402	mark_error (context, error: tmp_error);
403
404	g_propagate_error (dest: error, src: tmp_error);
405	}
406
407	G_GNUC_PRINTF(`4`, `5`)
408	static void
409	set_error (GMarkupParseContext *context,
410	GError **error,
411	GMarkupError code,
412	const gchar *format,
413	...)
414	{
415	gchar *s;
416	gchar *s_valid;
417	va_list args;
418
419	va_start (args, format);
420	s = g_strdup_vprintf (format, args);
421	va_end (args);
422
423	/ Make sure that the GError message is valid UTF-8*
424	* even if it is complaining about invalid UTF-8 in the markup
425	*/
426	s_valid = g_utf8_make_valid (str: s, len: -`1`);
427	set_error_literal (context, error, code, message: s);
428
429	g_free (mem: s);
430	g_free (mem: s_valid);
431	}
432
433	static void
434	propagate_error (GMarkupParseContext *context,
435	GError **dest,
436	GError *src)
437	{
438	if (context->flags & G_MARKUP_PREFIX_ERROR_POSITION)
439	g_prefix_error (err: &src,
440	_("Error on line %d char %d: "),
441	context->line_number,
442	context->char_number);
443
444	mark_error (context, error: src);
445
446	g_propagate_error (dest, src);
447	}
448
/* True when @c is one of '=', '/', '>' or ' ', the characters checked for
 * when looking for the end of a name.  @c is expanded up to four times, so
 * do not pass an expression with side effects.
 */
#define IS_COMMON_NAME_END_CHAR(c) \
  ((c) == '=' || (c) == '/' || (c) == '>' || (c) == ' ')
451
452	static gboolean
453	slow_name_validate (GMarkupParseContext *context,
454	const gchar *name,
455	GError **error)
456	{
457	const gchar *p = name;
458
459	if (!g_utf8_validate (str: name, max_len: -`1`, NULL))
460	{
461	set_error (context, error, code: G_MARKUP_ERROR_BAD_UTF8,
462	_("Invalid UTF-8 encoded text in name — not valid “%s”"), name);
463	return FALSE;
464	}
465
466	if (!(g_ascii_isalpha (*p) \|\|
467	(!IS_COMMON_NAME_END_CHAR (*p) &&
468	(*p == `'_'` \|\|
469	*p == `':'` \|\|
470	g_unichar_isalpha (c: g_utf8_get_char (p))))))
471	{
472	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
473	_("“%s” is not a valid name"), name);
474	return FALSE;
475	}
476
477	for (p = g_utf8_next_char (name); *p != `'\0'`; p = g_utf8_next_char (p))
478	{
479	/ is_name_char /
480	if (!(g_ascii_isalnum (*p) \|\|
481	(!IS_COMMON_NAME_END_CHAR (*p) &&
482	(*p == `'.'` \|\|
483	*p == `'-'` \|\|
484	*p == `'_'` \|\|
485	*p == `':'` \|\|
486	g_unichar_isalpha (c: g_utf8_get_char (p))))))
487	{
488	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
489	_("“%s” is not a valid name: “%c”"), name, *p);
490	return FALSE;
491	}
492	}
493	return TRUE;
494	}
495
496	/*
497	* Use me for elements, attributes etc.
498	*/
499	static gboolean
500	name_validate (GMarkupParseContext *context,
501	const gchar *name,
502	GError **error)
503	{
504	char mask;
505	const char *p;
506
507	/ name start char /
508	p = name;
509	if (G_UNLIKELY (IS_COMMON_NAME_END_CHAR (*p) \|\|
510	!(g_ascii_isalpha (p) \|\| p == `'_'` \|\| *p == `':'`)))
511	goto slow_validate;
512
513	for (mask = p++; p != `'\0'`; p++)
514	{
515	mask \|= *p;
516
517	/ is_name_char /
518	if (G_UNLIKELY (!(g_ascii_isalnum (*p) \|\|
519	(!IS_COMMON_NAME_END_CHAR (*p) &&
520	(*p == `'.'` \|\|
521	*p == `'-'` \|\|
522	*p == `'_'` \|\|
523	*p == `':'`)))))
524	goto slow_validate;
525	}
526
527	if (mask & `0x80`) / un-common / non-ascii /
528	goto slow_validate;
529
530	return TRUE;
531
532	slow_validate:
533	return slow_name_validate (context, name, error);
534	}
535
536	static gboolean
537	text_validate (GMarkupParseContext *context,
538	const gchar *p,
539	gint len,
540	GError **error)
541	{
542	if (!g_utf8_validate_len (str: p, max_len: len, NULL))
543	{
544	set_error (context, error, code: G_MARKUP_ERROR_BAD_UTF8,
545	_("Invalid UTF-8 encoded text in name — not valid “%s”"), p);
546	return FALSE;
547	}
548	else
549	return TRUE;
550	}
551
552	static gchar*
553	char_str (gunichar c,
554	gchar *buf)
555	{
556	memset (s: buf, c: `0`, n: `8`);
557	g_unichar_to_utf8 (c, outbuf: buf);
558	return buf;
559	}
560
561	/ Format the next UTF-8 character as a gchar* for printing in error output*
562	* when we encounter a syntax error. This correctly handles invalid UTF-8,
563	* emitting it as hex escapes. */
564	static gchar*
565	utf8_str (const gchar *utf8,
566	gsize max_len,
567	gchar *buf)
568	{
569	gunichar c = g_utf8_get_char_validated (p: utf8, max_len);
570	if (c == (gunichar) -`1` \|\| c == (gunichar) -`2`)
571	{
572	guchar ch = (max_len > `0`) ? (guchar) *utf8 : `0`;
573	gchar *temp = g_strdup_printf (format: "\\x%02x", (guint) ch);
574	memset (s: buf, c: `0`, n: `8`);
575	memcpy (dest: buf, src: temp, n: strlen (s: temp));
576	g_free (mem: temp);
577	}
578	else
579	char_str (c, buf);
580	return buf;
581	}
582
583	G_GNUC_PRINTF(`5`, `6`)
584	static void
585	set_unescape_error (GMarkupParseContext *context,
586	GError **error,
587	const gchar *remaining_text,
588	GMarkupError code,
589	const gchar *format,
590	...)
591	{
592	GError *tmp_error;
593	gchar *s;
594	va_list args;
595	gint remaining_newlines;
596	const gchar *p;
597
598	remaining_newlines = `0`;
599	p = remaining_text;
600	while (*p != `'\0'`)
601	{
602	if (*p == `'\n'`)
603	++remaining_newlines;
604	++p;
605	}
606
607	va_start (args, format);
608	s = g_strdup_vprintf (format, args);
609	va_end (args);
610
611	tmp_error = g_error_new (G_MARKUP_ERROR,
612	code,
613	_("Error on line %d: %s"),
614	context->line_number - remaining_newlines,
615	s);
616
617	g_free (mem: s);
618
619	mark_error (context, error: tmp_error);
620
621	g_propagate_error (dest: error, src: tmp_error);
622	}
623
624	/*
625	* re-write the GString in-place, unescaping anything that escaped.
626	* most XML does not contain entities, or escaping.
627	*/
628	static gboolean
629	unescape_gstring_inplace (GMarkupParseContext *context,
630	GString *string,
631	gboolean *is_ascii,
632	GError **error)
633	{
634	char mask, *to;
635	const char *from;
636	gboolean normalize_attribute;
637
638	*is_ascii = FALSE;
639
640	/ are we unescaping an attribute or not ? /
641	if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ \|\|
642	context->state == STATE_INSIDE_ATTRIBUTE_VALUE_DQ)
643	normalize_attribute = TRUE;
644	else
645	normalize_attribute = FALSE;
646
647	/*
648	* Meeks' theorem: unescaping can only shrink text.
649	* for < etc. this is obvious, for more
650	* thought is required, but this is patently so.
651	*/
652	mask = `0`;
653	for (from = to = string->str; *from != `'\0'`; from++, to++)
654	{
655	to = from;
656
657	mask \|= *to;
658	if (normalize_attribute && (to == `'\t'` \|\| to == `'\n'`))
659	*to = `' '`;
660	if (*to == `'\r'`)
661	{
662	*to = normalize_attribute ? `' '` : `'\n'`;
663	if (from[`1`] == `'\n'`)
664	from++;
665	}
666	if (*from == `'&'`)
667	{
668	from++;
669	if (*from == `'#'`)
670	{
671	gint base = `10`;
672	gulong l;
673	gchar *end = NULL;
674
675	from++;
676
677	if (*from == `'x'`)
678	{
679	base = `16`;
680	from++;
681	}
682
683	errno = `0`;
684	l = strtoul (nptr: from, endptr: &end, base: base);
685
686	if (end == from \|\| errno != `0`)
687	{
688	set_unescape_error (context, error,
689	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
690	_("Failed to parse “%-.*s”, which "
691	"should have been a digit "
692	"inside a character reference "
693	"(ê for example) — perhaps "
694	"the digit is too large"),
695	(int)(end - from), from);
696	return FALSE;
697	}
698	else if (*end != `';'`)
699	{
700	set_unescape_error (context, error,
701	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
702	_("Character reference did not end with a "
703	"semicolon; "
704	"most likely you used an ampersand "
705	"character without intending to start "
706	"an entity — escape ampersand as &"));
707	return FALSE;
708	}
709	else
710	{
711	/ characters XML 1.1 permits /
712	if ((`0` < l && l <= `0xD7FF`) \|\|
713	(`0xE000` <= l && l <= `0xFFFD`) \|\|
714	(`0x10000` <= l && l <= `0x10FFFF`))
715	{
716	gchar buf[`8`];
717	char_str (c: l, buf);
718	strcpy (dest: to, src: buf);
719	to += strlen (s: buf) - `1`;
720	from = end;
721	if (l >= `0x80`) / not ascii /
722	mask \|= `0x80`;
723	}
724	else
725	{
726	set_unescape_error (context, error,
727	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
728	_("Character reference “%-.*s” does not "
729	"encode a permitted character"),
730	(int)(end - from), from);
731	return FALSE;
732	}
733	}
734	}
735
736	else if (strncmp (s1: from, s2: "lt;", n: `3`) == `0`)
737	{
738	*to = `'<'`;
739	from += `2`;
740	}
741	else if (strncmp (s1: from, s2: "gt;", n: `3`) == `0`)
742	{
743	*to = `'>'`;
744	from += `2`;
745	}
746	else if (strncmp (s1: from, s2: "amp;", n: `4`) == `0`)
747	{
748	*to = `'&'`;
749	from += `3`;
750	}
751	else if (strncmp (s1: from, s2: "quot;", n: `5`) == `0`)
752	{
753	*to = `'"'`;
754	from += `4`;
755	}
756	else if (strncmp (s1: from, s2: "apos;", n: `5`) == `0`)
757	{
758	*to = `'\''`;
759	from += `4`;
760	}
761	else
762	{
763	if (*from == `';'`)
764	set_unescape_error (context, error,
765	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
766	_("Empty entity “&;” seen; valid "
767	"entities are: & " < > '"));
768	else
769	{
770	const char *end = strchr (s: from, c: `';'`);
771	if (end)
772	set_unescape_error (context, error,
773	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
774	_("Entity name “%-.*s” is not known"),
775	(int)(end - from), from);
776	else
777	set_unescape_error (context, error,
778	remaining_text: from, code: G_MARKUP_ERROR_PARSE,
779	_("Entity did not end with a semicolon; "
780	"most likely you used an ampersand "
781	"character without intending to start "
782	"an entity — escape ampersand as &"));
783	}
784	return FALSE;
785	}
786	}
787	}
788
789	g_assert (to - string->str <= (gssize) string->len);
790	if (to - string->str != (gssize) string->len)
791	g_string_truncate (string, len: to - string->str);
792
793	*is_ascii = !(mask & `0x80`);
794
795	return TRUE;
796	}
797
798	static inline gboolean
799	advance_char (GMarkupParseContext *context)
800	{
801	context->iter++;
802	context->char_number++;
803
804	if (G_UNLIKELY (context->iter == context->current_text_end))
805	return FALSE;
806
807	else if (G_UNLIKELY (*context->iter == `'\n'`))
808	{
809	context->line_number++;
810	context->char_number = `1`;
811	}
812
813	return TRUE;
814	}
815
816	static inline gboolean
817	xml_isspace (char c)
818	{
819	return c == `' '` \|\| c == `'\t'` \|\| c == `'\n'` \|\| c == `'\r'`;
820	}
821
822	static void
823	skip_spaces (GMarkupParseContext *context)
824	{
825	do
826	{
827	if (!xml_isspace (c: *context->iter))
828	return;
829	}
830	while (advance_char (context));
831	}
832
833	static void
834	advance_to_name_end (GMarkupParseContext *context)
835	{
836	do
837	{
838	if (IS_COMMON_NAME_END_CHAR (*(context->iter)))
839	return;
840	if (xml_isspace (c: *(context->iter)))
841	return;
842	}
843	while (advance_char (context));
844	}
845
846	static void
847	release_chunk (GMarkupParseContext context, GString str)
848	{
849	GSList *node;
850	if (!str)
851	return;
852	if (str->allocated_len > `256`)
853	{ / large strings are unusual and worth freeing /
854	g_string_free (string: str, TRUE);
855	return;
856	}
857	string_blank (string: str);
858	node = get_list_node (context, data: str);
859	context->spare_chunks = g_slist_concat (list1: node, list2: context->spare_chunks);
860	}
861
862	static void
863	add_to_partial (GMarkupParseContext *context,
864	const gchar *text_start,
865	const gchar *text_end)
866	{
867	if (context->partial_chunk == NULL)
868	{ / allocate a new chunk to parse into /
869
870	if (context->spare_chunks != NULL)
871	{
872	GSList *node = context->spare_chunks;
873	context->spare_chunks = g_slist_remove_link (list: context->spare_chunks, link_: node);
874	context->partial_chunk = node->data;
875	free_list_node (context, node);
876	}
877	else
878	context->partial_chunk = g_string_sized_new (MAX (`28`, text_end - text_start));
879	}
880
881	if (text_start != text_end)
882	g_string_insert_len (string: context->partial_chunk, pos: -`1`,
883	val: text_start, len: text_end - text_start);
884	}
885
886	static inline void
887	truncate_partial (GMarkupParseContext *context)
888	{
889	if (context->partial_chunk != NULL)
890	string_blank (string: context->partial_chunk);
891	}
892
893	static inline const gchar*
894	current_element (GMarkupParseContext *context)
895	{
896	return context->tag_stack->data;
897	}
898
899	static void
900	pop_subparser_stack (GMarkupParseContext *context)
901	{
902	GMarkupRecursionTracker *tracker;
903
904	g_assert (context->subparser_stack);
905
906	tracker = context->subparser_stack->data;
907
908	context->awaiting_pop = TRUE;
909	context->held_user_data = context->user_data;
910
911	context->user_data = tracker->prev_user_data;
912	context->parser = tracker->prev_parser;
913	context->subparser_element = tracker->prev_element;
914	g_slice_free (GMarkupRecursionTracker, tracker);
915
916	context->subparser_stack = g_slist_delete_link (list: context->subparser_stack,
917	link_: context->subparser_stack);
918	}
919
920	static void
921	push_partial_as_tag (GMarkupParseContext *context)
922	{
923	GString *str = context->partial_chunk;
924	/ sadly, this is exported by gmarkup_get_element_stack as-is /
925	context->tag_stack = g_slist_concat (list1: get_list_node (context, data: str->str), list2: context->tag_stack);
926	context->tag_stack_gstr = g_slist_concat (list1: get_list_node (context, data: str), list2: context->tag_stack_gstr);
927	context->partial_chunk = NULL;
928	}
929
930	static void
931	pop_tag (GMarkupParseContext *context)
932	{
933	GSList nodea, nodeb;
934
935	nodea = context->tag_stack;
936	nodeb = context->tag_stack_gstr;
937	release_chunk (context, str: nodeb->data);
938	context->tag_stack = g_slist_remove_link (list: context->tag_stack, link_: nodea);
939	context->tag_stack_gstr = g_slist_remove_link (list: context->tag_stack_gstr, link_: nodeb);
940	free_list_node (context, node: nodea);
941	free_list_node (context, node: nodeb);
942	}
943
944	static void
945	possibly_finish_subparser (GMarkupParseContext *context)
946	{
947	if (current_element (context) == context->subparser_element)
948	pop_subparser_stack (context);
949	}
950
951	static void
952	ensure_no_outstanding_subparser (GMarkupParseContext *context)
953	{
954	if (context->awaiting_pop)
955	g_critical ("During the first end_element call after invoking a "
956	"subparser you must pop the subparser stack and handle "
957	"the freeing of the subparser user_data. This can be "
958	"done by calling the end function of the subparser. "
959	"Very probably, your program just leaked memory.");
960
961	/ let valgrind watch the pointer disappear... /
962	context->held_user_data = NULL;
963	context->awaiting_pop = FALSE;
964	}
965
966	static const gchar*
967	current_attribute (GMarkupParseContext *context)
968	{
969	g_assert (context->cur_attr >= `0`);
970	return context->attr_names[context->cur_attr]->str;
971	}
972
973	static gboolean
974	add_attribute (GMarkupParseContext context, GString str)
975	{
976	/ Sanity check on the number of attributes. /
977	if (context->cur_attr >= `1000`)
978	return FALSE;
979
980	if (context->cur_attr + `2` >= context->alloc_attrs)
981	{
982	context->alloc_attrs += `5`; / silly magic number /
983	context->attr_names = g_realloc (mem: context->attr_names, n_bytes: sizeof(GString)context->alloc_attrs);
984	context->attr_values = g_realloc (mem: context->attr_values, n_bytes: sizeof(GString)context->alloc_attrs);
985	}
986	context->cur_attr++;
987	context->attr_names[context->cur_attr] = str;
988	context->attr_values[context->cur_attr] = NULL;
989	context->attr_names[context->cur_attr+`1`] = NULL;
990	context->attr_values[context->cur_attr+`1`] = NULL;
991
992	return TRUE;
993	}
994
995	static void
996	clear_attributes (GMarkupParseContext *context)
997	{
998	/ Go ahead and free the attributes. /
999	for (; context->cur_attr >= `0`; context->cur_attr--)
1000	{
1001	int pos = context->cur_attr;
1002	release_chunk (context, str: context->attr_names[pos]);
1003	release_chunk (context, str: context->attr_values[pos]);
1004	context->attr_names[pos] = context->attr_values[pos] = NULL;
1005	}
1006	g_assert (context->cur_attr == -`1`);
1007	g_assert (context->attr_names == NULL \|\|
1008	context->attr_names[`0`] == NULL);
1009	g_assert (context->attr_values == NULL \|\|
1010	context->attr_values[`0`] == NULL);
1011	}
1012
1013	/ This has to be a separate function to ensure the alloca's*
1014	* are unwound on exit - otherwise we grow & blow the stack
1015	* with large documents
1016	*/
1017	static inline void
1018	emit_start_element (GMarkupParseContext *context,
1019	GError **error)
1020	{
1021	int i, j = `0`;
1022	const gchar *start_name;
1023	const gchar **attr_names;
1024	const gchar **attr_values;
1025	GError *tmp_error;
1026
1027	/ In case we want to ignore qualified tags and we see that we have*
1028	* one here, we push a subparser. This will ignore all tags inside of
1029	* the qualified tag.
1030	*
1031	* We deal with the end of the subparser from emit_end_element.
1032	*/
1033	if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (s: current_element (context), c: `':'`))
1034	{
1035	static const GMarkupParser ignore_parser;
1036	g_markup_parse_context_push (context, parser: &ignore_parser, NULL);
1037	clear_attributes (context);
1038	return;
1039	}
1040
1041	attr_names = g_newa (const gchar *, context->cur_attr + `2`);
1042	attr_values = g_newa (const gchar *, context->cur_attr + `2`);
1043	for (i = `0`; i < context->cur_attr + `1`; i++)
1044	{
1045	/ Possibly omit qualified attribute names from the list /
1046	if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (s: context->attr_names[i]->str, c: `':'`))
1047	continue;
1048
1049	attr_names[j] = context->attr_names[i]->str;
1050	attr_values[j] = context->attr_values[i]->str;
1051	j++;
1052	}
1053	attr_names[j] = NULL;
1054	attr_values[j] = NULL;
1055
1056	/ Call user callback for element start /
1057	tmp_error = NULL;
1058	start_name = current_element (context);
1059
1060	if (!name_validate (context, name: start_name, error))
1061	return;
1062
1063	if (context->parser->start_element)
1064	(* context->parser->start_element) (context,
1065	start_name,
1066	(const gchar **)attr_names,
1067	(const gchar **)attr_values,
1068	context->user_data,
1069	&tmp_error);
1070	clear_attributes (context);
1071
1072	if (tmp_error != NULL)
1073	propagate_error (context, dest: error, src: tmp_error);
1074	}
1075
1076	static void
1077	emit_end_element (GMarkupParseContext *context,
1078	GError **error)
1079	{
1080	/ We need to pop the tag stack and call the end_element*
1081	* function, since this is the close tag
1082	*/
1083	GError *tmp_error = NULL;
1084
1085	g_assert (context->tag_stack != NULL);
1086
1087	possibly_finish_subparser (context);
1088
1089	/ We might have just returned from our ignore subparser /
1090	if ((context->flags & G_MARKUP_IGNORE_QUALIFIED) && strchr (s: current_element (context), c: `':'`))
1091	{
1092	g_markup_parse_context_pop (context);
1093	pop_tag (context);
1094	return;
1095	}
1096
1097	tmp_error = NULL;
1098	if (context->parser->end_element)
1099	(* context->parser->end_element) (context,
1100	current_element (context),
1101	context->user_data,
1102	&tmp_error);
1103
1104	ensure_no_outstanding_subparser (context);
1105
1106	if (tmp_error)
1107	{
1108	mark_error (context, error: tmp_error);
1109	g_propagate_error (dest: error, src: tmp_error);
1110	}
1111
1112	pop_tag (context);
1113	}
1114
1115	/**
1116	* g_markup_parse_context_parse:
1117	* @context: a #GMarkupParseContext
1118	* @text: chunk of text to parse
1119	* @text_len: length of @text in bytes
1120	* @error: return location for a #GError
1121	*
1122	* Feed some data to the #GMarkupParseContext.
1123	*
1124	* The data need not be valid UTF-8; an error will be signaled if
1125	* it's invalid. The data need not be an entire document; you can
1126	* feed a document into the parser incrementally, via multiple calls
1127	* to this function. Typically, as you receive data from a network
1128	* connection or file, you feed each received chunk of data into this
1129	* function, aborting the process if an error occurs. Once an error
1130	* is reported, no further data may be fed to the #GMarkupParseContext;
1131	* all errors are fatal.
1132	*
1133	* Returns: %FALSE if an error occurred, %TRUE on success
1134	*/
1135	gboolean
1136	g_markup_parse_context_parse (GMarkupParseContext *context,
1137	const gchar *text,
1138	gssize text_len,
1139	GError **error)
1140	{
1141	g_return_val_if_fail (context != NULL, FALSE);
1142	g_return_val_if_fail (text != NULL, FALSE);
1143	g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1144	g_return_val_if_fail (!context->parsing, FALSE);
1145
1146	if (text_len < `0`)
1147	text_len = strlen (s: text);
1148
1149	if (text_len == `0`)
1150	return TRUE;
1151
1152	context->parsing = TRUE;
1153
1154
1155	context->current_text = text;
1156	context->current_text_len = text_len;
1157	context->current_text_end = context->current_text + text_len;
1158	context->iter = context->current_text;
1159	context->start = context->iter;
1160
1161	while (context->iter != context->current_text_end)
1162	{
1163	switch (context->state)
1164	{
1165	case STATE_START:
1166	/ Possible next state: AFTER_OPEN_ANGLE /
1167
1168	g_assert (context->tag_stack == NULL);
1169
1170	/ whitespace is ignored outside of any elements /
1171	skip_spaces (context);
1172
1173	if (context->iter != context->current_text_end)
1174	{
1175	if (*context->iter == `'<'`)
1176	{
1177	/ Move after the open angle /
1178	advance_char (context);
1179
1180	context->state = STATE_AFTER_OPEN_ANGLE;
1181
1182	/ this could start a passthrough /
1183	context->start = context->iter;
1184
1185	/ document is now non-empty /
1186	context->document_empty = FALSE;
1187	}
1188	else
1189	{
1190	set_error_literal (context,
1191	error,
1192	code: G_MARKUP_ERROR_PARSE,
1193	_("Document must begin with an element (e.g. <book>)"));
1194	}
1195	}
1196	break;
1197
1198	case STATE_AFTER_OPEN_ANGLE:
1199	/ Possible next states: INSIDE_OPEN_TAG_NAME,*
1200	* AFTER_CLOSE_TAG_SLASH, INSIDE_PASSTHROUGH
1201	*/
1202	if (*context->iter == `'?'` \|\|
1203	*context->iter == `'!'`)
1204	{
1205	/ include < in the passthrough /
1206	const gchar *openangle = "<";
1207	add_to_partial (context, text_start: openangle, text_end: openangle + `1`);
1208	context->start = context->iter;
1209	context->balance = `1`;
1210	context->state = STATE_INSIDE_PASSTHROUGH;
1211	}
1212	else if (*context->iter == `'/'`)
1213	{
1214	/ move after it /
1215	advance_char (context);
1216
1217	context->state = STATE_AFTER_CLOSE_TAG_SLASH;
1218	}
1219	else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1220	{
1221	context->state = STATE_INSIDE_OPEN_TAG_NAME;
1222
1223	/ start of tag name /
1224	context->start = context->iter;
1225	}
1226	else
1227	{
1228	gchar buf[`8`];
1229
1230	set_error (context,
1231	error,
1232	code: G_MARKUP_ERROR_PARSE,
1233	_("“%s” is not a valid character following "
1234	"a “<” character; it may not begin an "
1235	"element name"),
1236	utf8_str (utf8: context->iter,
1237	max_len: context->current_text_end - context->iter, buf));
1238	}
1239	break;
1240
1241	/ The AFTER_CLOSE_ANGLE state is actually sort of*
1242	* broken, because it doesn't correspond to a range
1243	* of characters in the input stream as the others do,
1244	* and thus makes things harder to conceptualize
1245	*/
1246	case STATE_AFTER_CLOSE_ANGLE:
1247	/ Possible next states: INSIDE_TEXT, STATE_START /
1248	if (context->tag_stack == NULL)
1249	{
1250	context->start = NULL;
1251	context->state = STATE_START;
1252	}
1253	else
1254	{
1255	context->start = context->iter;
1256	context->state = STATE_INSIDE_TEXT;
1257	}
1258	break;
1259
1260	case STATE_AFTER_ELISION_SLASH:
1261	/ Possible next state: AFTER_CLOSE_ANGLE /
1262	if (*context->iter == `'>'`)
1263	{
1264	/ move after the close angle /
1265	advance_char (context);
1266	context->state = STATE_AFTER_CLOSE_ANGLE;
1267	emit_end_element (context, error);
1268	}
1269	else
1270	{
1271	gchar buf[`8`];
1272
1273	set_error (context,
1274	error,
1275	code: G_MARKUP_ERROR_PARSE,
1276	_("Odd character “%s”, expected a “>” character "
1277	"to end the empty-element tag “%s”"),
1278	utf8_str (utf8: context->iter,
1279	max_len: context->current_text_end - context->iter, buf),
1280	current_element (context));
1281	}
1282	break;
1283
1284	case STATE_INSIDE_OPEN_TAG_NAME:
1285	/ Possible next states: BETWEEN_ATTRIBUTES /
1286
1287	/ if there's a partial chunk then it's the first part of the*
1288	* tag name. If there's a context->start then it's the start
1289	* of the tag name in current_text, the partial chunk goes
1290	* before that start though.
1291	*/
1292	advance_to_name_end (context);
1293
1294	if (context->iter == context->current_text_end)
1295	{
1296	/ The name hasn't necessarily ended. Merge with*
1297	* partial chunk, leave state unchanged.
1298	*/
1299	add_to_partial (context, text_start: context->start, text_end: context->iter);
1300	}
1301	else
1302	{
1303	/ The name has ended. Combine it with the partial chunk*
1304	* if any; push it on the stack; enter next state.
1305	*/
1306	add_to_partial (context, text_start: context->start, text_end: context->iter);
1307	push_partial_as_tag (context);
1308
1309	context->state = STATE_BETWEEN_ATTRIBUTES;
1310	context->start = NULL;
1311	}
1312	break;
1313
1314	case STATE_INSIDE_ATTRIBUTE_NAME:
1315	/ Possible next states: AFTER_ATTRIBUTE_NAME /
1316
1317	advance_to_name_end (context);
1318	add_to_partial (context, text_start: context->start, text_end: context->iter);
1319
1320	/ read the full name, if we enter the equals sign state*
1321	* then add the attribute to the list (without the value),
1322	* otherwise store a partial chunk to be prepended later.
1323	*/
1324	if (context->iter != context->current_text_end)
1325	context->state = STATE_AFTER_ATTRIBUTE_NAME;
1326	break;
1327
1328	case STATE_AFTER_ATTRIBUTE_NAME:
1329	/ Possible next states: AFTER_ATTRIBUTE_EQUALS_SIGN /
1330
1331	skip_spaces (context);
1332
1333	if (context->iter != context->current_text_end)
1334	{
1335	/ The name has ended. Combine it with the partial chunk*
1336	* if any; push it on the stack; enter next state.
1337	*/
1338	if (!name_validate (context, name: context->partial_chunk->str, error))
1339	break;
1340
1341	if (!add_attribute (context, str: context->partial_chunk))
1342	{
1343	set_error (context,
1344	error,
1345	code: G_MARKUP_ERROR_PARSE,
1346	_("Too many attributes in element “%s”"),
1347	current_element (context));
1348	break;
1349	}
1350
1351	context->partial_chunk = NULL;
1352	context->start = NULL;
1353
1354	if (*context->iter == `'='`)
1355	{
1356	advance_char (context);
1357	context->state = STATE_AFTER_ATTRIBUTE_EQUALS_SIGN;
1358	}
1359	else
1360	{
1361	gchar buf[`8`];
1362
1363	set_error (context,
1364	error,
1365	code: G_MARKUP_ERROR_PARSE,
1366	_("Odd character “%s”, expected a “=” after "
1367	"attribute name “%s” of element “%s”"),
1368	utf8_str (utf8: context->iter,
1369	max_len: context->current_text_end - context->iter, buf),
1370	current_attribute (context),
1371	current_element (context));
1372
1373	}
1374	}
1375	break;
1376
1377	case STATE_BETWEEN_ATTRIBUTES:
1378	/ Possible next states: AFTER_CLOSE_ANGLE,*
1379	* AFTER_ELISION_SLASH, INSIDE_ATTRIBUTE_NAME
1380	*/
1381	skip_spaces (context);
1382
1383	if (context->iter != context->current_text_end)
1384	{
1385	if (*context->iter == `'/'`)
1386	{
1387	advance_char (context);
1388	context->state = STATE_AFTER_ELISION_SLASH;
1389	}
1390	else if (*context->iter == `'>'`)
1391	{
1392	advance_char (context);
1393	context->state = STATE_AFTER_CLOSE_ANGLE;
1394	}
1395	else if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1396	{
1397	context->state = STATE_INSIDE_ATTRIBUTE_NAME;
1398	/ start of attribute name /
1399	context->start = context->iter;
1400	}
1401	else
1402	{
1403	gchar buf[`8`];
1404
1405	set_error (context,
1406	error,
1407	code: G_MARKUP_ERROR_PARSE,
1408	_("Odd character “%s”, expected a “>” or “/” "
1409	"character to end the start tag of "
1410	"element “%s”, or optionally an attribute; "
1411	"perhaps you used an invalid character in "
1412	"an attribute name"),
1413	utf8_str (utf8: context->iter,
1414	max_len: context->current_text_end - context->iter, buf),
1415	current_element (context));
1416	}
1417
1418	/ If we're done with attributes, invoke*
1419	* the start_element callback
1420	*/
1421	if (context->state == STATE_AFTER_ELISION_SLASH \|\|
1422	context->state == STATE_AFTER_CLOSE_ANGLE)
1423	emit_start_element (context, error);
1424	}
1425	break;
1426
1427	case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1428	/ Possible next state: INSIDE_ATTRIBUTE_VALUE_[SQ/DQ] /
1429
1430	skip_spaces (context);
1431
1432	if (context->iter != context->current_text_end)
1433	{
1434	if (*context->iter == `'"'`)
1435	{
1436	advance_char (context);
1437	context->state = STATE_INSIDE_ATTRIBUTE_VALUE_DQ;
1438	context->start = context->iter;
1439	}
1440	else if (*context->iter == `'\''`)
1441	{
1442	advance_char (context);
1443	context->state = STATE_INSIDE_ATTRIBUTE_VALUE_SQ;
1444	context->start = context->iter;
1445	}
1446	else
1447	{
1448	gchar buf[`8`];
1449
1450	set_error (context,
1451	error,
1452	code: G_MARKUP_ERROR_PARSE,
1453	_("Odd character “%s”, expected an open quote mark "
1454	"after the equals sign when giving value for "
1455	"attribute “%s” of element “%s”"),
1456	utf8_str (utf8: context->iter,
1457	max_len: context->current_text_end - context->iter, buf),
1458	current_attribute (context),
1459	current_element (context));
1460	}
1461	}
1462	break;
1463
1464	case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1465	case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1466	/ Possible next states: BETWEEN_ATTRIBUTES /
1467	{
1468	gchar delim;
1469
1470	if (context->state == STATE_INSIDE_ATTRIBUTE_VALUE_SQ)
1471	{
1472	delim = `'\''`;
1473	}
1474	else
1475	{
1476	delim = `'"'`;
1477	}
1478
1479	do
1480	{
1481	if (*context->iter == delim)
1482	break;
1483	}
1484	while (advance_char (context));
1485	}
1486	if (context->iter == context->current_text_end)
1487	{
1488	/ The value hasn't necessarily ended. Merge with*
1489	* partial chunk, leave state unchanged.
1490	*/
1491	add_to_partial (context, text_start: context->start, text_end: context->iter);
1492	}
1493	else
1494	{
1495	gboolean is_ascii;
1496	/ The value has ended at the quote mark. Combine it*
1497	* with the partial chunk if any; set it for the current
1498	* attribute.
1499	*/
1500	add_to_partial (context, text_start: context->start, text_end: context->iter);
1501
1502	g_assert (context->cur_attr >= `0`);
1503
1504	if (unescape_gstring_inplace (context, string: context->partial_chunk, is_ascii: &is_ascii, error) &&
1505	(is_ascii \|\| text_validate (context, p: context->partial_chunk->str,
1506	len: context->partial_chunk->len, error)))
1507	{
1508	/ success, advance past quote and set state. /
1509	context->attr_values[context->cur_attr] = context->partial_chunk;
1510	context->partial_chunk = NULL;
1511	advance_char (context);
1512	context->state = STATE_BETWEEN_ATTRIBUTES;
1513	context->start = NULL;
1514	}
1515
1516	truncate_partial (context);
1517	}
1518	break;
1519
1520	case STATE_INSIDE_TEXT:
1521	/ Possible next states: AFTER_OPEN_ANGLE /
1522	do
1523	{
1524	if (*context->iter == `'<'`)
1525	break;
1526	}
1527	while (advance_char (context));
1528
1529	/ The text hasn't necessarily ended. Merge with*
1530	* partial chunk, leave state unchanged.
1531	*/
1532
1533	add_to_partial (context, text_start: context->start, text_end: context->iter);
1534
1535	if (context->iter != context->current_text_end)
1536	{
1537	gboolean is_ascii;
1538
1539	/ The text has ended at the open angle. Call the text*
1540	* callback.
1541	*/
1542	if (unescape_gstring_inplace (context, string: context->partial_chunk, is_ascii: &is_ascii, error) &&
1543	(is_ascii \|\| text_validate (context, p: context->partial_chunk->str,
1544	len: context->partial_chunk->len, error)))
1545	{
1546	GError *tmp_error = NULL;
1547
1548	if (context->parser->text)
1549	(*context->parser->text) (context,
1550	context->partial_chunk->str,
1551	context->partial_chunk->len,
1552	context->user_data,
1553	&tmp_error);
1554
1555	if (tmp_error == NULL)
1556	{
1557	/ advance past open angle and set state. /
1558	advance_char (context);
1559	context->state = STATE_AFTER_OPEN_ANGLE;
1560	/ could begin a passthrough /
1561	context->start = context->iter;
1562	}
1563	else
1564	propagate_error (context, dest: error, src: tmp_error);
1565	}
1566
1567	truncate_partial (context);
1568	}
1569	break;
1570
1571	case STATE_AFTER_CLOSE_TAG_SLASH:
1572	/ Possible next state: INSIDE_CLOSE_TAG_NAME /
1573	if (!IS_COMMON_NAME_END_CHAR (*(context->iter)))
1574	{
1575	context->state = STATE_INSIDE_CLOSE_TAG_NAME;
1576
1577	/ start of tag name /
1578	context->start = context->iter;
1579	}
1580	else
1581	{
1582	gchar buf[`8`];
1583
1584	set_error (context,
1585	error,
1586	code: G_MARKUP_ERROR_PARSE,
1587	_("“%s” is not a valid character following "
1588	"the characters “</”; “%s” may not begin an "
1589	"element name"),
1590	utf8_str (utf8: context->iter,
1591	max_len: context->current_text_end - context->iter, buf),
1592	utf8_str (utf8: context->iter,
1593	max_len: context->current_text_end - context->iter, buf));
1594	}
1595	break;
1596
1597	case STATE_INSIDE_CLOSE_TAG_NAME:
1598	/ Possible next state: AFTER_CLOSE_TAG_NAME /
1599	advance_to_name_end (context);
1600	add_to_partial (context, text_start: context->start, text_end: context->iter);
1601
1602	if (context->iter != context->current_text_end)
1603	context->state = STATE_AFTER_CLOSE_TAG_NAME;
1604	break;
1605
1606	case STATE_AFTER_CLOSE_TAG_NAME:
1607	/ Possible next state: AFTER_CLOSE_TAG_SLASH /
1608
1609	skip_spaces (context);
1610
1611	if (context->iter != context->current_text_end)
1612	{
1613	GString *close_name;
1614
1615	close_name = context->partial_chunk;
1616	context->partial_chunk = NULL;
1617
1618	if (*context->iter != `'>'`)
1619	{
1620	gchar buf[`8`];
1621
1622	set_error (context,
1623	error,
1624	code: G_MARKUP_ERROR_PARSE,
1625	_("“%s” is not a valid character following "
1626	"the close element name “%s”; the allowed "
1627	"character is “>”"),
1628	utf8_str (utf8: context->iter,
1629	max_len: context->current_text_end - context->iter, buf),
1630	close_name->str);
1631	}
1632	else if (context->tag_stack == NULL)
1633	{
1634	set_error (context,
1635	error,
1636	code: G_MARKUP_ERROR_PARSE,
1637	_("Element “%s” was closed, no element "
1638	"is currently open"),
1639	close_name->str);
1640	}
1641	else if (strcmp (s1: close_name->str, s2: current_element (context)) != `0`)
1642	{
1643	set_error (context,
1644	error,
1645	code: G_MARKUP_ERROR_PARSE,
1646	_("Element “%s” was closed, but the currently "
1647	"open element is “%s”"),
1648	close_name->str,
1649	current_element (context));
1650	}
1651	else
1652	{
1653	advance_char (context);
1654	context->state = STATE_AFTER_CLOSE_ANGLE;
1655	context->start = NULL;
1656
1657	emit_end_element (context, error);
1658	}
1659	context->partial_chunk = close_name;
1660	truncate_partial (context);
1661	}
1662	break;
1663
1664	case STATE_INSIDE_PASSTHROUGH:
1665	/ Possible next state: AFTER_CLOSE_ANGLE /
1666	do
1667	{
1668	if (*context->iter == `'<'`)
1669	context->balance++;
1670	if (*context->iter == `'>'`)
1671	{
1672	gchar *str;
1673	gsize len;
1674
1675	context->balance--;
1676	add_to_partial (context, text_start: context->start, text_end: context->iter);
1677	context->start = context->iter;
1678
1679	str = context->partial_chunk->str;
1680	len = context->partial_chunk->len;
1681
1682	if (str[`1`] == `'?'` && str[len - `1`] == `'?'`)
1683	break;
1684	if (strncmp (s1: str, s2: "<!--", n: `4`) == `0` &&
1685	strcmp (s1: str + len - `2`, s2: "--") == `0`)
1686	break;
1687	if (strncmp (s1: str, s2: "<![CDATA[", n: `9`) == `0` &&
1688	strcmp (s1: str + len - `2`, s2: "]]") == `0`)
1689	break;
1690	if (strncmp (s1: str, s2: "<!DOCTYPE", n: `9`) == `0` &&
1691	context->balance == `0`)
1692	break;
1693	}
1694	}
1695	while (advance_char (context));
1696
1697	if (context->iter == context->current_text_end)
1698	{
1699	/ The passthrough hasn't necessarily ended. Merge with*
1700	* partial chunk, leave state unchanged.
1701	*/
1702	add_to_partial (context, text_start: context->start, text_end: context->iter);
1703	}
1704	else
1705	{
1706	/ The passthrough has ended at the close angle. Combine*
1707	* it with the partial chunk if any. Call the passthrough
1708	* callback. Note that the open/close angles are
1709	* included in the text of the passthrough.
1710	*/
1711	GError *tmp_error = NULL;
1712
1713	advance_char (context); / advance past close angle /
1714	add_to_partial (context, text_start: context->start, text_end: context->iter);
1715
1716	if (context->flags & G_MARKUP_TREAT_CDATA_AS_TEXT &&
1717	strncmp (s1: context->partial_chunk->str, s2: "<![CDATA[", n: `9`) == `0`)
1718	{
1719	if (context->parser->text &&
1720	text_validate (context,
1721	p: context->partial_chunk->str + `9`,
1722	len: context->partial_chunk->len - `12`,
1723	error))
1724	(*context->parser->text) (context,
1725	context->partial_chunk->str + `9`,
1726	context->partial_chunk->len - `12`,
1727	context->user_data,
1728	&tmp_error);
1729	}
1730	else if (context->parser->passthrough &&
1731	text_validate (context,
1732	p: context->partial_chunk->str,
1733	len: context->partial_chunk->len,
1734	error))
1735	(*context->parser->passthrough) (context,
1736	context->partial_chunk->str,
1737	context->partial_chunk->len,
1738	context->user_data,
1739	&tmp_error);
1740
1741	truncate_partial (context);
1742
1743	if (tmp_error == NULL)
1744	{
1745	context->state = STATE_AFTER_CLOSE_ANGLE;
1746	context->start = context->iter; / could begin text /
1747	}
1748	else
1749	propagate_error (context, dest: error, src: tmp_error);
1750	}
1751	break;
1752
1753	case STATE_ERROR:
1754	goto finished;
1755	break;
1756
1757	default:
1758	g_assert_not_reached ();
1759	break;
1760	}
1761	}
1762
1763	finished:
1764	context->parsing = FALSE;
1765
1766	return context->state != STATE_ERROR;
1767	}
1768
1769	/**
1770	* g_markup_parse_context_end_parse:
1771	* @context: a #GMarkupParseContext
1772	* @error: return location for a #GError
1773	*
1774	* Signals to the #GMarkupParseContext that all data has been
1775	* fed into the parse context with g_markup_parse_context_parse().
1776	*
1777	* This function reports an error if the document isn't complete,
1778	* for example if elements are still open.
1779	*
1780	* Returns: %TRUE on success, %FALSE if an error was set
1781	*/
1782	gboolean
1783	g_markup_parse_context_end_parse (GMarkupParseContext *context,
1784	GError **error)
1785	{
1786	g_return_val_if_fail (context != NULL, FALSE);
1787	g_return_val_if_fail (!context->parsing, FALSE);
1788	g_return_val_if_fail (context->state != STATE_ERROR, FALSE);
1789
1790	if (context->partial_chunk != NULL)
1791	{
1792	g_string_free (string: context->partial_chunk, TRUE);
1793	context->partial_chunk = NULL;
1794	}
1795
1796	if (context->document_empty)
1797	{
1798	set_error_literal (context, error, code: G_MARKUP_ERROR_EMPTY,
1799	_("Document was empty or contained only whitespace"));
1800	return FALSE;
1801	}
1802
1803	context->parsing = TRUE;
1804
1805	switch (context->state)
1806	{
1807	case STATE_START:
1808	/ Nothing to do /
1809	break;
1810
1811	case STATE_AFTER_OPEN_ANGLE:
1812	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1813	_("Document ended unexpectedly just after an open angle bracket “<”"));
1814	break;
1815
1816	case STATE_AFTER_CLOSE_ANGLE:
1817	if (context->tag_stack != NULL)
1818	{
1819	/ Error message the same as for INSIDE_TEXT /
1820	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
1821	_("Document ended unexpectedly with elements still open — "
1822	"“%s” was the last element opened"),
1823	current_element (context));
1824	}
1825	break;
1826
1827	case STATE_AFTER_ELISION_SLASH:
1828	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
1829	_("Document ended unexpectedly, expected to see a close angle "
1830	"bracket ending the tag <%s/>"), current_element (context));
1831	break;
1832
1833	case STATE_INSIDE_OPEN_TAG_NAME:
1834	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1835	_("Document ended unexpectedly inside an element name"));
1836	break;
1837
1838	case STATE_INSIDE_ATTRIBUTE_NAME:
1839	case STATE_AFTER_ATTRIBUTE_NAME:
1840	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1841	_("Document ended unexpectedly inside an attribute name"));
1842	break;
1843
1844	case STATE_BETWEEN_ATTRIBUTES:
1845	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1846	_("Document ended unexpectedly inside an element-opening "
1847	"tag."));
1848	break;
1849
1850	case STATE_AFTER_ATTRIBUTE_EQUALS_SIGN:
1851	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1852	_("Document ended unexpectedly after the equals sign "
1853	"following an attribute name; no attribute value"));
1854	break;
1855
1856	case STATE_INSIDE_ATTRIBUTE_VALUE_SQ:
1857	case STATE_INSIDE_ATTRIBUTE_VALUE_DQ:
1858	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1859	_("Document ended unexpectedly while inside an attribute "
1860	"value"));
1861	break;
1862
1863	case STATE_INSIDE_TEXT:
1864	g_assert (context->tag_stack != NULL);
1865	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
1866	_("Document ended unexpectedly with elements still open — "
1867	"“%s” was the last element opened"),
1868	current_element (context));
1869	break;
1870
1871	case STATE_AFTER_CLOSE_TAG_SLASH:
1872	case STATE_INSIDE_CLOSE_TAG_NAME:
1873	case STATE_AFTER_CLOSE_TAG_NAME:
1874	if (context->tag_stack != NULL)
1875	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
1876	_("Document ended unexpectedly inside the close tag for "
1877	"element “%s”"), current_element (context));
1878	else
1879	set_error (context, error, code: G_MARKUP_ERROR_PARSE,
1880	_("Document ended unexpectedly inside the close tag for an "
1881	"unopened element"));
1882	break;
1883
1884	case STATE_INSIDE_PASSTHROUGH:
1885	set_error_literal (context, error, code: G_MARKUP_ERROR_PARSE,
1886	_("Document ended unexpectedly inside a comment or "
1887	"processing instruction"));
1888	break;
1889
1890	case STATE_ERROR:
1891	default:
1892	g_assert_not_reached ();
1893	break;
1894	}
1895
1896	context->parsing = FALSE;
1897
1898	return context->state != STATE_ERROR;
1899	}
1900
1901	/**
1902	* g_markup_parse_context_get_element:
1903	* @context: a #GMarkupParseContext
1904	*
1905	* Retrieves the name of the currently open element.
1906	*
1907	* If called from the start_element or end_element handlers this will
1908	* give the element_name as passed to those functions. For the parent
1909	* elements, see g_markup_parse_context_get_element_stack().
1910	*
1911	* Returns: the name of the currently open element, or %NULL
1912	*
1913	* Since: 2.2
1914	*/
1915	const gchar *
1916	g_markup_parse_context_get_element (GMarkupParseContext *context)
1917	{
1918	g_return_val_if_fail (context != NULL, NULL);
1919
1920	if (context->tag_stack == NULL)
1921	return NULL;
1922	else
1923	return current_element (context);
1924	}
1925
1926	/**
1927	* g_markup_parse_context_get_element_stack:
1928	* @context: a #GMarkupParseContext
1929	*
1930	* Retrieves the element stack from the internal state of the parser.
1931	*
1932	* The returned #GSList is a list of strings where the first item is
1933	* the currently open tag (as would be returned by
1934	* g_markup_parse_context_get_element()) and the next item is its
1935	* immediate parent.
1936	*
1937	* This function is intended to be used in the start_element and
1938	* end_element handlers where g_markup_parse_context_get_element()
1939	* would merely return the name of the element that is being
1940	* processed.
1941	*
1942	* Returns: the element stack, which must not be modified
1943	*
1944	* Since: 2.16
1945	*/
1946	const GSList *
1947	g_markup_parse_context_get_element_stack (GMarkupParseContext *context)
1948	{
1949	g_return_val_if_fail (context != NULL, NULL);
1950	return context->tag_stack;
1951	}
1952
1953	/**
1954	* g_markup_parse_context_get_position:
1955	* @context: a #GMarkupParseContext
1956	* @line_number: (out) (optional): return location for a line number, or %NULL
1957	* @char_number: (out) (optional): return location for a char-on-line number, or %NULL
1958	*
1959	* Retrieves the current line number and the number of the character on
1960	* that line. Intended for use in error messages; there are no strict
1961	* semantics for what constitutes the "current" line number other than
1962	* "the best number we could come up with for error messages."
1963	*/
1964	void
1965	g_markup_parse_context_get_position (GMarkupParseContext *context,
1966	gint *line_number,
1967	gint *char_number)
1968	{
1969	g_return_if_fail (context != NULL);
1970
1971	if (line_number)
1972	*line_number = context->line_number;
1973
1974	if (char_number)
1975	*char_number = context->char_number;
1976	}
1977
1978	/**
1979	* g_markup_parse_context_get_user_data:
1980	* @context: a #GMarkupParseContext
1981	*
1982	* Returns the user_data associated with @context.
1983	*
1984	* This will either be the user_data that was provided to
1985	* g_markup_parse_context_new() or to the most recent call
1986	* of g_markup_parse_context_push().
1987	*
1988	* Returns: the provided user_data. The returned data belongs to
1989	* the markup context and will be freed when
1990	* g_markup_parse_context_free() is called.
1991	*
1992	* Since: 2.18
1993	*/
1994	gpointer
1995	g_markup_parse_context_get_user_data (GMarkupParseContext *context)
1996	{
1997	return context->user_data;
1998	}
1999
2000	/**
2001	* g_markup_parse_context_push:
2002	* @context: a #GMarkupParseContext
2003	* @parser: a #GMarkupParser
2004	* @user_data: user data to pass to #GMarkupParser functions
2005	*
2006	* Temporarily redirects markup data to a sub-parser.
2007	*
2008	* This function may only be called from the start_element handler of
2009	* a #GMarkupParser. It must be matched with a corresponding call to
2010	* g_markup_parse_context_pop() in the matching end_element handler
2011	* (except in the case that the parser aborts due to an error).
2012	*
2013	* All tags, text and other data between the matching tags is
2014	* redirected to the subparser given by @parser. @user_data is used
2015	* as the user_data for that parser. @user_data is also passed to the
2016	* error callback in the event that an error occurs. This includes
2017	* errors that occur in subparsers of the subparser.
2018	*
2019	* The end tag matching the start tag for which this call was made is
2020	* handled by the previous parser (which is given its own user_data)
2021	* which is why g_markup_parse_context_pop() is provided to allow "one
2022	* last access" to the @user_data provided to this function. In the
2023	* case of error, the @user_data provided here is passed directly to
2024	* the error callback of the subparser and g_markup_parse_context_pop()
2025	* should not be called. In either case, if @user_data was allocated
2026	* then it ought to be freed from both of these locations.
2027	*
2028	* This function is not intended to be directly called by users
2029	* interested in invoking subparsers. Instead, it is intended to be
2030	* used by the subparsers themselves to implement a higher-level
2031	* interface.
2032	*
2033	* As an example, see the following implementation of a simple
2034	* parser that counts the number of tags encountered.
2035	*
2036	* \|[<!-- language="C" -->
2037	* typedef struct
2038	* {
2039	* gint tag_count;
2040	* } CounterData;
2041	*
2042	* static void
2043	* counter_start_element (GMarkupParseContext *context,
2044	* const gchar *element_name,
2045	* const gchar **attribute_names,
2046	* const gchar **attribute_values,
2047	* gpointer user_data,
2048	* GError **error)
2049	* {
2050	* CounterData *data = user_data;
2051	*
2052	* data->tag_count++;
2053	* }
2054	*
2055	* static void
2056	* counter_error (GMarkupParseContext *context,
2057	* GError *error,
2058	* gpointer user_data)
2059	* {
2060	* CounterData *data = user_data;
2061	*
2062	* g_slice_free (CounterData, data);
2063	* }
2064	*
2065	* static GMarkupParser counter_subparser =
2066	* {
2067	* counter_start_element,
2068	* NULL,
2069	* NULL,
2070	* NULL,
2071	* counter_error
2072	* };
2073	* ]\|
2074	*
2075	* In order to allow this parser to be easily used as a subparser, the
2076	* following interface is provided:
2077	*
2078	* \|[<!-- language="C" -->
2079	* void
2080	* start_counting (GMarkupParseContext *context)
2081	* {
2082	* CounterData *data = g_slice_new (CounterData);
2083	*
2084	* data->tag_count = 0;
2085	* g_markup_parse_context_push (context, &counter_subparser, data);
2086	* }
2087	*
2088	* gint
2089	* end_counting (GMarkupParseContext *context)
2090	* {
2091	* CounterData *data = g_markup_parse_context_pop (context);
2092	* int result;
2093	*
2094	* result = data->tag_count;
2095	* g_slice_free (CounterData, data);
2096	*
2097	* return result;
2098	* }
2099	* ]\|
2100	*
2101	* The subparser would then be used as follows:
2102	*
2103	* \|[<!-- language="C" -->
2104	* static void start_element (context, element_name, ...)
2105	* {
2106	* if (strcmp (element_name, "count-these") == 0)
2107	* start_counting (context);
2108	*
2109	* // else, handle other tags...
2110	* }
2111	*
2112	* static void end_element (context, element_name, ...)
2113	* {
2114	* if (strcmp (element_name, "count-these") == 0)
2115	* g_print ("Counted %d tags\n", end_counting (context));
2116	*
2117	* // else, handle other tags...
2118	* }
2119	* ]\|
2120	*
2121	* Since: 2.18
2122	**/
2123	void
2124	g_markup_parse_context_push (GMarkupParseContext *context,
2125	const GMarkupParser *parser,
2126	gpointer user_data)
2127	{
2128	GMarkupRecursionTracker *tracker;
2129
2130	tracker = g_slice_new (GMarkupRecursionTracker);
2131	tracker->prev_element = context->subparser_element;
2132	tracker->prev_parser = context->parser;
2133	tracker->prev_user_data = context->user_data;
2134
2135	context->subparser_element = current_element (context);
2136	context->parser = parser;
2137	context->user_data = user_data;
2138
2139	context->subparser_stack = g_slist_prepend (list: context->subparser_stack,
2140	data: tracker);
2141	}
2142
2143	/**
2144	* g_markup_parse_context_pop:
2145	* @context: a #GMarkupParseContext
2146	*
2147	* Completes the process of a temporary sub-parser redirection.
2148	*
2149	* This function exists to collect the user_data allocated by a
2150	* matching call to g_markup_parse_context_push(). It must be called
2151	* in the end_element handler corresponding to the start_element
2152	* handler during which g_markup_parse_context_push() was called.
2153	* You must not call this function from the error callback -- the
2154	* @user_data is provided directly to the callback in that case.
2155	*
2156	* This function is not intended to be directly called by users
2157	* interested in invoking subparsers. Instead, it is intended to
2158	* be used by the subparsers themselves to implement a higher-level
2159	* interface.
2160	*
2161	* Returns: the user data passed to g_markup_parse_context_push()
2162	*
2163	* Since: 2.18
2164	*/
2165	gpointer
2166	g_markup_parse_context_pop (GMarkupParseContext *context)
2167	{
2168	gpointer user_data;
2169
2170	if (!context->awaiting_pop)
2171	possibly_finish_subparser (context);
2172
2173	g_assert (context->awaiting_pop);
2174
2175	context->awaiting_pop = FALSE;
2176
2177	/ valgrind friendliness /
2178	user_data = context->held_user_data;
2179	context->held_user_data = NULL;
2180
2181	return user_data;
2182	}
2183
2184	#define APPEND_TEXT_AND_SEEK(_str, _start, _end) \
2185	G_STMT_START { \
2186	if (_end > _start) \
2187	g_string_append_len (_str, _start, _end - _start); \
2188	_start = ++_end; \
2189	} G_STMT_END
2190
2191	/*
2192	* https://www.w3.org/TR/REC-xml/ defines the set of valid
2193	* characters as:
2194	* #x9 \| #xA \| #xD \| [#x20-#xD7FF] \| [#xE000-#xFFFD] \| [#x10000-#x10FFFF]
2195	*
2196	* That is, from non-ASCII UTF-8 character set, only 0xC27F - 0xC284 and
2197	* 0xC286 - 0xC29F have to be escaped (excluding the surrogate blocks).
2198	* Corresponding Unicode code points are [0x7F-0x84] and [0x86-0x9F].
2199	*
2200	* So instead of using costly g_utf8_next_char or similar UTF8 functions, it's
2201	* better to read each byte, and make an exception for 0xC2XX.
2202	*/
2203	static void
2204	append_escaped_text (GString *str,
2205	const gchar *text,
2206	gssize length)
2207	{
2208	const gchar p, pending;
2209	const gchar *end;
2210
2211	p = pending = text;
2212	end = text + length;
2213
2214	while (p < end && pending < end)
2215	{
2216	guchar c = (guchar) *pending;
2217
2218	switch (c)
2219	{
2220	case `'&'`:
2221	APPEND_TEXT_AND_SEEK (str, p, pending);
2222	g_string_append (string: str, val: "&");
2223	break;
2224
2225	case `'<'`:
2226	APPEND_TEXT_AND_SEEK (str, p, pending);
2227	g_string_append (string: str, val: "<");
2228	break;
2229
2230	case `'>'`:
2231	APPEND_TEXT_AND_SEEK (str, p, pending);
2232	g_string_append (string: str, val: ">");
2233	break;
2234
2235	case `'\''`:
2236	APPEND_TEXT_AND_SEEK (str, p, pending);
2237	g_string_append (string: str, val: "'");
2238	break;
2239
2240	case `'"'`:
2241	APPEND_TEXT_AND_SEEK (str, p, pending);
2242	g_string_append (string: str, val: """);
2243	break;
2244
2245	default:
2246	if ((`0x1` <= c && c <= `0x8`) \|\|
2247	(`0xb` <= c && c <= `0xc`) \|\|
2248	(`0xe` <= c && c <= `0x1f`) \|\|
2249	(c == `0x7f`))
2250	{
2251	APPEND_TEXT_AND_SEEK (str, p, pending);
2252	g_string_append_printf (string: str, format: "&#x%x;", c);
2253	}
2254	/ The utf-8 control characters to escape begins with 0xc2 byte /
2255	else if (c == `0xc2`)
2256	{
2257	gunichar u = g_utf8_get_char (p: pending);
2258
2259	if ((`0x7f` < u && u <= `0x84`) \|\|
2260	(`0x86` <= u && u <= `0x9f`))
2261	{
2262	APPEND_TEXT_AND_SEEK (str, p, pending);
2263	g_string_append_printf (string: str, format: "&#x%x;", u);
2264
2265	/*
2266	* We have appended a two byte character above, which
2267	* is one byte ahead of what we read on every loop.
2268	* Increment to skip 0xc2 and point to the right location.
2269	*/
2270	p++;
2271	}
2272	else
2273	pending++;
2274	}
2275	else
2276	pending++;
2277	break;
2278	}
2279	}
2280
2281	if (pending > p)
2282	g_string_append_len (string: str, val: p, len: pending - p);
2283	}
2284
2285	#undef APPEND_TEXT_AND_SEEK
2286
2287	/**
2288	* g_markup_escape_text:
2289	* @text: some valid UTF-8 text
2290	* @length: length of @text in bytes, or -1 if the text is nul-terminated
2291	*
2292	* Escapes text so that the markup parser will parse it verbatim.
2293	* Less than, greater than, ampersand, etc. are replaced with the
2294	* corresponding entities. This function would typically be used
2295	* when writing out a file to be parsed with the markup parser.
2296	*
2297	* Note that this function doesn't protect whitespace and line endings
2298	* from being processed according to the XML rules for normalization
2299	* of line endings and attribute values.
2300	*
2301	* Note also that this function will produce character references in
2302	* the range of ... for all control sequences
2303	* except for tabstop, newline and carriage return. The character
2304	* references in this range are not valid XML 1.0, but they are
2305	* valid XML 1.1 and will be accepted by the GMarkup parser.
2306	*
2307	* Returns: a newly allocated string with the escaped text
2308	*/
2309	gchar*
2310	g_markup_escape_text (const gchar *text,
2311	gssize length)
2312	{
2313	GString *str;
2314
2315	g_return_val_if_fail (text != NULL, NULL);
2316
2317	if (length < `0`)
2318	length = strlen (s: text);
2319
2320	/ prealloc at least as long as original text /
2321	str = g_string_sized_new (dfl_size: length);
2322	append_escaped_text (str, text, length);
2323
2324	return g_string_free (string: str, FALSE);
2325	}
2326
/*
 * skip_positional_argument:
 * @cp: position inside a printf-style conversion specification
 *
 * Returns: the position just past a "<digits>$" positional-argument
 *   marker starting at @cp, or @cp itself if there is none.
 */
static const char *
skip_positional_argument (const char *cp)
{
  const char *np = cp;

  while (*np >= '0' && *np <= '9')
    np++;

  if (np > cp && *np == '$')
    return np + 1;

  return cp;
}

/*
 * find_conversion:
 * @format: a printf-style format string
 * @after: location to store a pointer to the character after
 *   the returned conversion. On a %NULL return, returns the
 *   pointer to the trailing NUL in the string
 *
 * Find the next conversion in a printf-style format string.
 * Partially based on code from printf-parser.c,
 * Copyright (C) 1999-2000, 2002-2003 Free Software Foundation, Inc.
 *
 * Returns: pointer to the next conversion in @format,
 *  or %NULL, if none.
 */
static const char *
find_conversion (const char  *format,
                 const char **after)
{
  const char *start = format;
  const char *cp;

  while (*start != '\0' && *start != '%')
    start++;

  if (*start == '\0')
    {
      *after = start;
      return NULL;
    }

  cp = start + 1;

  /* A lone '%' at the very end of the string is not a conversion. */
  if (*cp == '\0')
    {
      *after = cp;
      return NULL;
    }

  /* Test for positional argument.  */
  cp = skip_positional_argument (cp);

  /* Skip the flags.  */
  while (*cp == '\'' ||
         *cp == '-' ||
         *cp == '+' ||
         *cp == ' ' ||
         *cp == '#' ||
         *cp == '0')
    cp++;

  /* Skip the field width.  */
  if (*cp == '*')
    {
      cp++;

      /* Test for positional argument.  */
      cp = skip_positional_argument (cp);
    }
  else
    {
      for (; *cp >= '0' && *cp <= '9'; cp++)
        ;
    }

  /* Skip the precision.  */
  if (*cp == '.')
    {
      cp++;
      if (*cp == '*')
        {
          /* Step over the '*' itself; without this the '*' would be
           * mistaken for the conversion character and e.g. "%.*s"
           * would be cut short as "%.*".
           */
          cp++;

          /* Test for positional argument.  */
          cp = skip_positional_argument (cp);
        }
      else
        {
          for (; *cp >= '0' && *cp <= '9'; cp++)
            ;
        }
    }

  /* Skip argument type/size specifiers.  */
  while (*cp == 'h' ||
         *cp == 'L' ||
         *cp == 'l' ||
         *cp == 'j' ||
         *cp == 'z' ||
         *cp == 'Z' ||
         *cp == 't')
    cp++;

  /* Skip the conversion character, but never step beyond the
   * terminating NUL of a truncated specification such as "%l".
   */
  if (*cp != '\0')
    cp++;

  *after = cp;
  return start;
}
2452
2453	/**
2454	* g_markup_vprintf_escaped:
2455	* @format: printf() style format string
2456	* @args: variable argument list, similar to vprintf()
2457	*
2458	* Formats the data in @args according to @format, escaping
2459	* all string and character arguments in the fashion
2460	* of g_markup_escape_text(). See g_markup_printf_escaped().
2461	*
2462	* Returns: newly allocated result from formatting
2463	* operation. Free with g_free().
2464	*
2465	* Since: 2.4
2466	*/
2467	#pragma GCC diagnostic push
2468	#pragma GCC diagnostic ignored "-Wformat-nonliteral"
2469
2470	gchar *
2471	g_markup_vprintf_escaped (const gchar *format,
2472	va_list args)
2473	{
2474	GString *format1;
2475	GString *format2;
2476	GString *result = NULL;
2477	gchar *output1 = NULL;
2478	gchar *output2 = NULL;
2479	const char p, op1, *op2;
2480	va_list args2;
2481
2482	/ The technique here, is that we make two format strings that*
2483	* have the identical conversions in the identical order to the
2484	* original strings, but differ in the text in-between. We
2485	* then use the normal g_strdup_vprintf() to format the arguments
2486	* with the two new format strings. By comparing the results,
2487	* we can figure out what segments of the output come from
2488	* the original format string, and what from the arguments,
2489	* and thus know what portions of the string to escape.
2490	*
2491	* For instance, for:
2492	*
2493	* g_markup_printf_escaped ("%s ate %d apples", "Susan & Fred", 5);
2494	*
2495	* We form the two format strings "%sX%dX" and %sY%sY". The results
2496	* of formatting with those two strings are
2497	*
2498	* "%sX%dX" => "Susan & FredX5X"
2499	* "%sY%dY" => "Susan & FredY5Y"
2500	*
2501	* To find the span of the first argument, we find the first position
2502	* where the two arguments differ, which tells us that the first
2503	* argument formatted to "Susan & Fred". We then escape that
2504	* to "Susan & Fred" and join up with the intermediate portions
2505	* of the format string and the second argument to get
2506	* "Susan & Fred ate 5 apples".
2507	*/
2508
2509	/ Create the two modified format strings*
2510	*/
2511	format1 = g_string_new (NULL);
2512	format2 = g_string_new (NULL);
2513	p = format;
2514	while (TRUE)
2515	{
2516	const char *after;
2517	const char *conv = find_conversion (format: p, after: &after);
2518	if (!conv)
2519	break;
2520
2521	g_string_append_len (string: format1, val: conv, len: after - conv);
2522	g_string_append_c (format1, `'X'`);
2523	g_string_append_len (string: format2, val: conv, len: after - conv);
2524	g_string_append_c (format2, `'Y'`);
2525
2526	p = after;
2527	}
2528
2529	/ Use them to format the arguments*
2530	*/
2531	G_VA_COPY (args2, args);
2532
2533	output1 = g_strdup_vprintf (format: format1->str, args);
2534
2535	if (!output1)
2536	{
2537	va_end (args2);
2538	goto cleanup;
2539	}
2540
2541	output2 = g_strdup_vprintf (format: format2->str, args: args2);
2542	va_end (args2);
2543	if (!output2)
2544	goto cleanup;
2545	result = g_string_new (NULL);
2546
2547	/ Iterate through the original format string again,*
2548	* copying the non-conversion portions and the escaped
2549	* converted arguments to the output string.
2550	*/
2551	op1 = output1;
2552	op2 = output2;
2553	p = format;
2554	while (TRUE)
2555	{
2556	const char *after;
2557	const char *output_start;
2558	const char *conv = find_conversion (format: p, after: &after);
2559	char *escaped;
2560
2561	if (!conv) / The end, after points to the trailing \0 /
2562	{
2563	g_string_append_len (string: result, val: p, len: after - p);
2564	break;
2565	}
2566
2567	g_string_append_len (string: result, val: p, len: conv - p);
2568	output_start = op1;
2569	while (op1 == op2)
2570	{
2571	op1++;
2572	op2++;
2573	}
2574
2575	escaped = g_markup_escape_text (text: output_start, length: op1 - output_start);
2576	g_string_append (string: result, val: escaped);
2577	g_free (mem: escaped);
2578
2579	p = after;
2580	op1++;
2581	op2++;
2582	}
2583
2584	cleanup:
2585	g_string_free (string: format1, TRUE);
2586	g_string_free (string: format2, TRUE);
2587	g_free (mem: output1);
2588	g_free (mem: output2);
2589
2590	if (result)
2591	return g_string_free (string: result, FALSE);
2592	else
2593	return NULL;
2594	}
2595
2596	#pragma GCC diagnostic pop
2597
2598	/**
2599	* g_markup_printf_escaped:
2600	* @format: printf() style format string
2601	* @...: the arguments to insert in the format string
2602	*
2603	* Formats arguments according to @format, escaping
2604	* all string and character arguments in the fashion
2605	* of g_markup_escape_text(). This is useful when you
2606	* want to insert literal strings into XML-style markup
2607	* output, without having to worry that the strings
2608	* might themselves contain markup.
2609	*
2610	* \|[<!-- language="C" -->
2611	* const char *store = "Fortnum & Mason";
2612	* const char *item = "Tea";
2613	* char *output;
2614	*
2615	* output = g_markup_printf_escaped ("<purchase>"
2616	* "<store>%s</store>"
2617	* "<item>%s</item>"
2618	* "</purchase>",
2619	* store, item);
2620	* ]\|
2621	*
2622	* Returns: newly allocated result from formatting
2623	* operation. Free with g_free().
2624	*
2625	* Since: 2.4
2626	*/
2627	gchar *
2628	g_markup_printf_escaped (const gchar *format, ...)
2629	{
2630	char *result;
2631	va_list args;
2632
2633	va_start (args, format);
2634	result = g_markup_vprintf_escaped (format, args);
2635	va_end (args);
2636
2637	return result;
2638	}
2639
2640	static gboolean
2641	g_markup_parse_boolean (const char *string,
2642	gboolean *value)
2643	{
2644	char const * const falses[] = { "false", "f", "no", "n", "0" };
2645	char const * const trues[] = { "true", "t", "yes", "y", "1" };
2646	gsize i;
2647
2648	for (i = `0`; i < G_N_ELEMENTS (falses); i++)
2649	{
2650	if (g_ascii_strcasecmp (s1: string, s2: falses[i]) == `0`)
2651	{
2652	if (value != NULL)
2653	*value = FALSE;
2654
2655	return TRUE;
2656	}
2657	}
2658
2659	for (i = `0`; i < G_N_ELEMENTS (trues); i++)
2660	{
2661	if (g_ascii_strcasecmp (s1: string, s2: trues[i]) == `0`)
2662	{
2663	if (value != NULL)
2664	*value = TRUE;
2665
2666	return TRUE;
2667	}
2668	}
2669
2670	return FALSE;
2671	}
2672
2673	/**
2674	* GMarkupCollectType:
2675	* @G_MARKUP_COLLECT_INVALID: used to terminate the list of attributes
2676	* to collect
2677	* @G_MARKUP_COLLECT_STRING: collect the string pointer directly from
2678	* the attribute_values[] array. Expects a parameter of type (const
2679	* char **). If %G_MARKUP_COLLECT_OPTIONAL is specified and the
2680	* attribute isn't present then the pointer will be set to %NULL
2681	* @G_MARKUP_COLLECT_STRDUP: as with %G_MARKUP_COLLECT_STRING, but
2682	* expects a parameter of type (char **) and g_strdup()s the
2683	* returned pointer. The pointer must be freed with g_free()
2684	* @G_MARKUP_COLLECT_BOOLEAN: expects a parameter of type (gboolean *)
2685	* and parses the attribute value as a boolean. Sets %FALSE if the
2686	* attribute isn't present. Valid boolean values consist of
2687	* (case-insensitive) "false", "f", "no", "n", "0" and "true", "t",
2688	* "yes", "y", "1"
2689	* @G_MARKUP_COLLECT_TRISTATE: as with %G_MARKUP_COLLECT_BOOLEAN, but
2690	* in the case of a missing attribute a value is set that compares
2691	* equal to neither %FALSE nor %TRUE. %G_MARKUP_COLLECT_OPTIONAL is
2692	* implied
2693	* @G_MARKUP_COLLECT_OPTIONAL: can be bitwise ORed with the other fields.
2694	* If present, allows the attribute not to appear. A default value
2695	* is set depending on what value type is used
2696	*
2697	* A mixed enumerated type and flags field. You must specify one type
2698	* (string, strdup, boolean, tristate). Additionally, you may optionally
2699	* bitwise OR the type with the flag %G_MARKUP_COLLECT_OPTIONAL.
2700	*
2701	* It is likely that this enum will be extended in the future to
2702	* support other types.
2703	*/
2704
2705	/**
2706	* g_markup_collect_attributes:
2707	* @element_name: the current tag name
2708	* @attribute_names: the attribute names
2709	* @attribute_values: the attribute values
2710	* @error: a pointer to a #GError or %NULL
2711	* @first_type: the #GMarkupCollectType of the first attribute
2712	* @first_attr: the name of the first attribute
2713	* @...: a pointer to the storage location of the first attribute
2714	* (or %NULL), followed by more types names and pointers, ending
2715	* with %G_MARKUP_COLLECT_INVALID
2716	*
2717	* Collects the attributes of the element from the data passed to the
2718	* #GMarkupParser start_element function, dealing with common error
2719	* conditions and supporting boolean values.
2720	*
2721	* This utility function is not required to write a parser but can save
2722	* a lot of typing.
2723	*
2724	* The @element_name, @attribute_names, @attribute_values and @error
2725	* parameters passed to the start_element callback should be passed
2726	* unmodified to this function.
2727	*
2728	* Following these arguments is a list of "supported" attributes to collect.
2729	* It is an error to specify multiple attributes with the same name. If any
2730	* attribute not in the list appears in the @attribute_names array then an
2731	* unknown attribute error will result.
2732	*
2733	* The #GMarkupCollectType field allows specifying the type of collection
2734	* to perform and if a given attribute must appear or is optional.
2735	*
2736	* The attribute name is simply the name of the attribute to collect.
2737	*
2738	* The pointer should be of the appropriate type (see the descriptions
2739	* under #GMarkupCollectType) and may be %NULL in case a particular
2740	* attribute is to be allowed but ignored.
2741	*
2742	* This function deals with issuing errors for missing attributes
2743	* (of type %G_MARKUP_ERROR_MISSING_ATTRIBUTE), unknown attributes
2744	* (of type %G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE) and duplicate
2745	* attributes (of type %G_MARKUP_ERROR_INVALID_CONTENT) as well
2746	* as parse errors for boolean-valued attributes (again of type
2747	* %G_MARKUP_ERROR_INVALID_CONTENT). In all of these cases %FALSE
2748	* will be returned and @error will be set as appropriate.
2749	*
2750	* Returns: %TRUE if successful
2751	*
2752	* Since: 2.16
2753	**/
2754	gboolean
2755	g_markup_collect_attributes (const gchar *element_name,
2756	const gchar **attribute_names,
2757	const gchar **attribute_values,
2758	GError **error,
2759	GMarkupCollectType first_type,
2760	const gchar *first_attr,
2761	...)
2762	{
2763	GMarkupCollectType type;
2764	const gchar *attr;
2765	guint64 collected;
2766	int written;
2767	va_list ap;
2768	int i;
2769
2770	type = first_type;
2771	attr = first_attr;
2772	collected = `0`;
2773	written = `0`;
2774
2775	va_start (ap, first_attr);
2776	while (type != G_MARKUP_COLLECT_INVALID)
2777	{
2778	gboolean mandatory;
2779	const gchar *value;
2780
2781	mandatory = !(type & G_MARKUP_COLLECT_OPTIONAL);
2782	type &= (G_MARKUP_COLLECT_OPTIONAL - `1`);
2783
2784	/ tristate records a value != TRUE and != FALSE*
2785	* for the case where the attribute is missing
2786	*/
2787	if (type == G_MARKUP_COLLECT_TRISTATE)
2788	mandatory = FALSE;
2789
2790	for (i = `0`; attribute_names[i]; i++)
2791	if (i >= `40` \|\| !(collected & (G_GUINT64_CONSTANT(`1`) << i)))
2792	if (!strcmp (s1: attribute_names[i], s2: attr))
2793	break;
2794
2795	/ ISO C99 only promises that the user can pass up to 127 arguments.*
2796	* Subtracting the first 4 arguments plus the final NULL and dividing
2797	* by 3 arguments per collected attribute, we are left with a maximum
2798	* number of supported attributes of (127 - 5) / 3 = 40.
2799	*
2800	* In reality, nobody is ever going to call us with anywhere close to
2801	* 40 attributes to collect, so it is safe to assume that if i > 40
2802	* then the user has given some invalid or repeated arguments. These
2803	* problems will be caught and reported at the end of the function.
2804	*
2805	* We know at this point that we have an error, but we don't know
2806	* what error it is, so just continue...
2807	*/
2808	if (i < `40`)
2809	collected \|= (G_GUINT64_CONSTANT(`1`) << i);
2810
2811	value = attribute_values[i];
2812
2813	if (value == NULL && mandatory)
2814	{
2815	g_set_error (err: error, G_MARKUP_ERROR,
2816	code: G_MARKUP_ERROR_MISSING_ATTRIBUTE,
2817	format: "element '%s' requires attribute '%s'",
2818	element_name, attr);
2819
2820	va_end (ap);
2821	goto failure;
2822	}
2823
2824	switch (type)
2825	{
2826	case G_MARKUP_COLLECT_STRING:
2827	{
2828	const char **str_ptr;
2829
2830	str_ptr = va_arg (ap, const char **);
2831
2832	if (str_ptr != NULL)
2833	*str_ptr = value;
2834	}
2835	break;
2836
2837	case G_MARKUP_COLLECT_STRDUP:
2838	{
2839	char **str_ptr;
2840
2841	str_ptr = va_arg (ap, char **);
2842
2843	if (str_ptr != NULL)
2844	*str_ptr = g_strdup (str: value);
2845	}
2846	break;
2847
2848	case G_MARKUP_COLLECT_BOOLEAN:
2849	case G_MARKUP_COLLECT_TRISTATE:
2850	if (value == NULL)
2851	{
2852	gboolean *bool_ptr;
2853
2854	bool_ptr = va_arg (ap, gboolean *);
2855
2856	if (bool_ptr != NULL)
2857	{
2858	if (type == G_MARKUP_COLLECT_TRISTATE)
2859	/ constructivists rejoice!*
2860	* neither false nor true...
2861	*/
2862	*bool_ptr = -`1`;
2863
2864	else / G_MARKUP_COLLECT_BOOLEAN /
2865	*bool_ptr = FALSE;
2866	}
2867	}
2868	else
2869	{
2870	if (!g_markup_parse_boolean (string: value, va_arg (ap, gboolean *)))
2871	{
2872	g_set_error (err: error, G_MARKUP_ERROR,
2873	code: G_MARKUP_ERROR_INVALID_CONTENT,
2874	format: "element '%s', attribute '%s', value '%s' "
2875	"cannot be parsed as a boolean value",
2876	element_name, attr, value);
2877
2878	va_end (ap);
2879	goto failure;
2880	}
2881	}
2882
2883	break;
2884
2885	default:
2886	g_assert_not_reached ();
2887	}
2888
2889	written++;
2890	type = va_arg (ap, GMarkupCollectType);
2891	if (type != G_MARKUP_COLLECT_INVALID)
2892	attr = va_arg (ap, const char *);
2893	}
2894	va_end (ap);
2895
2896	/ ensure we collected all the arguments /
2897	for (i = `0`; attribute_names[i]; i++)
2898	if ((collected & (G_GUINT64_CONSTANT(`1`) << i)) == `0`)
2899	{
2900	/ attribute not collected: could be caused by two things.*
2901	*
2902	* 1) it doesn't exist in our list of attributes
2903	* 2) it existed but was matched by a duplicate attribute earlier
2904	*
2905	* find out.
2906	*/
2907	int j;
2908
2909	for (j = `0`; j < i; j++)
2910	if (strcmp (s1: attribute_names[i], s2: attribute_names[j]) == `0`)
2911	/ duplicate! /
2912	break;
2913
2914	/ j is now the first occurrence of attribute_names[i] /
2915	if (i == j)
2916	g_set_error (err: error, G_MARKUP_ERROR,
2917	code: G_MARKUP_ERROR_UNKNOWN_ATTRIBUTE,
2918	format: "attribute '%s' invalid for element '%s'",
2919	attribute_names[i], element_name);
2920	else
2921	g_set_error (err: error, G_MARKUP_ERROR,
2922	code: G_MARKUP_ERROR_INVALID_CONTENT,
2923	format: "attribute '%s' given multiple times for element '%s'",
2924	attribute_names[i], element_name);
2925
2926	goto failure;
2927	}
2928
2929	return TRUE;
2930
2931	failure:
2932	/ replay the above to free allocations /
2933	type = first_type;
2934
2935	va_start (ap, first_attr);
2936	while (type != G_MARKUP_COLLECT_INVALID)
2937	{
2938	gpointer ptr;
2939
2940	ptr = va_arg (ap, gpointer);
2941
2942	if (ptr != NULL)
2943	{
2944	switch (type & (G_MARKUP_COLLECT_OPTIONAL - `1`))
2945	{
2946	case G_MARKUP_COLLECT_STRDUP:
2947	if (written)
2948	g_free (mem: (char* **) ptr);
2949	(char* **) ptr = NULL;
2950	break;
2951
2952	case G_MARKUP_COLLECT_STRING:
2953	(char* **) ptr = NULL;
2954	break;
2955
2956	case G_MARKUP_COLLECT_BOOLEAN:
2957	(gboolean ) ptr = FALSE;
2958	break;
2959
2960	case G_MARKUP_COLLECT_TRISTATE:
2961	(gboolean ) ptr = -`1`;
2962	break;
2963	}
2964	}
2965
2966	type = va_arg (ap, GMarkupCollectType);
2967	if (type != G_MARKUP_COLLECT_INVALID)
2968	{
2969	attr = va_arg (ap, const char *);
2970	(void) attr;
2971	}
2972	}
2973	va_end (ap);
2974
2975	return FALSE;
2976	}
2977

source code of gtk/subprojects/glib/glib/gmarkup.c