json.cpp source code [gtk/subprojects/libsass/src/json.cpp]

1	/*
2	Copyright (C) 2011 Joseph A. Adams (joeyadams3.14159@gmail.com)
3	All rights reserved.
4
5	Permission is hereby granted, free of charge, to any person obtaining a copy
6	of this software and associated documentation files (the "Software"), to deal
7	in the Software without restriction, including without limitation the rights
8	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9	copies of the Software, and to permit persons to whom the Software is
10	furnished to do so, subject to the following conditions:
11
12	The above copyright notice and this permission notice shall be included in
13	all copies or substantial portions of the Software.
14
15	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21	THE SOFTWARE.
22	*/
23
24	#ifdef _MSC_VER
25	#define _CRT_SECURE_NO_WARNINGS
26	#define _CRT_NONSTDC_NO_DEPRECATE
27	#endif
28
29	#include "json.hpp"
30
31	// include utf8 library used by libsass
32	// ToDo: replace internal json utf8 code
33	#include "utf8.h"
34
35	#include <assert.h>
36	#include <stdint.h>
37	#include <stdio.h>
38	#include <stdlib.h>
39	#include <string.h>
40
41	#if defined(_MSC_VER) && _MSC_VER < 1900
42	#include <stdarg.h>
43	#ifdef snprintf
44	#undef snprintf
45	#endif
46	extern "C" int snprintf(char , size_t, const* char *, ...);
47	#endif
48
49	#define out_of_memory() do { \
50	fprintf(stderr, "Out of memory.\n"); \
51	exit(EXIT_FAILURE); \
52	} while (0)
53
54	/ Sadly, strdup is not portable. /
55	static char json_strdup(const* char *str)
56	{
57	char ret = (char**) malloc(size: strlen(s: str) + `1`);
58	if (ret == NULL)
59	out_of_memory();
60	strcpy(dest: ret, src: str);
61	return ret;
62	}
63
64	/ String buffer /
65
66	typedef struct
67	{
68	char *cur;
69	char *end;
70	char *start;
71	} SB;
72
73	static void sb_init(SB *sb)
74	{
75	sb->start = (char*) malloc(size: `17`);
76	if (sb->start == NULL)
77	out_of_memory();
78	sb->cur = sb->start;
79	sb->end = sb->start + `16`;
80	}
81
82	/ sb and need may be evaluated multiple times. /
83	#define sb_need(sb, need) do { \
84	if ((sb)->end - (sb)->cur < (need)) \
85	sb_grow(sb, need); \
86	} while (0)
87
88	static void sb_grow(SB sb, int* need)
89	{
90	size_t length = sb->cur - sb->start;
91	size_t alloc = sb->end - sb->start;
92
93	do {
94	alloc *= `2`;
95	} while (alloc < length + need);
96
97	sb->start = (char*) realloc(ptr: sb->start, size: alloc + `1`);
98	if (sb->start == NULL)
99	out_of_memory();
100	sb->cur = sb->start + length;
101	sb->end = sb->start + alloc;
102	}
103
104	static void sb_put(SB sb, const* char bytes, int* count)
105	{
106	sb_need(sb, count);
107	memcpy(dest: sb->cur, src: bytes, n: count);
108	sb->cur += count;
109	}
110
111	#define sb_putc(sb, c) do { \
112	if ((sb)->cur >= (sb)->end) \
113	sb_grow(sb, 1); \
114	*(sb)->cur++ = (c); \
115	} while (0)
116
117	static void sb_puts(SB sb, const* char *str)
118	{
119	sb_put(sb, bytes: str, count: (int)strlen(s: str));
120	}
121
122	static char sb_finish(SB sb)
123	{
124	*sb->cur = `0`;
125	assert(sb->start <= sb->cur && strlen(sb->start) == (size_t)(sb->cur - sb->start));
126	return sb->start;
127	}
128
129	static void sb_free(SB *sb)
130	{
131	free(ptr: sb->start);
132	}
133
134	/*
135	* Unicode helper functions
136	*
137	* These are taken from the ccan/charset module and customized a bit.
138	* Putting them here means the compiler can (choose to) inline them,
139	* and it keeps ccan/json from having a dependency.
140	*
141	* We use uint32_t Type for Unicode codepoints.
142	* We need our own because wchar_t might be 16 bits.
143	*/
144
145	/*
146	* Validate a single UTF-8 character starting at @s.
147	* The string must be null-terminated.
148	*
149	* If it's valid, return its length (1 thru 4).
150	* If it's invalid or clipped, return 0.
151	*
152	* This function implements the syntax given in RFC3629, which is
153	* the same as that given in The Unicode Standard, Version 6.0.
154	*
155	* It has the following properties:
156	*
157	* * All codepoints U+0000..U+10FFFF may be encoded,
158	* except for U+D800..U+DFFF, which are reserved
159	* for UTF-16 surrogate pair encoding.
160	* * UTF-8 byte sequences longer than 4 bytes are not permitted,
161	* as they exceed the range of Unicode.
162	* * The sixty-six Unicode "non-characters" are permitted
163	* (namely, U+FDD0..U+FDEF, U+xxFFFE, and U+xxFFFF).
164	*/
165	static int utf8_validate_cz(const char *s)
166	{
167	unsigned char c = *s++;
168
169	if (c <= `0x7F`) { / 00..7F /
170	return `1`;
171	} else if (c <= `0xC1`) { / 80..C1 /
172	/ Disallow overlong 2-byte sequence. /
173	return `0`;
174	} else if (c <= `0xDF`) { / C2..DF /
175	/ Make sure subsequent byte is in the range 0x80..0xBF. /
176	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
177	return `0`;
178
179	return `2`;
180	} else if (c <= `0xEF`) { / E0..EF /
181	/ Disallow overlong 3-byte sequence. /
182	if (c == `0xE0` && (unsigned char)*s < `0xA0`)
183	return `0`;
184
185	/ Disallow U+D800..U+DFFF. /
186	if (c == `0xED` && (unsigned char)*s > `0x9F`)
187	return `0`;
188
189	/ Make sure subsequent bytes are in the range 0x80..0xBF. /
190	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
191	return `0`;
192	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
193	return `0`;
194
195	return `3`;
196	} else if (c <= `0xF4`) { / F0..F4 /
197	/ Disallow overlong 4-byte sequence. /
198	if (c == `0xF0` && (unsigned char)*s < `0x90`)
199	return `0`;
200
201	/ Disallow codepoints beyond U+10FFFF. /
202	if (c == `0xF4` && (unsigned char)*s > `0x8F`)
203	return `0`;
204
205	/ Make sure subsequent bytes are in the range 0x80..0xBF. /
206	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
207	return `0`;
208	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
209	return `0`;
210	if (((unsigned char)*s++ & `0xC0`) != `0x80`)
211	return `0`;
212
213	return `4`;
214	} else { / F5..FF /
215	return `0`;
216	}
217	}
218
219	/ Validate a null-terminated UTF-8 string. /
220	static bool utf8_validate(const char *s)
221	{
222	int len;
223
224	for (; *s != `0`; s += len) {
225	len = utf8_validate_cz(s);
226	if (len == `0`)
227	return false;
228	}
229
230	return true;
231	}
232
233	/*
234	* Read a single UTF-8 character starting at @s,
235	* returning the length, in bytes, of the character read.
236	*
237	* This function assumes input is valid UTF-8,
238	* and that there are enough characters in front of @s.
239	*/
240	static int utf8_read_char(const char s, uint32_t out)
241	{
242	const unsigned char c = (const* unsigned char*) s;
243
244	assert(utf8_validate_cz(s));
245
246	if (c[`0`] <= `0x7F`) {
247	/ 00..7F /
248	*out = c[`0`];
249	return `1`;
250	} else if (c[`0`] <= `0xDF`) {
251	/ C2..DF (unless input is invalid) /
252	*out = ((uint32_t)c[`0`] & `0x1F`) << `6` \|
253	((uint32_t)c[`1`] & `0x3F`);
254	return `2`;
255	} else if (c[`0`] <= `0xEF`) {
256	/ E0..EF /
257	*out = ((uint32_t)c[`0`] & `0xF`) << `12` \|
258	((uint32_t)c[`1`] & `0x3F`) << `6` \|
259	((uint32_t)c[`2`] & `0x3F`);
260	return `3`;
261	} else {
262	/ F0..F4 (unless input is invalid) /
263	*out = ((uint32_t)c[`0`] & `0x7`) << `18` \|
264	((uint32_t)c[`1`] & `0x3F`) << `12` \|
265	((uint32_t)c[`2`] & `0x3F`) << `6` \|
266	((uint32_t)c[`3`] & `0x3F`);
267	return `4`;
268	}
269	}
270
271	/*
272	* Write a single UTF-8 character to @s,
273	* returning the length, in bytes, of the character written.
274	*
275	* @unicode must be U+0000..U+10FFFF, but not U+D800..U+DFFF.
276	*
277	* This function will write up to 4 bytes to @out.
278	*/
279	static int utf8_write_char(uint32_t unicode, char *out)
280	{
281	unsigned char o = (unsigned* char*) out;
282
283	assert(unicode <= `0x10FFFF` && !(unicode >= `0xD800` && unicode <= `0xDFFF`));
284
285	if (unicode <= `0x7F`) {
286	/ U+0000..U+007F /
287	*o++ = unicode;
288	return `1`;
289	} else if (unicode <= `0x7FF`) {
290	/ U+0080..U+07FF /
291	*o++ = `0xC0` \| unicode >> `6`;
292	*o++ = `0x80` \| (unicode & `0x3F`);
293	return `2`;
294	} else if (unicode <= `0xFFFF`) {
295	/ U+0800..U+FFFF /
296	*o++ = `0xE0` \| unicode >> `12`;
297	*o++ = `0x80` \| (unicode >> `6` & `0x3F`);
298	*o++ = `0x80` \| (unicode & `0x3F`);
299	return `3`;
300	} else {
301	/ U+10000..U+10FFFF /
302	*o++ = `0xF0` \| unicode >> `18`;
303	*o++ = `0x80` \| (unicode >> `12` & `0x3F`);
304	*o++ = `0x80` \| (unicode >> `6` & `0x3F`);
305	*o++ = `0x80` \| (unicode & `0x3F`);
306	return `4`;
307	}
308	}
309
310	/*
311	* Compute the Unicode codepoint of a UTF-16 surrogate pair.
312	*
313	* @uc should be 0xD800..0xDBFF, and @lc should be 0xDC00..0xDFFF.
314	* If they aren't, this function returns false.
315	*/
316	static bool from_surrogate_pair(uint16_t uc, uint16_t lc, uint32_t *unicode)
317	{
318	if (uc >= `0xD800` && uc <= `0xDBFF` && lc >= `0xDC00` && lc <= `0xDFFF`) {
319	*unicode = `0x10000` + ((((uint32_t)uc & `0x3FF`) << `10`) \| (lc & `0x3FF`));
320	return true;
321	} else {
322	return false;
323	}
324	}
325
326	/*
327	* Construct a UTF-16 surrogate pair given a Unicode codepoint.
328	*
329	* @unicode must be U+10000..U+10FFFF.
330	*/
331	static void to_surrogate_pair(uint32_t unicode, uint16_t uc, uint16_t lc)
332	{
333	uint32_t n;
334
335	assert(unicode >= `0x10000` && unicode <= `0x10FFFF`);
336
337	n = unicode - `0x10000`;
338	*uc = ((n >> `10`) & `0x3FF`) \| `0xD800`;
339	*lc = (n & `0x3FF`) \| `0xDC00`;
340	}
341
342	static bool is_space (const char *c);
343	static bool is_digit (const char *c);
344	static bool parse_value (const char sp, JsonNode out);
345	static bool parse_string (const char *sp, char* **out);
346	static bool parse_number (const char *sp, double* *out);
347	static bool parse_array (const char sp, JsonNode out);
348	static bool parse_object (const char sp, JsonNode out);
349	static bool parse_hex16 (const char *sp, uint16_t out);
350
351	static bool expect_literal (const char *sp, const* char *str);
352	static void skip_space (const char **sp);
353
354	static void emit_value (SB out, const* JsonNode *node);
355	static void emit_value_indented (SB out, const* JsonNode node, const* char space, int* indent_level);
356	static void emit_string (SB out, const* char *str);
357	static void emit_number (SB out, double* num);
358	static void emit_array (SB out, const* JsonNode *array);
359	static void emit_array_indented (SB out, const* JsonNode array, const* char space, int* indent_level);
360	static void emit_object (SB out, const* JsonNode *object);
361	static void emit_object_indented (SB out, const* JsonNode object, const* char space, int* indent_level);
362
363	static int write_hex16(char *out, uint16_t val);
364
365	static JsonNode *mknode(JsonTag tag);
366	static void append_node(JsonNode parent, JsonNode child);
367	static void prepend_node(JsonNode parent, JsonNode child);
368	static void append_member(JsonNode object, char* key, JsonNode value);
369
370	/ Assertion-friendly validity checks /
371	static bool tag_is_valid(unsigned int tag);
372	static bool number_is_valid(const char *num);
373
374	JsonNode json_decode(const* char *json)
375	{
376	const char *s = json;
377	JsonNode *ret;
378
379	skip_space(sp: &s);
380	if (!parse_value(sp: &s, out: &ret))
381	return NULL;
382
383	skip_space(sp: &s);
384	if (*s != `0`) {
385	json_delete(node: ret);
386	return NULL;
387	}
388
389	return ret;
390	}
391
392	char json_encode(const* JsonNode *node)
393	{
394	return json_stringify(node, NULL);
395	}
396
397	char json_encode_string(const* char *str)
398	{
399	SB sb;
400	sb_init(sb: &sb);
401
402	try {
403	emit_string(out: &sb, str);
404	}
405	catch (std::exception&) {
406	sb_free(sb: &sb);
407	throw;
408	}
409
410	return sb_finish(sb: &sb);
411	}
412
413	char json_stringify(const* JsonNode node, const* char *space)
414	{
415	SB sb;
416	sb_init(sb: &sb);
417
418	try {
419	if (space != NULL)
420	emit_value_indented(out: &sb, node, space, indent_level: `0`);
421	else
422	emit_value(out: &sb, node);
423	}
424	catch (std::exception&) {
425	sb_free(sb: &sb);
426	throw;
427	}
428
429	return sb_finish(sb: &sb);
430	}
431
432	void json_delete(JsonNode *node)
433	{
434	if (node != NULL) {
435	json_remove_from_parent(node);
436
437	switch (node->tag) {
438	case JSON_STRING:
439	free(ptr: node->string_);
440	break;
441	case JSON_ARRAY:
442	case JSON_OBJECT:
443	{
444	JsonNode child, next;
445	for (child = node->children.head; child != NULL; child = next) {
446	next = child->next;
447	json_delete(node: child);
448	}
449	break;
450	}
451	default:;
452	}
453
454	free(ptr: node);
455	}
456	}
457
458	bool json_validate(const char *json)
459	{
460	const char *s = json;
461
462	skip_space(sp: &s);
463	if (!parse_value(sp: &s, NULL))
464	return false;
465
466	skip_space(sp: &s);
467	if (*s != `0`)
468	return false;
469
470	return true;
471	}
472
473	JsonNode json_find_element(JsonNode array, int index)
474	{
475	JsonNode *element;
476	int i = `0`;
477
478	if (array == NULL \|\| array->tag != JSON_ARRAY)
479	return NULL;
480
481	json_foreach(element, array) {
482	if (i == index)
483	return element;
484	i++;
485	}
486
487	return NULL;
488	}
489
490	JsonNode json_find_member(JsonNode object, const char *name)
491	{
492	JsonNode *member;
493
494	if (object == NULL \|\| object->tag != JSON_OBJECT)
495	return NULL;
496
497	json_foreach(member, object)
498	if (strcmp(s1: member->key, s2: name) == `0`)
499	return member;
500
501	return NULL;
502	}
503
504	JsonNode json_first_child(const* JsonNode *node)
505	{
506	if (node != NULL && (node->tag == JSON_ARRAY \|\| node->tag == JSON_OBJECT))
507	return node->children.head;
508	return NULL;
509	}
510
511	static JsonNode *mknode(JsonTag tag)
512	{
513	JsonNode ret = (JsonNode) calloc(nmemb: `1`, size: sizeof(JsonNode));
514	if (ret == NULL)
515	out_of_memory();
516	ret->tag = tag;
517	return ret;
518	}
519
520	JsonNode json_mknull(void*)
521	{
522	return mknode(tag: JSON_NULL);
523	}
524
525	JsonNode json_mkbool(bool* b)
526	{
527	JsonNode *ret = mknode(tag: JSON_BOOL);
528	ret->bool_ = b;
529	return ret;
530	}
531
532	static JsonNode mkstring(char* *s)
533	{
534	JsonNode *ret = mknode(tag: JSON_STRING);
535	ret->string_ = s;
536	return ret;
537	}
538
539	JsonNode json_mkstring(const* char *s)
540	{
541	return mkstring(s: json_strdup(str: s));
542	}
543
544	JsonNode json_mknumber(double* n)
545	{
546	JsonNode *node = mknode(tag: JSON_NUMBER);
547	node->number_ = n;
548	return node;
549	}
550
551	JsonNode json_mkarray(void*)
552	{
553	return mknode(tag: JSON_ARRAY);
554	}
555
556	JsonNode json_mkobject(void*)
557	{
558	return mknode(tag: JSON_OBJECT);
559	}
560
561	static void append_node(JsonNode parent, JsonNode child)
562	{
563	if (child != NULL && parent != NULL) {
564	child->parent = parent;
565	child->prev = parent->children.tail;
566	child->next = NULL;
567
568	if (parent->children.tail != NULL)
569	parent->children.tail->next = child;
570	else
571	parent->children.head = child;
572	parent->children.tail = child;
573	}
574	}
575
576	static void prepend_node(JsonNode parent, JsonNode child)
577	{
578	if (child != NULL && parent != NULL) {
579	child->parent = parent;
580	child->prev = NULL;
581	child->next = parent->children.head;
582
583	if (parent->children.head != NULL)
584	parent->children.head->prev = child;
585	else
586	parent->children.tail = child;
587	parent->children.head = child;
588	}
589	}
590
591	static void append_member(JsonNode object, char* key, JsonNode value)
592	{
593	if (value != NULL && object != NULL) {
594	value->key = key;
595	append_node(parent: object, child: value);
596	}
597	}
598
599	void json_append_element(JsonNode array, JsonNode element)
600	{
601	if (array != NULL && element !=NULL) {
602	assert(array->tag == JSON_ARRAY);
603	assert(element->parent == NULL);
604
605	append_node(parent: array, child: element);
606	}
607	}
608
609	void json_prepend_element(JsonNode array, JsonNode element)
610	{
611	assert(array->tag == JSON_ARRAY);
612	assert(element->parent == NULL);
613
614	prepend_node(parent: array, child: element);
615	}
616
617	void json_append_member(JsonNode object, const* char key, JsonNode value)
618	{
619	if (object != NULL && key != NULL && value != NULL) {
620	assert(object->tag == JSON_OBJECT);
621	assert(value->parent == NULL);
622
623	append_member(object, key: json_strdup(str: key), value);
624	}
625	}
626
627	void json_prepend_member(JsonNode object, const* char key, JsonNode value)
628	{
629	if (object != NULL && key != NULL && value != NULL) {
630	assert(object->tag == JSON_OBJECT);
631	assert(value->parent == NULL);
632
633	value->key = json_strdup(str: key);
634	prepend_node(parent: object, child: value);
635	}
636	}
637
638	void json_remove_from_parent(JsonNode *node)
639	{
640	if (node != NULL) {
641	JsonNode *parent = node->parent;
642
643	if (parent != NULL) {
644	if (node->prev != NULL)
645	node->prev->next = node->next;
646	else
647	parent->children.head = node->next;
648
649	if (node->next != NULL)
650	node->next->prev = node->prev;
651	else
652	parent->children.tail = node->prev;
653
654	free(ptr: node->key);
655
656	node->parent = NULL;
657	node->prev = node->next = NULL;
658	node->key = NULL;
659	}
660	}
661	}
662
663	static bool parse_value(const char sp, JsonNode out)
664	{
665	const char s = sp;
666
667	switch (*s) {
668	case `'n'`:
669	if (expect_literal(sp: &s, str: "null")) {
670	if (out)
671	*out = json_mknull();
672	*sp = s;
673	return true;
674	}
675	return false;
676
677	case `'f'`:
678	if (expect_literal(sp: &s, str: "false")) {
679	if (out)
680	out = json_mkbool(b: false*);
681	*sp = s;
682	return true;
683	}
684	return false;
685
686	case `'t'`:
687	if (expect_literal(sp: &s, str: "true")) {
688	if (out)
689	out = json_mkbool(b: true*);
690	*sp = s;
691	return true;
692	}
693	return false;
694
695	case `'"'`: {
696	char *str = NULL;
697	if (parse_string(sp: &s, out: out ? &str : NULL)) {
698	if (out)
699	*out = mkstring(s: str);
700	*sp = s;
701	return true;
702	}
703	return false;
704	}
705
706	case `'['`:
707	if (parse_array(sp: &s, out)) {
708	*sp = s;
709	return true;
710	}
711	return false;
712
713	case `'{'`:
714	if (parse_object(sp: &s, out)) {
715	*sp = s;
716	return true;
717	}
718	return false;
719
720	default: {
721	double num;
722	if (parse_number(sp: &s, out: out ? &num : NULL)) {
723	if (out)
724	*out = json_mknumber(n: num);
725	*sp = s;
726	return true;
727	}
728	return false;
729	}
730	}
731	}
732
733	static bool parse_array(const char sp, JsonNode out)
734	{
735	const char s = sp;
736	JsonNode *ret = out ? json_mkarray() : NULL;
737	JsonNode *element = NULL;
738
739	if (*s++ != `'['`)
740	goto failure;
741	skip_space(sp: &s);
742
743	if (*s == `']'`) {
744	s++;
745	goto success;
746	}
747
748	for (;;) {
749	if (!parse_value(sp: &s, out: out ? &element : NULL))
750	goto failure;
751	skip_space(sp: &s);
752
753	if (out)
754	json_append_element(array: ret, element);
755
756	if (*s == `']'`) {
757	s++;
758	goto success;
759	}
760
761	if (*s++ != `','`)
762	goto failure;
763	skip_space(sp: &s);
764	}
765
766	success:
767	*sp = s;
768	if (out)
769	*out = ret;
770	return true;
771
772	failure:
773	json_delete(node: ret);
774	return false;
775	}
776
777	static bool parse_object(const char sp, JsonNode out)
778	{
779	const char s = sp;
780	JsonNode *ret = out ? json_mkobject() : NULL;
781	char *key = NULL;
782	JsonNode *value = NULL;
783
784	if (*s++ != `'{'`)
785	goto failure;
786	skip_space(sp: &s);
787
788	if (*s == `'}'`) {
789	s++;
790	goto success;
791	}
792
793	for (;;) {
794	if (!parse_string(sp: &s, out: out ? &key : NULL))
795	goto failure;
796	skip_space(sp: &s);
797
798	if (*s++ != `':'`)
799	goto failure_free_key;
800	skip_space(sp: &s);
801
802	if (!parse_value(sp: &s, out: out ? &value : NULL))
803	goto failure_free_key;
804	skip_space(sp: &s);
805
806	if (out)
807	append_member(object: ret, key, value);
808
809	if (*s == `'}'`) {
810	s++;
811	goto success;
812	}
813
814	if (*s++ != `','`)
815	goto failure;
816	skip_space(sp: &s);
817	}
818
819	success:
820	*sp = s;
821	if (out)
822	*out = ret;
823	return true;
824
825	failure_free_key:
826	if (out)
827	free(ptr: key);
828	failure:
829	json_delete(node: ret);
830	return false;
831	}
832
833	bool parse_string(const char *sp, char* **out)
834	{
835	const char s = sp;
836	SB sb = { .cur: `0`, .end: `0`, .start: `0` };
837	char throwaway_buffer[`4`];
838	/ enough space for a UTF-8 character /
839	char *b;
840
841	if (*s++ != `'"'`)
842	return false;
843
844	if (out) {
845	sb_init(sb: &sb);
846	sb_need(&sb, `4`);
847	b = sb.cur;
848	} else {
849	b = throwaway_buffer;
850	}
851
852	while (*s != `'"'`) {
853	unsigned char c = *s++;
854
855	/ Parse next character, and write it to b. /
856	if (c == `'\\'`) {
857	c = *s++;
858	switch (c) {
859	case `'"'`:
860	case `'\\'`:
861	case `'/'`:
862	*b++ = c;
863	break;
864	case `'b'`:
865	*b++ = `'\b'`;
866	break;
867	case `'f'`:
868	*b++ = `'\f'`;
869	break;
870	case `'n'`:
871	*b++ = `'\n'`;
872	break;
873	case `'r'`:
874	*b++ = `'\r'`;
875	break;
876	case `'t'`:
877	*b++ = `'\t'`;
878	break;
879	case `'u'`:
880	{
881	uint16_t uc, lc;
882	uint32_t unicode;
883
884	if (!parse_hex16(sp: &s, out: &uc))
885	goto failed;
886
887	if (uc >= `0xD800` && uc <= `0xDFFF`) {
888	/ Handle UTF-16 surrogate pair. /
889	if (s++ != `'\\'` \|\| s++ != `'u'` \|\| !parse_hex16(sp: &s, out: &lc))
890	goto failed; / Incomplete surrogate pair. /
891	if (!from_surrogate_pair(uc, lc, unicode: &unicode))
892	goto failed; / Invalid surrogate pair. /
893	} else if (uc == `0`) {
894	/ Disallow "\u0000". /
895	goto failed;
896	} else {
897	unicode = uc;
898	}
899
900	b += utf8_write_char(unicode, out: b);
901	break;
902	}
903	default:
904	/ Invalid escape /
905	goto failed;
906	}
907	} else if (c <= `0x1F`) {
908	/ Control characters are not allowed in string literals. /
909	goto failed;
910	} else {
911	/ Validate and echo a UTF-8 character. /
912	int len;
913
914	s--;
915	len = utf8_validate_cz(s);
916	if (len == `0`)
917	goto failed; / Invalid UTF-8 character. /
918
919	while (len--)
920	b++ = s++;
921	}
922
923	/*
924	* Update sb to know about the new bytes,
925	* and set up b to write another character.
926	*/
927	if (out) {
928	sb.cur = b;
929	sb_need(&sb, `4`);
930	b = sb.cur;
931	} else {
932	b = throwaway_buffer;
933	}
934	}
935	s++;
936
937	if (out)
938	*out = sb_finish(sb: &sb);
939	*sp = s;
940	return true;
941
942	failed:
943	if (out)
944	sb_free(sb: &sb);
945	return false;
946	}
947
948	bool is_space(const char *c) {
949	return ((c) == `'\t'` \|\| (c) == `'\n'` \|\| (c) == `'\r'` \|\| (c) == `' '`);
950	}
951
952	bool is_digit(const char *c){
953	return ((c) >= `'0'` && (c) <= `'9'`);
954	}
955
956	/*
957	* The JSON spec says that a number shall follow this precise pattern
958	* (spaces and quotes added for readability):
959	* '-'? (0 \| [1-9][0-9]*) ('.' [0-9]+)? ([Ee] [+-]? [0-9]+)?
960	*
961	* However, some JSON parsers are more liberal. For instance, PHP accepts
962	* '.5' and '1.'. JSON.parse accepts '+3'.
963	*
964	* This function takes the strict approach.
965	*/
966	bool parse_number(const char *sp, double* *out)
967	{
968	const char s = sp;
969
970	/ '-'? /
971	if (*s == `'-'`)
972	s++;
973
974	/ (0 \| [1-9][0-9]) /*
975	if (*s == `'0'`) {
976	s++;
977	} else {
978	if (!is_digit(c: s))
979	return false;
980	do {
981	s++;
982	} while (is_digit(c: s));
983	}
984
985	/ ('.' [0-9]+)? /
986	if (*s == `'.'`) {
987	s++;
988	if (!is_digit(c: s))
989	return false;
990	do {
991	s++;
992	} while (is_digit(c: s));
993	}
994
995	/ ([Ee] [+-]? [0-9]+)? /
996	if (s == `'E'` \|\| s == `'e'`) {
997	s++;
998	if (s == `'+'` \|\| s == `'-'`)
999	s++;
1000	if (!is_digit(c: s))
1001	return false;
1002	do {
1003	s++;
1004	} while (is_digit(c: s));
1005	}
1006
1007	if (out)
1008	out = strtod(nptr: sp, NULL);
1009
1010	*sp = s;
1011	return true;
1012	}
1013
1014	static void skip_space(const char **sp)
1015	{
1016	const char s = sp;
1017	while (is_space(c: s))
1018	s++;
1019	*sp = s;
1020	}
1021
1022	static void emit_value(SB out, const* JsonNode *node)
1023	{
1024	assert(tag_is_valid(node->tag));
1025	switch (node->tag) {
1026	case JSON_NULL:
1027	sb_puts(sb: out, str: "null");
1028	break;
1029	case JSON_BOOL:
1030	sb_puts(sb: out, str: node->bool_ ? "true" : "false");
1031	break;
1032	case JSON_STRING:
1033	emit_string(out, str: node->string_);
1034	break;
1035	case JSON_NUMBER:
1036	emit_number(out, num: node->number_);
1037	break;
1038	case JSON_ARRAY:
1039	emit_array(out, array: node);
1040	break;
1041	case JSON_OBJECT:
1042	emit_object(out, object: node);
1043	break;
1044	default:
1045	assert(false);
1046	}
1047	}
1048
1049	void emit_value_indented(SB out, const* JsonNode node, const* char space, int* indent_level)
1050	{
1051	assert(tag_is_valid(node->tag));
1052	switch (node->tag) {
1053	case JSON_NULL:
1054	sb_puts(sb: out, str: "null");
1055	break;
1056	case JSON_BOOL:
1057	sb_puts(sb: out, str: node->bool_ ? "true" : "false");
1058	break;
1059	case JSON_STRING:
1060	emit_string(out, str: node->string_);
1061	break;
1062	case JSON_NUMBER:
1063	emit_number(out, num: node->number_);
1064	break;
1065	case JSON_ARRAY:
1066	emit_array_indented(out, array: node, space, indent_level);
1067	break;
1068	case JSON_OBJECT:
1069	emit_object_indented(out, object: node, space, indent_level);
1070	break;
1071	default:
1072	assert(false);
1073	}
1074	}
1075
1076	static void emit_array(SB out, const* JsonNode *array)
1077	{
1078	const JsonNode *element;
1079
1080	sb_putc(out, `'['`);
1081	json_foreach(element, array) {
1082	emit_value(out, node: element);
1083	if (element->next != NULL)
1084	sb_putc(out, `','`);
1085	}
1086	sb_putc(out, `']'`);
1087	}
1088
1089	static void emit_array_indented(SB out, const* JsonNode array, const* char space, int* indent_level)
1090	{
1091	const JsonNode *element = array->children.head;
1092	int i;
1093
1094	if (element == NULL) {
1095	sb_puts(sb: out, str: "[]");
1096	return;
1097	}
1098
1099	sb_puts(sb: out, str: "[\n");
1100	while (element != NULL) {
1101	for (i = `0`; i < indent_level + `1`; i++)
1102	sb_puts(sb: out, str: space);
1103	emit_value_indented(out, node: element, space, indent_level: indent_level + `1`);
1104
1105	element = element->next;
1106	sb_puts(sb: out, str: element != NULL ? ",\n" : "\n");
1107	}
1108	for (i = `0`; i < indent_level; i++)
1109	sb_puts(sb: out, str: space);
1110	sb_putc(out, `']'`);
1111	}
1112
1113	static void emit_object(SB out, const* JsonNode *object)
1114	{
1115	const JsonNode *member;
1116
1117	sb_putc(out, `'{'`);
1118	json_foreach(member, object) {
1119	emit_string(out, str: member->key);
1120	sb_putc(out, `':'`);
1121	emit_value(out, node: member);
1122	if (member->next != NULL)
1123	sb_putc(out, `','`);
1124	}
1125	sb_putc(out, `'}'`);
1126	}
1127
1128	static void emit_object_indented(SB out, const* JsonNode object, const* char space, int* indent_level)
1129	{
1130	const JsonNode *member = object->children.head;
1131	int i;
1132
1133	if (member == NULL) {
1134	sb_puts(sb: out, str: "{}");
1135	return;
1136	}
1137
1138	sb_puts(sb: out, str: "{\n");
1139	while (member != NULL) {
1140	for (i = `0`; i < indent_level + `1`; i++)
1141	sb_puts(sb: out, str: space);
1142	emit_string(out, str: member->key);
1143	sb_puts(sb: out, str: ": ");
1144	emit_value_indented(out, node: member, space, indent_level: indent_level + `1`);
1145
1146	member = member->next;
1147	sb_puts(sb: out, str: member != NULL ? ",\n" : "\n");
1148	}
1149	for (i = `0`; i < indent_level; i++)
1150	sb_puts(sb: out, str: space);
1151	sb_putc(out, `'}'`);
1152	}
1153
1154	void emit_string(SB out, const* char *str)
1155	{
1156	bool escape_unicode = false;
1157	const char *s = str;
1158	char *b;
1159
1160	// make assertion catchable
1161	#ifndef NDEBUG
1162	if (!utf8_validate(s: str)) {
1163	throw utf8::invalid_utf8 (`0`);
1164	}
1165	#endif
1166
1167	assert(utf8_validate(str));
1168
1169	/*
1170	* 14 bytes is enough space to write up to two
1171	* \uXXXX escapes and two quotation marks.
1172	*/
1173	sb_need(out, `14`);
1174	b = out->cur;
1175
1176	*b++ = `'"'`;
1177	while (*s != `0`) {
1178	unsigned char c = *s++;
1179
1180	/ Encode the next character, and write it to b. /
1181	switch (c) {
1182	case `'"'`:
1183	*b++ = `'\\'`;
1184	*b++ = `'"'`;
1185	break;
1186	case `'\\'`:
1187	*b++ = `'\\'`;
1188	*b++ = `'\\'`;
1189	break;
1190	case `'\b'`:
1191	*b++ = `'\\'`;
1192	*b++ = `'b'`;
1193	break;
1194	case `'\f'`:
1195	*b++ = `'\\'`;
1196	*b++ = `'f'`;
1197	break;
1198	case `'\n'`:
1199	*b++ = `'\\'`;
1200	*b++ = `'n'`;
1201	break;
1202	case `'\r'`:
1203	*b++ = `'\\'`;
1204	*b++ = `'r'`;
1205	break;
1206	case `'\t'`:
1207	*b++ = `'\\'`;
1208	*b++ = `'t'`;
1209	break;
1210	default: {
1211	int len;
1212
1213	s--;
1214	len = utf8_validate_cz(s);
1215
1216	if (len == `0`) {
1217	/*
1218	* Handle invalid UTF-8 character gracefully in production
1219	* by writing a replacement character (U+FFFD)
1220	* and skipping a single byte.
1221	*
1222	* This should never happen when assertions are enabled
1223	* due to the assertion at the beginning of this function.
1224	*/
1225	assert(false);
1226	if (escape_unicode) {
1227	strcpy(dest: b, src: "\\uFFFD");
1228	b += `6`;
1229	} else {
1230	*b++ = `0xEFu`;
1231	*b++ = `0xBFu`;
1232	*b++ = `0xBDu`;
1233	}
1234	s++;
1235	} else if (c < `0x1F` \|\| (c >= `0x80` && escape_unicode)) {
1236	/ Encode using \u.... /
1237	uint32_t unicode;
1238
1239	s += utf8_read_char(s, out: &unicode);
1240
1241	if (unicode <= `0xFFFF`) {
1242	*b++ = `'\\'`;
1243	*b++ = `'u'`;
1244	b += write_hex16(out: b, val: unicode);
1245	} else {
1246	/ Produce a surrogate pair. /
1247	uint16_t uc, lc;
1248	assert(unicode <= `0x10FFFF`);
1249	to_surrogate_pair(unicode, uc: &uc, lc: &lc);
1250	*b++ = `'\\'`;
1251	*b++ = `'u'`;
1252	b += write_hex16(out: b, val: uc);
1253	*b++ = `'\\'`;
1254	*b++ = `'u'`;
1255	b += write_hex16(out: b, val: lc);
1256	}
1257	} else {
1258	/ Write the character directly. /
1259	while (len--)
1260	b++ = s++;
1261	}
1262
1263	break;
1264	}
1265	}
1266
1267	/*
1268	* Update *out to know about the new bytes,
1269	* and set up b to write another encoded character.
1270	*/
1271	out->cur = b;
1272	sb_need(out, `14`);
1273	b = out->cur;
1274	}
1275	*b++ = `'"'`;
1276
1277	out->cur = b;
1278	}
1279
1280	static void emit_number(SB out, double* num)
1281	{
1282	/*
1283	* This isn't exactly how JavaScript renders numbers,
1284	* but it should produce valid JSON for reasonable numbers
1285	* preserve precision well enough, and avoid some oddities
1286	* like 0.3 -> 0.299999999999999988898 .
1287	*/
1288	char buf[`64`];
1289	sprintf(s: buf, format: "%.16g", num);
1290
1291	if (number_is_valid(num: buf))
1292	sb_puts(sb: out, str: buf);
1293	else
1294	sb_puts(sb: out, str: "null");
1295	}
1296
1297	static bool tag_is_valid(unsigned int tag)
1298	{
1299	return (/ tag >= JSON_NULL && / tag <= JSON_OBJECT);
1300	}
1301
1302	static bool number_is_valid(const char *num)
1303	{
1304	return (parse_number(sp: &num, NULL) && *num == `'\0'`);
1305	}
1306
1307	static bool expect_literal(const char *sp, const* char *str)
1308	{
1309	const char s = sp;
1310
1311	while (*str != `'\0'`)
1312	if (s++ != str++)
1313	return false;
1314
1315	*sp = s;
1316	return true;
1317	}
1318
1319	/*
1320	* Parses exactly 4 hex characters (capital or lowercase).
1321	* Fails if any input chars are not [0-9A-Fa-f].
1322	*/
1323	static bool parse_hex16(const char *sp, uint16_t out)
1324	{
1325	const char s = sp;
1326	uint16_t ret = `0`;
1327	uint16_t i;
1328	uint16_t tmp;
1329	char c;
1330
1331	for (i = `0`; i < `4`; i++) {
1332	c = *s++;
1333	if (c >= `'0'` && c <= `'9'`)
1334	tmp = c - `'0'`;
1335	else if (c >= `'A'` && c <= `'F'`)
1336	tmp = c - `'A'` + `10`;
1337	else if (c >= `'a'` && c <= `'f'`)
1338	tmp = c - `'a'` + `10`;
1339	else
1340	return false;
1341
1342	ret <<= `4`;
1343	ret += tmp;
1344	}
1345
1346	if (out)
1347	*out = ret;
1348	*sp = s;
1349	return true;
1350	}
1351
1352	/*
1353	* Encodes a 16-bit number into hexadecimal,
1354	* writing exactly 4 hex chars.
1355	*/
1356	static int write_hex16(char *out, uint16_t val)
1357	{
1358	const char *hex = "0123456789ABCDEF";
1359
1360	*out++ = hex[(val >> `12`) & `0xF`];
1361	*out++ = hex[(val >> `8`) & `0xF`];
1362	*out++ = hex[(val >> `4`) & `0xF`];
1363	*out++ = hex[ val & `0xF`];
1364
1365	return `4`;
1366	}
1367
1368	bool json_check(const JsonNode node, char* errmsg[`256`])
1369	{
1370	#define problem(...) do { \
1371	if (errmsg != NULL) \
1372	snprintf(errmsg, 256, __VA_ARGS__); \
1373	return false; \
1374	} while (0)
1375
1376	if (node->key != NULL && !utf8_validate(s: node->key))
1377	problem("key contains invalid UTF-8");
1378
1379	if (!tag_is_valid(tag: node->tag))
1380	problem("tag is invalid (%u)", node->tag);
1381
1382	if (node->tag == JSON_BOOL) {
1383	if (node->bool_ != false && node->bool_ != true)
1384	problem("bool_ is neither false (%d) nor true (%d)", (int)false, (int)true);
1385	} else if (node->tag == JSON_STRING) {
1386	if (node->string_ == NULL)
1387	problem("string_ is NULL");
1388	if (!utf8_validate(s: node->string_))
1389	problem("string_ contains invalid UTF-8");
1390	} else if (node->tag == JSON_ARRAY \|\| node->tag == JSON_OBJECT) {
1391	JsonNode *head = node->children.head;
1392	JsonNode *tail = node->children.tail;
1393
1394	if (head == NULL \|\| tail == NULL) {
1395	if (head != NULL)
1396	problem("tail is NULL, but head is not");
1397	if (tail != NULL)
1398	problem("head is NULL, but tail is not");
1399	} else {
1400	JsonNode *child;
1401	JsonNode *last = NULL;
1402
1403	if (head->prev != NULL)
1404	problem("First child's prev pointer is not NULL");
1405
1406	for (child = head; child != NULL; last = child, child = child->next) {
1407	if (child == node)
1408	problem("node is its own child");
1409	if (child->next == child)
1410	problem("child->next == child (cycle)");
1411	if (child->next == head)
1412	problem("child->next == head (cycle)");
1413
1414	if (child->parent != node)
1415	problem("child does not point back to parent");
1416	if (child->next != NULL && child->next->prev != child)
1417	problem("child->next does not point back to child");
1418
1419	if (node->tag == JSON_ARRAY && child->key != NULL)
1420	problem("Array element's key is not NULL");
1421	if (node->tag == JSON_OBJECT && child->key == NULL)
1422	problem("Object member's key is NULL");
1423
1424	if (!json_check(node: child, errmsg))
1425	return false;
1426	}
1427
1428	if (last != tail)
1429	problem("tail does not match pointer found by starting at head and following next links");
1430	}
1431	}
1432
1433	return true;
1434
1435	#undef problem
1436	}
1437

source code of gtk/subprojects/libsass/src/json.cpp