1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2021 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#ifndef QT_BOOTSTRAPPED
6#include <qcoreapplication.h>
7#endif
8#include <qdebug.h>
9#include "qjsonparser_p.h"
10#include "qjson_p.h"
11#include "private/qstringconverter_p.h"
12#include "private/qcborvalue_p.h"
13#include "private/qnumeric_p.h"
14#include <private/qtools_p.h>
15
// Optional parser tracing. Uncommenting the define below selects the qDebug()
// based variants of the QT_PARSER_TRACING_* macros; note that this branch
// currently stops the build with the #error that follows.
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
# error currently broken after `current` was moved to StashedContainer
// Current nesting depth of the trace output, used to indent each line by 4 spaces per level.
Q_CONSTINIT static int indent = 0;
// Opens a trace scope: prints the indentation and position, then increases the depth.
# define QT_PARSER_TRACING_BEGIN \
 qDebug() << QByteArray(4 * indent++, ' ').constData() << "pos=" << current
// Closes a trace scope by decreasing the depth.
# define QT_PARSER_TRACING_END --indent
// Emits one trace line at the current depth.
# define QT_PARSER_TRACING_DEBUG qDebug() << QByteArray(4 * indent, ' ').constData()
#else
// Tracing disabled: BEGIN and DEBUG expand to QT_NO_QDEBUG_MACRO(), so the
// `<< ...` chains written after them at the call sites remain valid statements.
# define QT_PARSER_TRACING_BEGIN QT_NO_QDEBUG_MACRO()
// Tracing disabled: END expands to an empty statement.
# define QT_PARSER_TRACING_END \
 do { \
 } while (0)
# define QT_PARSER_TRACING_DEBUG QT_NO_QDEBUG_MACRO()
#endif
31
// Maximum depth of nested arrays/objects the parser accepts; parseObject() and
// parseArray() report QJsonParseError::DeepNesting once it is exceeded.
static constexpr int nestingLimit = 1024;
33
34QT_BEGIN_NAMESPACE
35
36using namespace QtMiscUtils;
37
// Error strings for the JSON parser, one per QJsonParseError::ParseError value.
// Each string is wrapped in QT_TRANSLATE_NOOP with the "QJsonParseError" context,
// the same context QJsonParseError::errorString() passes to
// QCoreApplication::translate() when it looks the message up.
#define JSONERR_OK QT_TRANSLATE_NOOP("QJsonParseError", "no error occurred")
#define JSONERR_UNTERM_OBJ QT_TRANSLATE_NOOP("QJsonParseError", "unterminated object")
#define JSONERR_MISS_NSEP QT_TRANSLATE_NOOP("QJsonParseError", "missing name separator")
#define JSONERR_UNTERM_AR QT_TRANSLATE_NOOP("QJsonParseError", "unterminated array")
#define JSONERR_MISS_VSEP QT_TRANSLATE_NOOP("QJsonParseError", "missing value separator")
#define JSONERR_ILLEGAL_VAL QT_TRANSLATE_NOOP("QJsonParseError", "illegal value")
#define JSONERR_END_OF_NUM QT_TRANSLATE_NOOP("QJsonParseError", "invalid termination by number")
#define JSONERR_ILLEGAL_NUM QT_TRANSLATE_NOOP("QJsonParseError", "illegal number")
#define JSONERR_STR_ESC_SEQ QT_TRANSLATE_NOOP("QJsonParseError", "invalid escape sequence")
#define JSONERR_STR_UTF8 QT_TRANSLATE_NOOP("QJsonParseError", "invalid UTF8 string")
#define JSONERR_UTERM_STR QT_TRANSLATE_NOOP("QJsonParseError", "unterminated string")
#define JSONERR_MISS_OBJ QT_TRANSLATE_NOOP("QJsonParseError", "object is missing after a comma")
#define JSONERR_DEEP_NEST QT_TRANSLATE_NOOP("QJsonParseError", "too deeply nested document")
#define JSONERR_DOC_LARGE QT_TRANSLATE_NOOP("QJsonParseError", "too large document")
#define JSONERR_GARBAGEEND QT_TRANSLATE_NOOP("QJsonParseError", "garbage at the end of the document")
54
55/*!
56 \class QJsonParseError
57 \inmodule QtCore
58 \ingroup json
59 \ingroup shared
60 \ingroup qtserialization
61 \reentrant
62 \since 5.0
63
64 \brief The QJsonParseError class is used to report errors during JSON parsing.
65
66 \sa {JSON Support in Qt}, {Saving and Loading a Game}
67*/
68
69/*!
70 \enum QJsonParseError::ParseError
71
72 This enum describes the type of error that occurred during the parsing of a JSON document.
73
74 \value NoError No error occurred
75 \value UnterminatedObject An object is not correctly terminated with a closing curly bracket
    \value MissingNameSeparator A colon separating keys from values inside objects is missing
    \value UnterminatedArray The array is not correctly terminated with a closing square bracket
    \value MissingValueSeparator A comma separating different items is missing
79 \value IllegalValue The value is illegal
80 \value TerminationByNumber The input stream ended while parsing a number
81 \value IllegalNumber The number is not well formed
82 \value IllegalEscapeSequence An illegal escape sequence occurred in the input
83 \value IllegalUTF8String An illegal UTF8 sequence occurred in the input
84 \value UnterminatedString A string wasn't terminated with a quote
85 \value MissingObject An object was expected but couldn't be found
86 \value DeepNesting The JSON document is too deeply nested for the parser to parse it
87 \value DocumentTooLarge The JSON document is too large for the parser to parse it
88 \value GarbageAtEnd The parsed document contains additional garbage characters at the end
89
90*/
91
92/*!
93 \variable QJsonParseError::error
94
95 Contains the type of the parse error. Is equal to QJsonParseError::NoError if the document
96 was parsed correctly.
97
98 \sa ParseError, errorString()
99*/
100
101
102/*!
103 \variable QJsonParseError::offset
104
105 Contains the byte offset in the UTF-8 byte array where the parse error occurred.
106
107 \sa error, errorString(), QJsonDocument::fromJson()
108*/
109
110/*!
111 Returns the human-readable message appropriate to the reported JSON parsing error.
112
113 \sa error
114 */
115QString QJsonParseError::errorString() const
116{
117 const char *sz = "";
118 switch (error) {
119 case NoError:
120 sz = JSONERR_OK;
121 break;
122 case UnterminatedObject:
123 sz = JSONERR_UNTERM_OBJ;
124 break;
125 case MissingNameSeparator:
126 sz = JSONERR_MISS_NSEP;
127 break;
128 case UnterminatedArray:
129 sz = JSONERR_UNTERM_AR;
130 break;
131 case MissingValueSeparator:
132 sz = JSONERR_MISS_VSEP;
133 break;
134 case IllegalValue:
135 sz = JSONERR_ILLEGAL_VAL;
136 break;
137 case TerminationByNumber:
138 sz = JSONERR_END_OF_NUM;
139 break;
140 case IllegalNumber:
141 sz = JSONERR_ILLEGAL_NUM;
142 break;
143 case IllegalEscapeSequence:
144 sz = JSONERR_STR_ESC_SEQ;
145 break;
146 case IllegalUTF8String:
147 sz = JSONERR_STR_UTF8;
148 break;
149 case UnterminatedString:
150 sz = JSONERR_UTERM_STR;
151 break;
152 case MissingObject:
153 sz = JSONERR_MISS_OBJ;
154 break;
155 case DeepNesting:
156 sz = JSONERR_DEEP_NEST;
157 break;
158 case DocumentTooLarge:
159 sz = JSONERR_DOC_LARGE;
160 break;
161 case GarbageAtEnd:
162 sz = JSONERR_GARBAGEEND;
163 break;
164 }
165#ifndef QT_BOOTSTRAPPED
166 return QCoreApplication::translate(context: "QJsonParseError", key: sz);
167#else
168 return QLatin1StringView(sz);
169#endif
170}
171
172using namespace QJsonPrivate;
173
174class StashedContainer
175{
176 Q_DISABLE_COPY_MOVE(StashedContainer)
177public:
178 StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container,
179 QCborValue::Type type)
180 : type(type), stashed(std::move(*container)), current(container)
181 {
182 }
183
184 ~StashedContainer()
185 {
186 stashed->append(v: QCborContainerPrivate::makeValue(type, n: -1, d: current->take(),
187 disp: QCborContainerPrivate::MoveContainer));
188 *current = std::move(stashed);
189 }
190
191private:
192 QCborValue::Type type;
193 QExplicitlySharedDataPointer<QCborContainerPrivate> stashed;
194 QExplicitlySharedDataPointer<QCborContainerPrivate> *current;
195};
196
197Parser::Parser(const char *json, int length)
198 : head(json), json(json)
199 , nestingLevel(0)
200 , lastError(QJsonParseError::NoError)
201{
202 end = json + length;
203}
204
205
206
207/*
208
209begin-array = ws %x5B ws ; [ left square bracket
210
211begin-object = ws %x7B ws ; { left curly bracket
212
213end-array = ws %x5D ws ; ] right square bracket
214
215end-object = ws %x7D ws ; } right curly bracket
216
217name-separator = ws %x3A ws ; : colon
218
219value-separator = ws %x2C ws ; , comma
220
221Insignificant whitespace is allowed before or after any of the six
222structural characters.
223
224ws = *(
225 %x20 / ; Space
226 %x09 / ; Horizontal tab
227 %x0A / ; Line feed or New line
228 %x0D ; Carriage return
229 )
230
231*/
232
// Byte values of the whitespace and structural characters named in the
// grammar comment above, plus the string delimiter.
enum {
    // insignificant whitespace
    Space          = 0x20, // ' '
    Tab            = 0x09, // horizontal tab
    LineFeed       = 0x0a, // line feed / new line
    Return         = 0x0d, // carriage return

    // structural characters
    BeginArray     = 0x5b, // [
    BeginObject    = 0x7b, // {
    EndArray       = 0x5d, // ]
    EndObject      = 0x7d, // }
    NameSeparator  = 0x3a, // :
    ValueSeparator = 0x2c, // ,

    // string delimiter
    Quote          = 0x22  // "
};
246
247void Parser::eatBOM()
248{
249 // eat UTF-8 byte order mark
250 uchar utf8bom[3] = { 0xef, 0xbb, 0xbf };
251 if (end - json > 3 &&
252 (uchar)json[0] == utf8bom[0] &&
253 (uchar)json[1] == utf8bom[1] &&
254 (uchar)json[2] == utf8bom[2])
255 json += 3;
256}
257
258bool Parser::eatSpace()
259{
260 while (json < end) {
261 if (*json > Space)
262 break;
263 if (*json != Space &&
264 *json != Tab &&
265 *json != LineFeed &&
266 *json != Return)
267 break;
268 ++json;
269 }
270 return (json < end);
271}
272
273char Parser::nextToken()
274{
275 if (!eatSpace())
276 return 0;
277 char token = *json++;
278 switch (token) {
279 case BeginArray:
280 case BeginObject:
281 case NameSeparator:
282 case ValueSeparator:
283 case EndArray:
284 case EndObject:
285 case Quote:
286 break;
287 default:
288 token = 0;
289 break;
290 }
291 return token;
292}
293
294/*
295 JSON-text = object / array
296*/
297QCborValue Parser::parse(QJsonParseError *error)
298{
299#ifdef PARSER_DEBUG
300 indent = 0;
301 qDebug(">>>>> parser begin");
302#endif
303 eatBOM();
304 char token = nextToken();
305
306 QCborValue data;
307
308 QT_PARSER_TRACING_DEBUG << Qt::hex << (uint)token;
309 if (token == BeginArray) {
310 container = new QCborContainerPrivate;
311 if (!parseArray())
312 goto error;
313 data = QCborContainerPrivate::makeValue(type: QCborValue::Array, n: -1, d: container.take(),
314 disp: QCborContainerPrivate::MoveContainer);
315 } else if (token == BeginObject) {
316 container = new QCborContainerPrivate;
317 if (!parseObject())
318 goto error;
319 data = QCborContainerPrivate::makeValue(type: QCborValue::Map, n: -1, d: container.take(),
320 disp: QCborContainerPrivate::MoveContainer);
321 } else {
322 lastError = QJsonParseError::IllegalValue;
323 goto error;
324 }
325
326 eatSpace();
327 if (json < end) {
328 lastError = QJsonParseError::GarbageAtEnd;
329 goto error;
330 }
331
332 QT_PARSER_TRACING_END;
333 {
334 if (error) {
335 error->offset = 0;
336 error->error = QJsonParseError::NoError;
337 }
338
339 return data;
340 }
341
342error:
343#ifdef PARSER_DEBUG
344 qDebug(">>>>> parser error");
345#endif
346 container.reset();
347 if (error) {
348 error->offset = json - head;
349 error->error = lastError;
350 }
351 return QCborValue();
352}
353
// A hand-written std::unique() variant for ranges in which equal elements are
// adjacent. Of each run of equal elements the _last_ one is kept, and every
// element that gets dropped is overwritten through \a assign (rather than
// skipped) so that the caller can release whatever it referred to.
//
// \a compare(a, b) returns true when a and b are duplicates of each other.
// \a assign(target, source) stores source into target.
// Returns the new logical end of the range; [first, last) is returned
// unchanged (i.e. \a last) when there are no adjacent duplicates.
template<typename Iterator, typename Compare, typename Assign>
static Iterator customAssigningUniqueLast(Iterator first, Iterator last,
                                          Compare compare, Assign assign)
{
    Iterator write = std::adjacent_find(first, last, compare);
    if (write == last)
        return last;

    // adjacent_find guarantees that the element after `write` exists and
    // compares equal to it.
    Iterator read = write;
    ++read;
    Q_ASSERT(read != last);
    Q_ASSERT(compare(*write, *read));
    assign(*write, *read);

    for (++read; read != last; ++read) {
        // A different element opens a new slot; a duplicate replaces the
        // element in the current one.
        if (!compare(*write, *read))
            ++write;

        // At least one element has been eliminated already, so the write
        // position always trails the read position and every element has to
        // be moved across the gap.
        Q_ASSERT(write != read);

        // Assign unconditionally: eliminated elements must be overwritten so
        // that the caller's assign can release what they held.
        assign(*write, *read);
    }

    ++write;
    return write;
}
386
387static void sortContainer(QCborContainerPrivate *container)
388{
389 using Forward = QJsonPrivate::KeyIterator;
390 using Value = Forward::value_type;
391
392 auto compare = [container](const Value &a, const Value &b)
393 {
394 const auto &aKey = a.key();
395 const auto &bKey = b.key();
396
397 Q_ASSERT(aKey.flags & QtCbor::Element::HasByteData);
398 Q_ASSERT(bKey.flags & QtCbor::Element::HasByteData);
399
400 const QtCbor::ByteData *aData = container->byteData(e: aKey);
401 const QtCbor::ByteData *bData = container->byteData(e: bKey);
402
403 if (!aData)
404 return bData ? -1 : 0;
405 if (!bData)
406 return 1;
407
408 // US-ASCII (StringIsAscii flag) is just a special case of UTF-8
409 // string, so we can safely ignore the flag.
410
411 if (aKey.flags & QtCbor::Element::StringIsUtf16) {
412 if (bKey.flags & QtCbor::Element::StringIsUtf16)
413 return QtPrivate::compareStrings(lhs: aData->asStringView(), rhs: bData->asStringView());
414
415 return -QCborContainerPrivate::compareUtf8(b: bData, s: aData->asStringView());
416 } else {
417 if (bKey.flags & QtCbor::Element::StringIsUtf16)
418 return QCborContainerPrivate::compareUtf8(b: aData, s: bData->asStringView());
419
420 return QtPrivate::compareStrings(lhs: aData->asUtf8StringView(), rhs: bData->asUtf8StringView());
421 }
422 };
423
424 // The elements' containers are owned by the outer container, not by the elements themselves.
425 auto move = [](Forward::reference target, Forward::reference source)
426 {
427 QtCbor::Element &targetValue = target.value();
428
429 // If the target has a container, deref it before overwriting, so that we don't leak.
430 if (targetValue.flags & QtCbor::Element::IsContainer)
431 targetValue.container->deref();
432
433 // Do not move, so that we can clear the value afterwards.
434 target = source;
435
436 // Clear the source value, so that we don't store the same container twice.
437 source.value() = QtCbor::Element();
438 };
439
440 std::stable_sort(
441 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
442 comp: [&compare](const Value &a, const Value &b) { return compare(a, b) < 0; });
443
444 Forward result = customAssigningUniqueLast(
445 first: Forward(container->elements.begin()), last: Forward(container->elements.end()),
446 compare: [&compare](const Value &a, const Value &b) { return compare(a, b) == 0; }, assign: move);
447
448 container->elements.erase(abegin: result.elementsIterator(), aend: container->elements.end());
449}
450
451
452/*
453 object = begin-object [ member *( value-separator member ) ]
454 end-object
455*/
456
457bool Parser::parseObject()
458{
459 if (++nestingLevel > nestingLimit) {
460 lastError = QJsonParseError::DeepNesting;
461 return false;
462 }
463
464 QT_PARSER_TRACING_BEGIN << "parseObject" << json;
465
466 char token = nextToken();
467 while (token == Quote) {
468 if (!container)
469 container = new QCborContainerPrivate;
470 if (!parseMember())
471 return false;
472 token = nextToken();
473 if (token != ValueSeparator)
474 break;
475 token = nextToken();
476 if (token == EndObject) {
477 lastError = QJsonParseError::MissingObject;
478 return false;
479 }
480 }
481
482 QT_PARSER_TRACING_DEBUG << "end token=" << token;
483 if (token != EndObject) {
484 lastError = QJsonParseError::UnterminatedObject;
485 return false;
486 }
487
488 QT_PARSER_TRACING_END;
489
490 --nestingLevel;
491
492 if (container)
493 sortContainer(container: container.data());
494 return true;
495}
496
497/*
498 member = string name-separator value
499*/
500bool Parser::parseMember()
501{
502 QT_PARSER_TRACING_BEGIN << "parseMember";
503
504 if (!parseString())
505 return false;
506 char token = nextToken();
507 if (token != NameSeparator) {
508 lastError = QJsonParseError::MissingNameSeparator;
509 return false;
510 }
511 if (!eatSpace()) {
512 lastError = QJsonParseError::UnterminatedObject;
513 return false;
514 }
515 if (!parseValue())
516 return false;
517
518 QT_PARSER_TRACING_END;
519 return true;
520}
521
522/*
523 array = begin-array [ value *( value-separator value ) ] end-array
524*/
525bool Parser::parseArray()
526{
527 QT_PARSER_TRACING_BEGIN << "parseArray";
528
529 if (++nestingLevel > nestingLimit) {
530 lastError = QJsonParseError::DeepNesting;
531 return false;
532 }
533
534 if (!eatSpace()) {
535 lastError = QJsonParseError::UnterminatedArray;
536 return false;
537 }
538 if (*json == EndArray) {
539 nextToken();
540 } else {
541 while (1) {
542 if (!eatSpace()) {
543 lastError = QJsonParseError::UnterminatedArray;
544 return false;
545 }
546 if (!container)
547 container = new QCborContainerPrivate;
548 if (!parseValue())
549 return false;
550 char token = nextToken();
551 if (token == EndArray)
552 break;
553 else if (token != ValueSeparator) {
554 if (!eatSpace())
555 lastError = QJsonParseError::UnterminatedArray;
556 else
557 lastError = QJsonParseError::MissingValueSeparator;
558 return false;
559 }
560 }
561 }
562
563 QT_PARSER_TRACING_DEBUG << "size =" << (container ? container->elements.size() : 0);
564 QT_PARSER_TRACING_END;
565
566 --nestingLevel;
567
568 return true;
569}
570
571/*
572value = false / null / true / object / array / number / string
573
574*/
575
576bool Parser::parseValue()
577{
578 QT_PARSER_TRACING_BEGIN << "parse Value" << json;
579
580 switch (*json++) {
581 case 'n':
582 if (end - json < 4) {
583 lastError = QJsonParseError::IllegalValue;
584 return false;
585 }
586 if (*json++ == 'u' &&
587 *json++ == 'l' &&
588 *json++ == 'l') {
589 container->append(v: QCborValue(QCborValue::Null));
590 QT_PARSER_TRACING_DEBUG << "value: null";
591 QT_PARSER_TRACING_END;
592 return true;
593 }
594 lastError = QJsonParseError::IllegalValue;
595 return false;
596 case 't':
597 if (end - json < 4) {
598 lastError = QJsonParseError::IllegalValue;
599 return false;
600 }
601 if (*json++ == 'r' &&
602 *json++ == 'u' &&
603 *json++ == 'e') {
604 container->append(v: QCborValue(true));
605 QT_PARSER_TRACING_DEBUG << "value: true";
606 QT_PARSER_TRACING_END;
607 return true;
608 }
609 lastError = QJsonParseError::IllegalValue;
610 return false;
611 case 'f':
612 if (end - json < 5) {
613 lastError = QJsonParseError::IllegalValue;
614 return false;
615 }
616 if (*json++ == 'a' &&
617 *json++ == 'l' &&
618 *json++ == 's' &&
619 *json++ == 'e') {
620 container->append(v: QCborValue(false));
621 QT_PARSER_TRACING_DEBUG << "value: false";
622 QT_PARSER_TRACING_END;
623 return true;
624 }
625 lastError = QJsonParseError::IllegalValue;
626 return false;
627 case Quote: {
628 if (!parseString())
629 return false;
630 QT_PARSER_TRACING_DEBUG << "value: string";
631 QT_PARSER_TRACING_END;
632 return true;
633 }
634 case BeginArray: {
635 StashedContainer stashedContainer(&container, QCborValue::Array);
636 if (!parseArray())
637 return false;
638 QT_PARSER_TRACING_DEBUG << "value: array";
639 QT_PARSER_TRACING_END;
640 return true;
641 }
642 case BeginObject: {
643 StashedContainer stashedContainer(&container, QCborValue::Map);
644 if (!parseObject())
645 return false;
646 QT_PARSER_TRACING_DEBUG << "value: object";
647 QT_PARSER_TRACING_END;
648 return true;
649 }
650 case ValueSeparator:
651 // Essentially missing value, but after a colon, not after a comma
652 // like the other MissingObject errors.
653 lastError = QJsonParseError::IllegalValue;
654 return false;
655 case EndObject:
656 case EndArray:
657 lastError = QJsonParseError::MissingObject;
658 return false;
659 default:
660 --json;
661 if (!parseNumber())
662 return false;
663 QT_PARSER_TRACING_DEBUG << "value: number";
664 QT_PARSER_TRACING_END;
665 }
666
667 return true;
668}
669
670
671
672
673
674/*
675 number = [ minus ] int [ frac ] [ exp ]
676 decimal-point = %x2E ; .
677 digit1-9 = %x31-39 ; 1-9
678 e = %x65 / %x45 ; e E
679 exp = e [ minus / plus ] 1*DIGIT
680 frac = decimal-point 1*DIGIT
681 int = zero / ( digit1-9 *DIGIT )
682 minus = %x2D ; -
683 plus = %x2B ; +
684 zero = %x30 ; 0
685
686*/
687
688bool Parser::parseNumber()
689{
690 QT_PARSER_TRACING_BEGIN << "parseNumber" << json;
691
692 const char *start = json;
693 bool isInt = true;
694
695 // minus
696 if (json < end && *json == '-')
697 ++json;
698
699 // int = zero / ( digit1-9 *DIGIT )
700 if (json < end && *json == '0') {
701 ++json;
702 } else {
703 while (json < end && isAsciiDigit(c: *json))
704 ++json;
705 }
706
707 // frac = decimal-point 1*DIGIT
708 if (json < end && *json == '.') {
709 ++json;
710 while (json < end && isAsciiDigit(c: *json)) {
711 isInt = isInt && *json == '0';
712 ++json;
713 }
714 }
715
716 // exp = e [ minus / plus ] 1*DIGIT
717 if (json < end && (*json == 'e' || *json == 'E')) {
718 isInt = false;
719 ++json;
720 if (json < end && (*json == '-' || *json == '+'))
721 ++json;
722 while (json < end && isAsciiDigit(c: *json))
723 ++json;
724 }
725
726 if (json >= end) {
727 lastError = QJsonParseError::TerminationByNumber;
728 return false;
729 }
730
731 const QByteArray number = QByteArray::fromRawData(data: start, size: json - start);
732 QT_PARSER_TRACING_DEBUG << "numberstring" << number;
733
734 if (isInt) {
735 bool ok;
736 qlonglong n = number.toLongLong(ok: &ok);
737 if (ok) {
738 container->append(v: QCborValue(n));
739 QT_PARSER_TRACING_END;
740 return true;
741 }
742 }
743
744 bool ok;
745 double d = number.toDouble(ok: &ok);
746
747 if (!ok) {
748 lastError = QJsonParseError::IllegalNumber;
749 return false;
750 }
751
752 qint64 n;
753 if (convertDoubleTo(v: d, value: &n))
754 container->append(v: QCborValue(n));
755 else
756 container->append(v: QCborValue(d));
757
758 QT_PARSER_TRACING_END;
759 return true;
760}
761
762/*
763
764 string = quotation-mark *char quotation-mark
765
766 char = unescaped /
767 escape (
768 %x22 / ; " quotation mark U+0022
769 %x5C / ; \ reverse solidus U+005C
770 %x2F / ; / solidus U+002F
771 %x62 / ; b backspace U+0008
772 %x66 / ; f form feed U+000C
773 %x6E / ; n line feed U+000A
774 %x72 / ; r carriage return U+000D
775 %x74 / ; t tab U+0009
776 %x75 4HEXDIG ) ; uXXXX U+XXXX
777
778 escape = %x5C ; \
779
780 quotation-mark = %x22 ; "
781
782 unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
783 */
784static inline bool addHexDigit(char digit, char32_t *result)
785{
786 *result <<= 4;
787 const int h = fromHex(c: digit);
788 if (h != -1) {
789 *result |= h;
790 return true;
791 }
792
793 return false;
794}
795
796static inline bool scanEscapeSequence(const char *&json, const char *end, char32_t *ch)
797{
798 ++json;
799 if (json >= end)
800 return false;
801
802 QT_PARSER_TRACING_DEBUG << "scan escape" << (char)*json;
803 uchar escaped = *json++;
804 switch (escaped) {
805 case '"':
806 *ch = '"'; break;
807 case '\\':
808 *ch = '\\'; break;
809 case '/':
810 *ch = '/'; break;
811 case 'b':
812 *ch = 0x8; break;
813 case 'f':
814 *ch = 0xc; break;
815 case 'n':
816 *ch = 0xa; break;
817 case 'r':
818 *ch = 0xd; break;
819 case 't':
820 *ch = 0x9; break;
821 case 'u': {
822 *ch = 0;
823 if (json > end - 4)
824 return false;
825 for (int i = 0; i < 4; ++i) {
826 if (!addHexDigit(digit: *json, result: ch))
827 return false;
828 ++json;
829 }
830 return true;
831 }
832 default:
833 // this is not as strict as one could be, but allows for more Json files
834 // to be parsed correctly.
835 *ch = escaped;
836 return true;
837 }
838 return true;
839}
840
841static inline bool scanUtf8Char(const char *&json, const char *end, char32_t *result)
842{
843 const auto *usrc = reinterpret_cast<const uchar *>(json);
844 const auto *uend = reinterpret_cast<const uchar *>(end);
845 const uchar b = *usrc++;
846 qsizetype res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst&: result, src&: usrc, end: uend);
847 if (res < 0)
848 return false;
849
850 json = reinterpret_cast<const char *>(usrc);
851 return true;
852}
853
854bool Parser::parseString()
855{
856 const char *start = json;
857
858 // try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.
859
860 QT_PARSER_TRACING_BEGIN << "parse string" << json;
861 bool isUtf8 = true;
862 bool isAscii = true;
863 while (json < end) {
864 char32_t ch = 0;
865 if (*json == '"')
866 break;
867 if (*json == '\\') {
868 isAscii = false;
869 // If we find escape sequences, we store UTF-16 as there are some
870 // escape sequences which are hard to represent in UTF-8.
871 // (plain "\\ud800" for example)
872 isUtf8 = false;
873 break;
874 }
875 if (!scanUtf8Char(json, end, result: &ch)) {
876 lastError = QJsonParseError::IllegalUTF8String;
877 return false;
878 }
879 if (ch > 0x7f)
880 isAscii = false;
881 QT_PARSER_TRACING_DEBUG << " " << ch << char(ch);
882 }
883 ++json;
884 QT_PARSER_TRACING_DEBUG << "end of string";
885 if (json >= end) {
886 lastError = QJsonParseError::UnterminatedString;
887 return false;
888 }
889
890 // no escape sequences, we are done
891 if (isUtf8) {
892 if (isAscii)
893 container->appendAsciiString(str: start, len: json - start - 1);
894 else
895 container->appendUtf8String(str: start, len: json - start - 1);
896 QT_PARSER_TRACING_END;
897 return true;
898 }
899
900 QT_PARSER_TRACING_DEBUG << "has escape sequences";
901
902 json = start;
903
904 QString ucs4;
905 while (json < end) {
906 char32_t ch = 0;
907 if (*json == '"')
908 break;
909 else if (*json == '\\') {
910 if (!scanEscapeSequence(json, end, ch: &ch)) {
911 lastError = QJsonParseError::IllegalEscapeSequence;
912 return false;
913 }
914 } else {
915 if (!scanUtf8Char(json, end, result: &ch)) {
916 lastError = QJsonParseError::IllegalUTF8String;
917 return false;
918 }
919 }
920 ucs4.append(v: QChar::fromUcs4(c: ch));
921 }
922 ++json;
923
924 if (json >= end) {
925 lastError = QJsonParseError::UnterminatedString;
926 return false;
927 }
928
929 container->appendByteData(data: reinterpret_cast<const char *>(ucs4.constData()), len: ucs4.size() * 2,
930 type: QCborValue::String, extraFlags: QtCbor::Element::StringIsUtf16);
931 QT_PARSER_TRACING_END;
932 return true;
933}
934
935QT_END_NAMESPACE
936
937#undef QT_PARSER_TRACING_BEGIN
938#undef QT_PARSER_TRACING_END
939#undef QT_PARSER_TRACING_DEBUG
940

Provided by KDAB

Privacy Policy
Learn Advanced QML with KDAB
Find out more

source code of qtbase/src/corelib/serialization/qjsonparser.cpp