1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qqmljslexer_p.h"
5#include "qqmljsengine_p.h"
6#include "qqmljskeywords_p.h"
7
8#include <private/qqmljsdiagnosticmessage_p.h>
9#include <private/qqmljsmemorypool_p.h>
10#include <private/qlocale_tools_p.h>
11
12
13#include <QtCore/qcoreapplication.h>
14#include <QtCore/qvarlengtharray.h>
15#include <QtCore/qdebug.h>
16#include <QtCore/QScopedValueRollback>
17
18#include <optional>
19
20QT_BEGIN_NAMESPACE
21using namespace QQmlJS;
22using namespace Qt::StringLiterals;
23
24static inline int regExpFlagFromChar(const QChar &ch)
25{
26 switch (ch.unicode()) {
27 case 'g': return Lexer::RegExp_Global;
28 case 'i': return Lexer::RegExp_IgnoreCase;
29 case 'm': return Lexer::RegExp_Multiline;
30 case 'u': return Lexer::RegExp_Unicode;
31 case 'y': return Lexer::RegExp_Sticky;
32 }
33 return 0;
34}
35
// Returns the numeric value of one hexadecimal digit.
// No validation is performed: everything that is neither '0'-'9' nor 'a'-'f'
// is handled as if it were an upper-case digit 'A'-'F'.
static inline unsigned char convertHex(ushort c)
{
    const bool isDecimal = (c >= '0' && c <= '9');
    if (isDecimal)
        return (c - '0');

    const bool isLowerCase = (c >= 'a' && c <= 'f');
    const int base = isLowerCase ? 'a' : 'A';
    return (c - base + 10);
}
45
46static inline QChar convertHex(QChar c1, QChar c2)
47{
48 return QChar((convertHex(c: c1.unicode()) << 4) + convertHex(c: c2.unicode()));
49}
50
51Lexer::Lexer(Engine *engine, LexMode lexMode)
52 : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
53{
54 if (engine)
55 engine->setLexer(this);
56}
57
58bool Lexer::qmlMode() const
59{
60 return _qmlMode;
61}
62
63QString Lexer::code() const
64{
65 return _code;
66}
67
68std::optional<DiagnosticMessage> Lexer::illegalFileLengthError() const
69{
70 Q_ASSERT(_currentOffset >= 0);
71
72 constexpr bool quint32IsBigger = sizeof(qsizetype) <= sizeof(quint32);
73 using BiggerInt = std::conditional_t<quint32IsBigger, quint32, qsizetype>;
74 using SmallerInt = std::conditional_t<!quint32IsBigger, quint32, qsizetype>;
75
76 const BiggerInt codeLength = BiggerInt(_currentOffset) + BiggerInt(_code.size());
77 const BiggerInt maxLength = BiggerInt(std::numeric_limits<SmallerInt>::max());
78 if (codeLength < maxLength)
79 return {};
80
81 constexpr int limit = quint32IsBigger ? 2 : 4;
82 return DiagnosticMessage{ .message: u"File exceeds maximum length (%1GB)."_s.arg(a: limit), .type: QtCriticalMsg,
83 .loc: SourceLocation{ 0, 1, 1, 1 } };
84}
85
86void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
87 Lexer::CodeContinuation codeContinuation)
88{
89 if (codeContinuation == Lexer::CodeContinuation::Continue)
90 _currentOffset += _code.size();
91 else
92 _currentOffset = 0;
93 if (_engine)
94 _engine->setCode(code);
95
96 _qmlMode = qmlMode;
97 _code = code;
98 _skipLinefeed = false;
99
100 _tokenText.clear();
101 _tokenText.reserve(asize: 1024);
102 _errorMessage.clear();
103 _tokenSpell = QStringView();
104 _rawString = QStringView();
105
106 _codePtr = code.unicode();
107 _endPtr = _codePtr + code.size();
108 _tokenStartPtr = _codePtr;
109
110 if (lineno >= 0)
111 _currentLineNumber = lineno;
112 _currentColumnNumber = 0;
113 _tokenLine = _currentLineNumber;
114 _tokenColumn = 0;
115 _tokenLength = 0;
116
117 if (codeContinuation == Lexer::CodeContinuation::Reset)
118 _state = State {};
119}
120
121void Lexer::scanChar()
122{
123 if (_skipLinefeed) {
124 Q_ASSERT(*_codePtr == u'\n');
125 ++_codePtr;
126 _skipLinefeed = false;
127 }
128 _state.currentChar = *_codePtr++;
129 ++_currentColumnNumber;
130
131 if (isLineTerminator()) {
132 if (_state.currentChar == u'\r') {
133 if (_codePtr < _endPtr && *_codePtr == u'\n')
134 _skipLinefeed = true;
135 _state.currentChar = u'\n';
136 }
137 ++_currentLineNumber;
138 _currentColumnNumber = 0;
139 }
140}
141
142QChar Lexer::peekChar()
143{
144 auto peekPtr = _codePtr;
145 if (peekPtr < _endPtr)
146 return *peekPtr;
147 return QChar();
148}
149
150namespace {
151inline bool isBinop(int tok)
152{
153 switch (tok) {
154 case Lexer::T_AND:
155 case Lexer::T_AND_AND:
156 case Lexer::T_AND_EQ:
157 case Lexer::T_DIVIDE_:
158 case Lexer::T_DIVIDE_EQ:
159 case Lexer::T_EQ:
160 case Lexer::T_EQ_EQ:
161 case Lexer::T_EQ_EQ_EQ:
162 case Lexer::T_GE:
163 case Lexer::T_GT:
164 case Lexer::T_GT_GT:
165 case Lexer::T_GT_GT_EQ:
166 case Lexer::T_GT_GT_GT:
167 case Lexer::T_GT_GT_GT_EQ:
168 case Lexer::T_LE:
169 case Lexer::T_LT:
170 case Lexer::T_LT_LT:
171 case Lexer::T_LT_LT_EQ:
172 case Lexer::T_MINUS:
173 case Lexer::T_MINUS_EQ:
174 case Lexer::T_NOT_EQ:
175 case Lexer::T_NOT_EQ_EQ:
176 case Lexer::T_OR:
177 case Lexer::T_OR_EQ:
178 case Lexer::T_OR_OR:
179 case Lexer::T_PLUS:
180 case Lexer::T_PLUS_EQ:
181 case Lexer::T_REMAINDER:
182 case Lexer::T_REMAINDER_EQ:
183 case Lexer::T_RETURN:
184 case Lexer::T_STAR:
185 case Lexer::T_STAR_EQ:
186 case Lexer::T_XOR:
187 case Lexer::T_XOR_EQ:
188 return true;
189
190 default:
191 return false;
192 }
193}
194
195int hexDigit(QChar c)
196{
197 if (c >= u'0' && c <= u'9')
198 return c.unicode() - u'0';
199 if (c >= u'a' && c <= u'f')
200 return c.unicode() - u'a' + 10;
201 if (c >= u'A' && c <= u'F')
202 return c.unicode() - u'A' + 10;
203 return -1;
204}
205
206int octalDigit(QChar c)
207{
208 if (c >= u'0' && c <= u'7')
209 return c.unicode() - u'0';
210 return -1;
211}
212
213} // anonymous namespace
214
215int Lexer::lex()
216{
217 const int previousTokenKind = _state.tokenKind;
218 int tokenKind;
219 bool firstPass = true;
220
221 again:
222 tokenKind = T_ERROR;
223 _tokenSpell = QStringView();
224 _rawString = QStringView();
225 if (firstPass && _state.stackToken == -1) {
226 firstPass = false;
227 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
228 return T_EOL;
229
230 if (_state.comments == CommentState::InMultilineComment) {
231 scanChar();
232 _tokenStartPtr = _codePtr - 1;
233 _tokenLine = _currentLineNumber;
234 _tokenColumn = _currentColumnNumber;
235 while (_codePtr <= _endPtr) {
236 if (_state.currentChar == u'*') {
237 scanChar();
238 if (_state.currentChar == u'/') {
239 scanChar();
240 if (_engine) {
241 _engine->addComment(pos: tokenOffset() + 2,
242 len: _codePtr - _tokenStartPtr - 1 - 4,
243 line: tokenStartLine(), col: tokenStartColumn() + 2);
244 }
245 tokenKind = T_COMMENT;
246 break;
247 }
248 } else {
249 scanChar();
250 }
251 }
252 if (tokenKind == T_ERROR)
253 tokenKind = T_PARTIAL_COMMENT;
254 } else {
255 // handle multiline continuation
256 std::optional<ScanStringMode> scanMode;
257 switch (previousTokenKind) {
258 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
259 scanMode = ScanStringMode::SingleQuote;
260 break;
261 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
262 scanMode = ScanStringMode::DoubleQuote;
263 break;
264 case T_PARTIAL_TEMPLATE_HEAD:
265 scanMode = ScanStringMode::TemplateHead;
266 break;
267 case T_PARTIAL_TEMPLATE_MIDDLE:
268 scanMode = ScanStringMode::TemplateContinuation;
269 break;
270 default:
271 break;
272 }
273 if (scanMode) {
274 scanChar();
275 _tokenStartPtr = _codePtr - 1;
276 _tokenLine = _currentLineNumber;
277 _tokenColumn = _currentColumnNumber;
278 tokenKind = scanString(mode: *scanMode);
279 }
280 }
281 }
282 if (tokenKind == T_ERROR)
283 tokenKind = scanToken();
284 _tokenLength = _codePtr - _tokenStartPtr - 1;
285 switch (tokenKind) {
286 // end of line and comments should not "overwrite" the old token type...
287 case T_EOL:
288 return tokenKind;
289 case T_COMMENT:
290 _state.comments = CommentState::HadComment;
291 return tokenKind;
292 case T_PARTIAL_COMMENT:
293 _state.comments = CommentState::InMultilineComment;
294 return tokenKind;
295 default:
296 _state.comments = CommentState::NoComment;
297 break;
298 }
299 _state.tokenKind = tokenKind;
300
301 _state.delimited = false;
302 _state.restrictedKeyword = false;
303 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
304
305 // update the flags
306 switch (_state.tokenKind) {
307 case T_LBRACE:
308 if (_state.bracesCount > 0)
309 ++_state.bracesCount;
310 Q_FALLTHROUGH();
311 case T_SEMICOLON:
312 _state.importState = ImportState::NoQmlImport;
313 Q_FALLTHROUGH();
314 case T_QUESTION:
315 case T_COLON:
316 case T_TILDE:
317 _state.delimited = true;
318 break;
319 case T_AUTOMATIC_SEMICOLON:
320 case T_AS:
321 _state.importState = ImportState::NoQmlImport;
322 Q_FALLTHROUGH();
323 default:
324 if (isBinop(tok: _state.tokenKind))
325 _state.delimited = true;
326 break;
327
328 case T_IMPORT:
329 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
330 _state.importState = ImportState::SawImport;
331 if (isBinop(tok: _state.tokenKind))
332 _state.delimited = true;
333 break;
334
335 case T_IF:
336 case T_FOR:
337 case T_WHILE:
338 case T_WITH:
339 _state.parenthesesState = CountParentheses;
340 _state.parenthesesCount = 0;
341 break;
342
343 case T_ELSE:
344 case T_DO:
345 _state.parenthesesState = BalancedParentheses;
346 break;
347
348 case T_CONTINUE:
349 case T_BREAK:
350 case T_RETURN:
351 case T_YIELD:
352 case T_THROW:
353 _state.restrictedKeyword = true;
354 break;
355 case T_RBRACE:
356 if (_state.bracesCount > 0)
357 --_state.bracesCount;
358 if (_state.bracesCount == 0)
359 goto again;
360 } // switch
361
362 // update the parentheses state
363 switch (_state.parenthesesState) {
364 case IgnoreParentheses:
365 break;
366
367 case CountParentheses:
368 if (_state.tokenKind == T_RPAREN) {
369 --_state.parenthesesCount;
370 if (_state.parenthesesCount == 0)
371 _state.parenthesesState = BalancedParentheses;
372 } else if (_state.tokenKind == T_LPAREN) {
373 ++_state.parenthesesCount;
374 }
375 break;
376
377 case BalancedParentheses:
378 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
379 _state.parenthesesState = IgnoreParentheses;
380 break;
381 } // switch
382
383 return _state.tokenKind;
384}
385
386uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
387{
388 Q_ASSERT(_state.currentChar == u'u');
389 scanChar(); // skip u
390 constexpr int distanceFromFirstHexToLastHex = 3;
391 if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(c: _state.currentChar)) {
392 uint codePoint = 0;
393 for (int i = 0; i < 4; ++i) {
394 int digit = hexDigit(c: _state.currentChar);
395 if (digit < 0)
396 goto error;
397 codePoint *= 16;
398 codePoint += digit;
399 scanChar();
400 }
401
402 *ok = true;
403 return codePoint;
404 } else if (_codePtr < _endPtr && _state.currentChar == u'{') {
405 scanChar(); // skip '{'
406 uint codePoint = 0;
407 if (!isHexDigit(c: _state.currentChar))
408 // need at least one hex digit
409 goto error;
410
411 while (_codePtr <= _endPtr) {
412 int digit = hexDigit(c: _state.currentChar);
413 if (digit < 0)
414 break;
415 codePoint *= 16;
416 codePoint += digit;
417 if (codePoint > 0x10ffff)
418 goto error;
419 scanChar();
420 }
421
422 if (_state.currentChar != u'}')
423 goto error;
424
425 scanChar(); // skip '}'
426
427
428 *ok = true;
429 return codePoint;
430 }
431
432error:
433 _state.errorCode = IllegalUnicodeEscapeSequence;
434 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Illegal unicode escape sequence");
435
436 *ok = false;
437 return 0;
438}
439
440QChar Lexer::decodeHexEscapeCharacter(bool *ok)
441{
442 if (isHexDigit(c: _codePtr[0]) && isHexDigit(c: _codePtr[1])) {
443 scanChar();
444
445 const QChar c1 = _state.currentChar;
446 scanChar();
447
448 const QChar c2 = _state.currentChar;
449 scanChar();
450
451 if (ok)
452 *ok = true;
453
454 return convertHex(c1, c2);
455 }
456
457 *ok = false;
458 return QChar();
459}
460
461namespace QQmlJS {
462QDebug operator<<(QDebug dbg, const Lexer &l)
463{
464 dbg << "{\n"
465 << " engine:" << qsizetype(l._engine) << ",\n"
466 << " lexMode:" << int(l._lexMode) << ",\n"
467 << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
468 << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
469 << " qmlMode:" << l._qmlMode << ",\n"
470 << " staticIsKeyword:" << l._staticIsKeyword << ",\n"
471 << " currentLineNumber:" << l._currentLineNumber << ",\n"
472 << " currentColumnNumber:" << l._currentColumnNumber << ",\n"
473 << " currentOffset:" << l._currentOffset << ",\n"
474 << " tokenLength:" << l._tokenLength << ",\n"
475 << " tokenLine:" << l._tokenLine << ",\n"
476 << " tokenColumn:" << l._tokenColumn << ",\n"
477 << " tokenText:" << l._tokenText << ",\n"
478 << " skipLinefeed:" << l._skipLinefeed << ",\n"
479 << " errorMessage:" << l._errorMessage << ",\n"
480 << " tokenSpell:" << l._tokenSpell << ",\n"
481 << " rawString:" << l._rawString << ",\n";
482 if (l._codePtr)
483 dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
484 else
485 dbg << " codePtr: *null*,\n";
486 if (l._tokenStartPtr)
487 dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
488 else
489 dbg << " tokenStartPtr: *null*,\n";
490 dbg << " state:" << l._state << "\n}";
491 return dbg;
492}
493}
494
495static inline bool isIdentifierStart(uint ch)
496{
497 // fast path for ascii
498 if ((ch >= u'a' && ch <= u'z') ||
499 (ch >= u'A' && ch <= u'Z') ||
500 ch == u'$' || ch == u'_')
501 return true;
502
503 switch (QChar::category(ucs4: ch)) {
504 case QChar::Number_Letter:
505 case QChar::Letter_Uppercase:
506 case QChar::Letter_Lowercase:
507 case QChar::Letter_Titlecase:
508 case QChar::Letter_Modifier:
509 case QChar::Letter_Other:
510 return true;
511 default:
512 break;
513 }
514 return false;
515}
516
517static bool isIdentifierPart(uint ch)
518{
519 // fast path for ascii
520 if ((ch >= u'a' && ch <= u'z') ||
521 (ch >= u'A' && ch <= u'Z') ||
522 (ch >= u'0' && ch <= u'9') ||
523 ch == u'$' || ch == u'_' ||
524 ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
525 return true;
526
527 switch (QChar::category(ucs4: ch)) {
528 case QChar::Mark_NonSpacing:
529 case QChar::Mark_SpacingCombining:
530
531 case QChar::Number_DecimalDigit:
532 case QChar::Number_Letter:
533
534 case QChar::Letter_Uppercase:
535 case QChar::Letter_Lowercase:
536 case QChar::Letter_Titlecase:
537 case QChar::Letter_Modifier:
538 case QChar::Letter_Other:
539
540 case QChar::Punctuation_Connector:
541 return true;
542 default:
543 break;
544 }
545 return false;
546}
547
548int Lexer::scanToken()
549{
550 if (_state.stackToken != -1) {
551 int tk = _state.stackToken;
552 _state.stackToken = -1;
553 return tk;
554 }
555
556 if (_state.bracesCount == 0) {
557 // we're inside a Template string
558 return scanString(mode: TemplateContinuation);
559 }
560
561 if (_state.comments == CommentState::NoComment)
562 _state.terminator = false;
563
564again:
565 _state.validTokenText = false;
566
567 while (_state.currentChar.isSpace()) {
568 if (isLineTerminator()) {
569 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
570 if (_state.restrictedKeyword) {
571 // automatic semicolon insertion
572 _tokenLine = _currentLineNumber;
573 _tokenColumn = _currentColumnNumber;
574 _tokenStartPtr = _codePtr - 1;
575 return T_SEMICOLON;
576 } else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
577 _state.terminator = true;
578 syncProhibitAutomaticSemicolon();
579 } // else we will do the previous things at the start of next line...
580 }
581
582 scanChar();
583 }
584
585 _tokenStartPtr = _codePtr - 1;
586 _tokenLine = _currentLineNumber;
587 _tokenColumn = _currentColumnNumber;
588
589 if (_codePtr >= _endPtr) {
590 if (_lexMode == LexMode::LineByLine) {
591 if (!_code.isEmpty()) {
592 _state.currentChar = *(_codePtr - 2);
593 return T_EOL;
594 } else {
595 return EOF_SYMBOL;
596 }
597 } else if (_codePtr > _endPtr) {
598 return EOF_SYMBOL;
599 }
600 }
601
602 const QChar ch = _state.currentChar;
603 scanChar();
604
605 switch (ch.unicode()) {
606 case u'~': return T_TILDE;
607 case u'}': return T_RBRACE;
608
609 case u'|':
610 if (_state.currentChar == u'|') {
611 scanChar();
612 return T_OR_OR;
613 } else if (_state.currentChar == u'=') {
614 scanChar();
615 return T_OR_EQ;
616 }
617 return T_OR;
618
619 case u'{': return T_LBRACE;
620
621 case u'^':
622 if (_state.currentChar == u'=') {
623 scanChar();
624 return T_XOR_EQ;
625 }
626 return T_XOR;
627
628 case u']': return T_RBRACKET;
629 case u'[': return T_LBRACKET;
630 case u'?': {
631 if (_state.currentChar == u'?') {
632 scanChar();
633 return T_QUESTION_QUESTION;
634 }
635 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
636 scanChar();
637 return T_QUESTION_DOT;
638 }
639
640 return T_QUESTION;
641 }
642
643 case u'>':
644 if (_state.currentChar == u'>') {
645 scanChar();
646 if (_state.currentChar == u'>') {
647 scanChar();
648 if (_state.currentChar == u'=') {
649 scanChar();
650 return T_GT_GT_GT_EQ;
651 }
652 return T_GT_GT_GT;
653 } else if (_state.currentChar == u'=') {
654 scanChar();
655 return T_GT_GT_EQ;
656 }
657 return T_GT_GT;
658 } else if (_state.currentChar == u'=') {
659 scanChar();
660 return T_GE;
661 }
662 return T_GT;
663
664 case u'=':
665 if (_state.currentChar == u'=') {
666 scanChar();
667 if (_state.currentChar == u'=') {
668 scanChar();
669 return T_EQ_EQ_EQ;
670 }
671 return T_EQ_EQ;
672 } else if (_state.currentChar == u'>') {
673 scanChar();
674 return T_ARROW;
675 }
676 return T_EQ;
677
678 case u'<':
679 if (_state.currentChar == u'=') {
680 scanChar();
681 return T_LE;
682 } else if (_state.currentChar == u'<') {
683 scanChar();
684 if (_state.currentChar == u'=') {
685 scanChar();
686 return T_LT_LT_EQ;
687 }
688 return T_LT_LT;
689 }
690 return T_LT;
691
692 case u';': return T_SEMICOLON;
693 case u':': return T_COLON;
694
695 case u'/':
696 switch (_state.currentChar.unicode()) {
697 case u'*':
698 scanChar();
699 while (_codePtr <= _endPtr) {
700 if (_state.currentChar == u'*') {
701 scanChar();
702 if (_state.currentChar == u'/') {
703 scanChar();
704 if (_engine) {
705 _engine->addComment(pos: tokenOffset() + 2,
706 len: _codePtr - _tokenStartPtr - 1 - 4, line: tokenStartLine(),
707 col: tokenStartColumn() + 2);
708 }
709 if (_lexMode == LexMode::LineByLine)
710 return T_COMMENT;
711 else
712 goto again;
713 }
714 } else {
715 scanChar();
716 }
717 }
718 if (_lexMode == LexMode::LineByLine)
719 return T_PARTIAL_COMMENT;
720 else
721 goto again;
722 case u'/':
723 while (_codePtr <= _endPtr && !isLineTerminator()) {
724 scanChar();
725 }
726 if (_engine) {
727 _engine->addComment(pos: tokenOffset() + 2, len: _codePtr - _tokenStartPtr - 1 - 2,
728 line: tokenStartLine(), col: tokenStartColumn() + 2);
729 }
730 if (_lexMode == LexMode::LineByLine)
731 return T_COMMENT;
732 else
733 goto again;
734 case u'=':
735 scanChar();
736 return T_DIVIDE_EQ;
737 default:
738 return T_DIVIDE_;
739 }
740 case u'.':
741 if (_state.importState == ImportState::SawImport)
742 return T_DOT;
743 if (isDecimalDigit(c: _state.currentChar.unicode()))
744 return scanNumber(ch);
745 if (_state.currentChar == u'.') {
746 scanChar();
747 if (_state.currentChar == u'.') {
748 scanChar();
749 return T_ELLIPSIS;
750 } else {
751 _state.errorCode = IllegalCharacter;
752 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unexpected token '.'");
753 return T_ERROR;
754 }
755 }
756 return T_DOT;
757
758 case u'-':
759 if (_state.currentChar == u'=') {
760 scanChar();
761 return T_MINUS_EQ;
762 } else if (_state.currentChar == u'-') {
763 scanChar();
764
765 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
766 && _state.tokenKind != T_LPAREN) {
767 _state.stackToken = T_MINUS_MINUS;
768 return T_SEMICOLON;
769 }
770
771 return T_MINUS_MINUS;
772 }
773 return T_MINUS;
774
775 case u',': return T_COMMA;
776
777 case u'+':
778 if (_state.currentChar == u'=') {
779 scanChar();
780 return T_PLUS_EQ;
781 } else if (_state.currentChar == u'+') {
782 scanChar();
783
784 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
785 && _state.tokenKind != T_LPAREN) {
786 _state.stackToken = T_PLUS_PLUS;
787 return T_SEMICOLON;
788 }
789
790 return T_PLUS_PLUS;
791 }
792 return T_PLUS;
793
794 case u'*':
795 if (_state.currentChar == u'=') {
796 scanChar();
797 return T_STAR_EQ;
798 } else if (_state.currentChar == u'*') {
799 scanChar();
800 if (_state.currentChar == u'=') {
801 scanChar();
802 return T_STAR_STAR_EQ;
803 }
804 return T_STAR_STAR;
805 }
806 return T_STAR;
807
808 case u')': return T_RPAREN;
809 case u'(': return T_LPAREN;
810
811 case u'@': return T_AT;
812
813 case u'&':
814 if (_state.currentChar == u'=') {
815 scanChar();
816 return T_AND_EQ;
817 } else if (_state.currentChar == u'&') {
818 scanChar();
819 return T_AND_AND;
820 }
821 return T_AND;
822
823 case u'%':
824 if (_state.currentChar == u'=') {
825 scanChar();
826 return T_REMAINDER_EQ;
827 }
828 return T_REMAINDER;
829
830 case u'!':
831 if (_state.currentChar == u'=') {
832 scanChar();
833 if (_state.currentChar == u'=') {
834 scanChar();
835 return T_NOT_EQ_EQ;
836 }
837 return T_NOT_EQ;
838 }
839 return T_NOT;
840
841 case u'`':
842 _state.outerTemplateBraceCount.push(t: _state.bracesCount);
843 Q_FALLTHROUGH();
844 case u'\'':
845 case u'"':
846 return scanString(mode: ScanStringMode(ch.unicode()));
847 case u'0':
848 case u'1':
849 case u'2':
850 case u'3':
851 case u'4':
852 case u'5':
853 case u'6':
854 case u'7':
855 case u'8':
856 case u'9':
857 if (_state.importState == ImportState::SawImport)
858 return scanVersionNumber(ch);
859 else
860 return scanNumber(ch);
861
862 case '#':
863 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
864 // shebang support
865 while (_codePtr <= _endPtr && !isLineTerminator()) {
866 scanChar();
867 }
868 if (_engine) {
869 _engine->addComment(pos: tokenOffset(), len: _codePtr - _tokenStartPtr - 1, line: tokenStartLine(),
870 col: tokenStartColumn());
871 }
872 if (_lexMode == LexMode::LineByLine)
873 return T_COMMENT;
874 else
875 goto again;
876 }
877 Q_FALLTHROUGH();
878
879 default: {
880 uint c = ch.unicode();
881 bool identifierWithEscapeChars = false;
882 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _state.currentChar.unicode())) {
883 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
884 scanChar();
885 } else if (c == '\\' && _state.currentChar == u'u') {
886 identifierWithEscapeChars = true;
887 bool ok = false;
888 c = decodeUnicodeEscapeCharacter(ok: &ok);
889 if (!ok)
890 return T_ERROR;
891 }
892 if (isIdentifierStart(ch: c)) {
893 if (identifierWithEscapeChars) {
894 _tokenText.resize(size: 0);
895 if (QChar::requiresSurrogates(ucs4: c)) {
896 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
897 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
898 } else {
899 _tokenText += QChar(c);
900 }
901 _state.validTokenText = true;
902 }
903 while (_codePtr <= _endPtr) {
904 c = _state.currentChar.unicode();
905 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _codePtr->unicode())) {
906 scanChar();
907 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
908 } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
909 if (!identifierWithEscapeChars) {
910 identifierWithEscapeChars = true;
911 _tokenText.resize(size: 0);
912 _tokenText.insert(i: 0, uc: _tokenStartPtr, len: _codePtr - _tokenStartPtr - 1);
913 _state.validTokenText = true;
914 }
915
916 scanChar(); // skip '\\'
917 bool ok = false;
918 c = decodeUnicodeEscapeCharacter(ok: &ok);
919 if (!ok)
920 return T_ERROR;
921
922 if (!isIdentifierPart(ch: c))
923 break;
924
925 if (QChar::requiresSurrogates(ucs4: c)) {
926 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
927 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
928 } else {
929 _tokenText += QChar(c);
930 }
931 continue;
932 }
933
934 if (!isIdentifierPart(ch: c))
935 break;
936
937 if (identifierWithEscapeChars) {
938 if (QChar::requiresSurrogates(ucs4: c)) {
939 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
940 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
941 } else {
942 _tokenText += QChar(c);
943 }
944 }
945 scanChar();
946 }
947
948 const auto token = QStringView(_tokenStartPtr, _codePtr - 1);
949 _tokenLength = token.size();
950 int kind = T_IDENTIFIER;
951
952 if (!identifierWithEscapeChars)
953 kind = classify(s: token, parseModeFlags: parseModeFlags());
954
955 if (_engine) {
956 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
957 _tokenSpell = _engine->newStringRef(text: _tokenText);
958 else
959 _tokenSpell = _engine->midRef(position: _tokenStartPtr - _code.unicode(), size: _tokenLength);
960 }
961
962 return kind;
963 }
964 }
965
966 break;
967 }
968
969 return T_ERROR;
970}
971
972int Lexer::scanString(ScanStringMode mode)
973{
974 const char16_t quote = mode == TemplateContinuation ? TemplateHead : mode;
975 // we actually use T_STRING_LITERAL also for multiline strings, should we want to
976 // change that we should set it to:
977 // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL ||
978 // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
979 // here and uncomment the multilineStringLiteral = true below.
980 bool multilineStringLiteral = false;
981
982 const QChar *startCode = _codePtr - 1;
983 // in case we just parsed a \r, we need to reset this flag to get things working
984 // correctly in the loop below and afterwards
985 _skipLinefeed = false;
986 bool first = true;
987
988 if (_engine) {
989 while (_codePtr <= _endPtr) {
990 if (isLineTerminator()) {
991 if ((quote == u'`' || qmlMode())) {
992 if (first)
993 --_currentLineNumber; // will be read again in scanChar()
994 break;
995 }
996 _state.errorCode = IllegalCharacter;
997 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
998 key: "Stray newline in string literal");
999 return T_ERROR;
1000 } else if (_state.currentChar == u'\\') {
1001 break;
1002 } else if (_state.currentChar == u'$' && quote == u'`') {
1003 break;
1004 } else if (_state.currentChar == quote) {
1005 _tokenSpell =
1006 _engine->midRef(position: startCode - _code.unicode(), size: _codePtr - startCode - 1);
1007 _rawString = _tokenSpell;
1008 scanChar();
1009
1010 if (quote == u'`')
1011 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1012 if (mode == TemplateHead)
1013 return T_NO_SUBSTITUTION_TEMPLATE;
1014 else if (mode == TemplateContinuation)
1015 return T_TEMPLATE_TAIL;
1016 else if (multilineStringLiteral)
1017 return T_MULTILINE_STRING_LITERAL;
1018 else
1019 return T_STRING_LITERAL;
1020 }
1021 // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1022 _state.currentChar = *_codePtr++;
1023 ++_currentColumnNumber;
1024 first = false;
1025 }
1026 }
1027
1028 // rewind by one char, so things gets scanned correctly
1029 --_codePtr;
1030 --_currentColumnNumber;
1031
1032 _state.validTokenText = true;
1033 _tokenText = QString(startCode, _codePtr - startCode);
1034
1035 auto setRawString = [&](const QChar *end) {
1036 QString raw(startCode, end - startCode - 1);
1037 raw.replace(before: QLatin1String("\r\n"), after: QLatin1String("\n"));
1038 raw.replace(before: u'\r', after: u'\n');
1039 _rawString = _engine->newStringRef(text: raw);
1040 };
1041
1042 scanChar();
1043
1044 while (_codePtr <= _endPtr) {
1045 if (_state.currentChar == quote) {
1046 scanChar();
1047
1048 if (_engine) {
1049 _tokenSpell = _engine->newStringRef(text: _tokenText);
1050 if (quote == u'`')
1051 setRawString(_codePtr - 1);
1052 }
1053
1054 if (quote == u'`')
1055 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1056
1057 if (mode == TemplateContinuation)
1058 return T_TEMPLATE_TAIL;
1059 else if (mode == TemplateHead)
1060 return T_NO_SUBSTITUTION_TEMPLATE;
1061
1062 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1063 } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
1064 scanChar();
1065 scanChar();
1066 _state.bracesCount = 1;
1067 if (_engine) {
1068 _tokenSpell = _engine->newStringRef(text: _tokenText);
1069 setRawString(_codePtr - 2);
1070 }
1071
1072 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1073 } else if (_state.currentChar == u'\\') {
1074 scanChar();
1075 if (_codePtr > _endPtr) {
1076 _state.errorCode = IllegalEscapeSequence;
1077 _errorMessage = QCoreApplication::translate(
1078 context: "QQmlParser", key: "End of file reached at escape sequence");
1079 return T_ERROR;
1080 }
1081
1082 QChar u;
1083
1084 switch (_state.currentChar.unicode()) {
1085 // unicode escape sequence
1086 case u'u': {
1087 bool ok = false;
1088 uint codePoint = decodeUnicodeEscapeCharacter(ok: &ok);
1089 if (!ok)
1090 return T_ERROR;
1091 if (QChar::requiresSurrogates(ucs4: codePoint)) {
1092 // need to use a surrogate pair
1093 _tokenText += QChar(QChar::highSurrogate(ucs4: codePoint));
1094 u = QChar::lowSurrogate(ucs4: codePoint);
1095 } else {
1096 u = QChar(codePoint);
1097 }
1098 } break;
1099
1100 // hex escape sequence
1101 case u'x': {
1102 bool ok = false;
1103 u = decodeHexEscapeCharacter(ok: &ok);
1104 if (!ok) {
1105 _state.errorCode = IllegalHexadecimalEscapeSequence;
1106 _errorMessage = QCoreApplication::translate(
1107 context: "QQmlParser", key: "Illegal hexadecimal escape sequence");
1108 return T_ERROR;
1109 }
1110 } break;
1111
1112 // single character escape sequence
1113 case u'\\': u = u'\\'; scanChar(); break;
1114 case u'\'': u = u'\''; scanChar(); break;
1115 case u'\"': u = u'\"'; scanChar(); break;
1116 case u'b': u = u'\b'; scanChar(); break;
1117 case u'f': u = u'\f'; scanChar(); break;
1118 case u'n': u = u'\n'; scanChar(); break;
1119 case u'r': u = u'\r'; scanChar(); break;
1120 case u't': u = u'\t'; scanChar(); break;
1121 case u'v': u = u'\v'; scanChar(); break;
1122
1123 case u'0':
1124 if (!_codePtr->isDigit()) {
1125 scanChar();
1126 u = u'\0';
1127 break;
1128 }
1129 Q_FALLTHROUGH();
1130 case u'1':
1131 case u'2':
1132 case u'3':
1133 case u'4':
1134 case u'5':
1135 case u'6':
1136 case u'7':
1137 case u'8':
1138 case u'9':
1139 _state.errorCode = IllegalEscapeSequence;
1140 _errorMessage = QCoreApplication::translate(
1141 context: "QQmlParser", key: "Octal escape sequences are not allowed");
1142 return T_ERROR;
1143
1144 case u'\r':
1145 case u'\n':
1146 case 0x2028u:
1147 case 0x2029u:
1148 // uncomment the following to use T_MULTILINE_STRING_LITERAL
1149 // multilineStringLiteral = true;
1150 scanChar();
1151 continue;
1152
1153 default:
1154 // non escape character
1155 u = _state.currentChar;
1156 scanChar();
1157 }
1158
1159 _tokenText += u;
1160 } else {
1161 _tokenText += _state.currentChar;
1162 scanChar();
1163 }
1164 }
1165 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1166 if (mode == TemplateContinuation)
1167 return T_PARTIAL_TEMPLATE_MIDDLE;
1168 else if (mode == TemplateHead)
1169 return T_PARTIAL_TEMPLATE_HEAD;
1170 else if (mode == SingleQuote)
1171 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1172 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1173 }
1174 _state.errorCode = UnclosedStringLiteral;
1175 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unclosed string at end of line");
1176 return T_ERROR;
1177}
1178
1179int Lexer::scanNumber(QChar ch)
1180{
1181 auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){
1182 if (_state.currentChar == u'_') {
1183 if (peekChar() == u'_') {
1184 _state.errorCode = IllegalNumber;
1185 _errorMessage = QCoreApplication::translate(
1186 context: "QQmlParser",
1187 key: "There can be at most one numeric separator between digits"
1188 );
1189 return false;
1190 }
1191
1192 if (!isNextCharacterValid()) {
1193 _state.errorCode = IllegalNumber;
1194 _errorMessage = QCoreApplication::translate(
1195 context: "QQmlParser",
1196 key: "A trailing numeric separator is not allowed in numeric literals"
1197 );
1198 return false;
1199 }
1200
1201 scanChar();
1202 }
1203
1204 return true;
1205 };
1206
1207 if (ch == u'0') {
1208 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1209 ch = _state.currentChar; // remember the x or X to use it in the error message below.
1210
1211 // parse hex integer literal
1212 scanChar(); // consume 'x'
1213
1214 if (!isHexDigit(c: _state.currentChar)) {
1215 _state.errorCode = IllegalNumber;
1216 _errorMessage = QCoreApplication::translate(
1217 context: "QQmlParser",
1218 key: "At least one hexadecimal digit is required after '0%1'")
1219 .arg(a: ch);
1220 return T_ERROR;
1221 }
1222
1223 double d = 0.;
1224 while (1) {
1225 int digit = ::hexDigit(c: _state.currentChar);
1226 if (digit < 0)
1227 break;
1228 d *= 16;
1229 d += digit;
1230 scanChar();
1231
1232 if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); }))
1233 return T_ERROR;
1234 }
1235
1236 _state.tokenValue = d;
1237 return T_NUMERIC_LITERAL;
1238 } else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1239 ch = _state.currentChar; // remember the o or O to use it in the error message below.
1240
1241 // parse octal integer literal
1242 scanChar(); // consume 'o'
1243
1244 if (!isOctalDigit(c: _state.currentChar.unicode())) {
1245 _state.errorCode = IllegalNumber;
1246 _errorMessage =
1247 QCoreApplication::translate(
1248 context: "QQmlParser", key: "At least one octal digit is required after '0%1'")
1249 .arg(a: ch);
1250 return T_ERROR;
1251 }
1252
1253 double d = 0.;
1254 while (1) {
1255 int digit = ::octalDigit(c: _state.currentChar);
1256 if (digit < 0)
1257 break;
1258 d *= 8;
1259 d += digit;
1260 scanChar();
1261
1262 if (!scanOptionalNumericSeparator([this](){
1263 return isOctalDigit(c: peekChar().unicode());
1264 })) {
1265 return T_ERROR;
1266 }
1267 }
1268
1269 _state.tokenValue = d;
1270 return T_NUMERIC_LITERAL;
1271 } else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1272 ch = _state.currentChar; // remember the b or B to use it in the error message below.
1273
1274 // parse binary integer literal
1275 scanChar(); // consume 'b'
1276
1277 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1278 _state.errorCode = IllegalNumber;
1279 _errorMessage =
1280 QCoreApplication::translate(
1281 context: "QQmlParser", key: "At least one binary digit is required after '0%1'")
1282 .arg(a: ch);
1283 return T_ERROR;
1284 }
1285
1286 double d = 0.;
1287 while (1) {
1288 int digit = 0;
1289 if (_state.currentChar.unicode() == u'1')
1290 digit = 1;
1291 else if (_state.currentChar.unicode() != u'0')
1292 break;
1293 d *= 2;
1294 d += digit;
1295 scanChar();
1296
1297 if (!scanOptionalNumericSeparator([this](){
1298 return peekChar().unicode() == u'0' || peekChar().unicode() == u'1';
1299 })) {
1300 return T_ERROR;
1301 }
1302 }
1303
1304 _state.tokenValue = d;
1305 return T_NUMERIC_LITERAL;
1306 } else if (_state.currentChar.isDigit() && !qmlMode()) {
1307 _state.errorCode = IllegalCharacter;
1308 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
1309 key: "Decimal numbers can't start with '0'");
1310 return T_ERROR;
1311 }
1312 }
1313
1314 // decimal integer literal
1315 QVarLengthArray<char,32> chars;
1316 chars.append(t: ch.unicode());
1317
1318 if (ch != u'.') {
1319 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1320 return T_ERROR;
1321
1322 while (_state.currentChar.isDigit()) {
1323 chars.append(t: _state.currentChar.unicode());
1324 scanChar(); // consume the digit
1325
1326 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1327 return T_ERROR;
1328 }
1329
1330 if (_state.currentChar == u'.') {
1331 chars.append(t: _state.currentChar.unicode());
1332 scanChar(); // consume `.'
1333 }
1334 }
1335
1336 while (_state.currentChar.isDigit()) {
1337 chars.append(t: _state.currentChar.unicode());
1338 scanChar();
1339
1340 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1341 return T_ERROR;
1342 }
1343
1344 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1345 if (_codePtr[0].isDigit()
1346 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1347
1348 chars.append(t: _state.currentChar.unicode());
1349 scanChar(); // consume `e'
1350
1351 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1352 chars.append(t: _state.currentChar.unicode());
1353 scanChar(); // consume the sign
1354 }
1355
1356 while (_state.currentChar.isDigit()) {
1357 chars.append(t: _state.currentChar.unicode());
1358 scanChar();
1359
1360 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1361 return T_ERROR;
1362 }
1363 }
1364 }
1365
1366 const char *begin = chars.constData();
1367 const char *end = nullptr;
1368 bool ok = false;
1369
1370 _state.tokenValue = qstrntod(s00: begin, len: chars.size(), se: &end, ok: &ok);
1371
1372 if (end - begin != chars.size()) {
1373 _state.errorCode = IllegalExponentIndicator;
1374 _errorMessage =
1375 QCoreApplication::translate(context: "QQmlParser", key: "Illegal syntax for exponential number");
1376 return T_ERROR;
1377 }
1378
1379 return T_NUMERIC_LITERAL;
1380}
1381
1382int Lexer::scanVersionNumber(QChar ch)
1383{
1384 if (ch == u'0') {
1385 _state.tokenValue = 0;
1386 return T_VERSION_NUMBER;
1387 }
1388
1389 int acc = 0;
1390 acc += ch.digitValue();
1391
1392 while (_state.currentChar.isDigit()) {
1393 acc *= 10;
1394 acc += _state.currentChar.digitValue();
1395 scanChar(); // consume the digit
1396 }
1397
1398 _state.tokenValue = acc;
1399 return T_VERSION_NUMBER;
1400}
1401
1402bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1403{
1404 _tokenText.resize(size: 0);
1405 _state.validTokenText = true;
1406 _state.patternFlags = 0;
1407
1408 if (prefix == EqualPrefix)
1409 _tokenText += u'=';
1410
1411 while (true) {
1412 switch (_state.currentChar.unicode()) {
1413 case u'/':
1414 scanChar();
1415
1416 // scan the flags
1417 _state.patternFlags = 0;
1418 while (isIdentLetter(c: _state.currentChar)) {
1419 int flag = regExpFlagFromChar(ch: _state.currentChar);
1420 if (flag == 0 || _state.patternFlags & flag) {
1421 _errorMessage = QCoreApplication::translate(
1422 context: "QQmlParser", key: "Invalid regular expression flag '%0'")
1423 .arg(a: QChar(_state.currentChar));
1424 return false;
1425 }
1426 _state.patternFlags |= flag;
1427 scanChar();
1428 }
1429
1430 _tokenLength = _codePtr - _tokenStartPtr - 1;
1431 return true;
1432
1433 case u'\\':
1434 // regular expression backslash sequence
1435 _tokenText += _state.currentChar;
1436 scanChar();
1437
1438 if (_codePtr > _endPtr || isLineTerminator()) {
1439 _errorMessage = QCoreApplication::translate(
1440 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1441 return false;
1442 }
1443
1444 _tokenText += _state.currentChar;
1445 scanChar();
1446 break;
1447
1448 case u'[':
1449 // regular expression class
1450 _tokenText += _state.currentChar;
1451 scanChar();
1452
1453 while (_codePtr <= _endPtr && !isLineTerminator()) {
1454 if (_state.currentChar == u']')
1455 break;
1456 else if (_state.currentChar == u'\\') {
1457 // regular expression backslash sequence
1458 _tokenText += _state.currentChar;
1459 scanChar();
1460
1461 if (_codePtr > _endPtr || isLineTerminator()) {
1462 _errorMessage = QCoreApplication::translate(
1463 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1464 return false;
1465 }
1466
1467 _tokenText += _state.currentChar;
1468 scanChar();
1469 } else {
1470 _tokenText += _state.currentChar;
1471 scanChar();
1472 }
1473 }
1474
1475 if (_state.currentChar != u']') {
1476 _errorMessage = QCoreApplication::translate(
1477 context: "QQmlParser", key: "Unterminated regular expression class");
1478 return false;
1479 }
1480
1481 _tokenText += _state.currentChar;
1482 scanChar(); // skip ]
1483 break;
1484
1485 default:
1486 if (_codePtr > _endPtr || isLineTerminator()) {
1487 _errorMessage = QCoreApplication::translate(
1488 context: "QQmlParser", key: "Unterminated regular expression literal");
1489 return false;
1490 } else {
1491 _tokenText += _state.currentChar;
1492 scanChar();
1493 }
1494 } // switch
1495 } // while
1496
1497 return false;
1498}
1499
1500bool Lexer::isLineTerminator() const
1501{
1502 const ushort unicode = _state.currentChar.unicode();
1503 return unicode == 0x000Au
1504 || unicode == 0x000Du
1505 || unicode == 0x2028u
1506 || unicode == 0x2029u;
1507}
1508
1509unsigned Lexer::isLineTerminatorSequence() const
1510{
1511 switch (_state.currentChar.unicode()) {
1512 case 0x000Au:
1513 case 0x2028u:
1514 case 0x2029u:
1515 return 1;
1516 case 0x000Du:
1517 if (_codePtr->unicode() == 0x000Au)
1518 return 2;
1519 else
1520 return 1;
1521 default:
1522 return 0;
1523 }
1524}
1525
1526bool Lexer::isIdentLetter(QChar ch)
1527{
1528 // ASCII-biased, since all reserved words are ASCII, aand hence the
1529 // bulk of content to be parsed.
1530 if ((ch >= u'a' && ch <= u'z')
1531 || (ch >= u'A' && ch <= u'Z')
1532 || ch == u'$' || ch == u'_')
1533 return true;
1534 if (ch.unicode() < 128)
1535 return false;
1536 return ch.isLetterOrNumber();
1537}
1538
1539bool Lexer::isDecimalDigit(ushort c)
1540{
1541 return (c >= u'0' && c <= u'9');
1542}
1543
1544bool Lexer::isHexDigit(QChar c)
1545{
1546 return ((c >= u'0' && c <= u'9')
1547 || (c >= u'a' && c <= u'f')
1548 || (c >= u'A' && c <= u'F'));
1549}
1550
1551bool Lexer::isOctalDigit(ushort c)
1552{
1553 return (c >= u'0' && c <= u'7');
1554}
1555
1556QString Lexer::tokenText() const
1557{
1558 if (_state.validTokenText)
1559 return _tokenText;
1560
1561 if (_state.tokenKind == T_STRING_LITERAL)
1562 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1563
1564 return QString(_tokenStartPtr, _tokenLength);
1565}
1566
1567Lexer::Error Lexer::errorCode() const
1568{
1569 return _state.errorCode;
1570}
1571
1572QString Lexer::errorMessage() const
1573{
1574 return _errorMessage;
1575}
1576
1577void Lexer::syncProhibitAutomaticSemicolon()
1578{
1579 if (_state.parenthesesState == BalancedParentheses) {
1580 // we have seen something like "if (foo)", which means we should
1581 // never insert an automatic semicolon at this point, since it would
1582 // then be expanded into an empty statement (ECMA-262 7.9.1)
1583 _state.prohibitAutomaticSemicolon = true;
1584 _state.parenthesesState = IgnoreParentheses;
1585 } else {
1586 _state.prohibitAutomaticSemicolon = false;
1587 }
1588}
1589
1590bool Lexer::prevTerminator() const
1591{
1592 return _state.terminator;
1593}
1594
1595bool Lexer::followsClosingBrace() const
1596{
1597 return _state.followsClosingBrace;
1598}
1599
1600bool Lexer::canInsertAutomaticSemicolon(int token) const
1601{
1602 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1603 || _state.followsClosingBrace;
1604}
1605
1606static const int uriTokens[] = {
1607 QQmlJSGrammar::T_IDENTIFIER,
1608 QQmlJSGrammar::T_PROPERTY,
1609 QQmlJSGrammar::T_SIGNAL,
1610 QQmlJSGrammar::T_READONLY,
1611 QQmlJSGrammar::T_ON,
1612 QQmlJSGrammar::T_BREAK,
1613 QQmlJSGrammar::T_CASE,
1614 QQmlJSGrammar::T_CATCH,
1615 QQmlJSGrammar::T_CONTINUE,
1616 QQmlJSGrammar::T_DEFAULT,
1617 QQmlJSGrammar::T_DELETE,
1618 QQmlJSGrammar::T_DO,
1619 QQmlJSGrammar::T_ELSE,
1620 QQmlJSGrammar::T_FALSE,
1621 QQmlJSGrammar::T_FINAL,
1622 QQmlJSGrammar::T_FINALLY,
1623 QQmlJSGrammar::T_FOR,
1624 QQmlJSGrammar::T_FUNCTION,
1625 QQmlJSGrammar::T_IF,
1626 QQmlJSGrammar::T_IN,
1627 QQmlJSGrammar::T_OF,
1628 QQmlJSGrammar::T_INSTANCEOF,
1629 QQmlJSGrammar::T_NEW,
1630 QQmlJSGrammar::T_NULL,
1631 QQmlJSGrammar::T_RETURN,
1632 QQmlJSGrammar::T_SWITCH,
1633 QQmlJSGrammar::T_THIS,
1634 QQmlJSGrammar::T_THROW,
1635 QQmlJSGrammar::T_TRUE,
1636 QQmlJSGrammar::T_TRY,
1637 QQmlJSGrammar::T_TYPEOF,
1638 QQmlJSGrammar::T_VAR,
1639 QQmlJSGrammar::T_VOID,
1640 QQmlJSGrammar::T_WHILE,
1641 QQmlJSGrammar::T_CONST,
1642 QQmlJSGrammar::T_DEBUGGER,
1643 QQmlJSGrammar::T_RESERVED_WORD,
1644 QQmlJSGrammar::T_WITH,
1645
1646 QQmlJSGrammar::EOF_SYMBOL
1647};
1648static inline bool isUriToken(int token)
1649{
1650 const int *current = uriTokens;
1651 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1652 if (*current == token)
1653 return true;
1654 ++current;
1655 }
1656 return false;
1657}
1658
1659bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error)
1660{
1661 auto setError = [error, this](QString message) {
1662 error->message = std::move(message);
1663 error->loc.startLine = tokenStartLine();
1664 error->loc.startColumn = tokenStartColumn();
1665 };
1666
1667 QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1668 Q_ASSERT(!_qmlMode);
1669
1670 lex(); // fetch the first token
1671
1672 if (_state.tokenKind != T_DOT)
1673 return true;
1674
1675 do {
1676 const int lineNumber = tokenStartLine();
1677 const int column = tokenStartColumn();
1678
1679 lex(); // skip T_DOT
1680
1681 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1682 return true; // expected a valid QML/JS directive
1683
1684 const QString directiveName = tokenText();
1685
1686 if (! (directiveName == QLatin1String("pragma") ||
1687 directiveName == QLatin1String("import"))) {
1688 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1689 return false; // not a valid directive name
1690 }
1691
1692 // it must be a pragma or an import directive.
1693 if (directiveName == QLatin1String("pragma")) {
1694 // .pragma library
1695 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1696 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1697 return false; // expected `library
1698 }
1699
1700 // we found a .pragma library directive
1701 directives->pragmaLibrary();
1702
1703 } else {
1704 Q_ASSERT(directiveName == QLatin1String("import"));
1705 lex(); // skip .import
1706
1707 QString pathOrUri;
1708 QString version;
1709 bool fileImport = false; // file or uri import
1710
1711 if (_state.tokenKind == T_STRING_LITERAL) {
1712 // .import T_STRING_LITERAL as T_IDENTIFIER
1713
1714 fileImport = true;
1715 pathOrUri = tokenText();
1716
1717 if (!pathOrUri.endsWith(s: QLatin1String("js"))) {
1718 setError(QCoreApplication::translate(context: "QQmlParser",key: "Imported file must be a script"));
1719 return false;
1720 }
1721 lex();
1722
1723 } else if (_state.tokenKind == T_IDENTIFIER) {
1724 // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER
1725 while (true) {
1726 if (!isUriToken(token: _state.tokenKind)) {
1727 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1728 return false;
1729 }
1730
1731 pathOrUri.append(s: tokenText());
1732
1733 lex();
1734 if (tokenStartLine() != lineNumber) {
1735 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1736 return false;
1737 }
1738 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1739 break;
1740
1741 pathOrUri.append(c: u'.');
1742
1743 lex();
1744 if (tokenStartLine() != lineNumber) {
1745 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1746 return false;
1747 }
1748 }
1749
1750 if (_state.tokenKind == T_VERSION_NUMBER) {
1751 version = tokenText();
1752 lex();
1753 if (_state.tokenKind == T_DOT) {
1754 version += u'.';
1755 lex();
1756 if (_state.tokenKind != T_VERSION_NUMBER) {
1757 setError(QCoreApplication::translate(
1758 context: "QQmlParser", key: "Incomplete version number (dot but no minor)"));
1759 return false; // expected the module version number
1760 }
1761 version += tokenText();
1762 lex();
1763 }
1764 }
1765 }
1766
1767 //
1768 // recognize the mandatory `as' followed by the module name
1769 //
1770 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1771 if (fileImport)
1772 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1773 else
1774 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1775 if (tokenStartLine() != lineNumber) {
1776 error->loc.startLine = lineNumber;
1777 error->loc.startColumn = column;
1778 }
1779 return false; // expected `as'
1780 }
1781
1782 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1783 if (fileImport)
1784 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1785 else
1786 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1787 return false; // expected module name
1788 }
1789
1790 const QString module = tokenText();
1791 if (!module.at(i: 0).isUpper()) {
1792 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid import qualifier"));
1793 return false;
1794 }
1795
1796 if (fileImport)
1797 directives->importFile(jsfile: pathOrUri, module, line: lineNumber, column);
1798 else
1799 directives->importModule(uri: pathOrUri, version, module, line: lineNumber, column);
1800 }
1801
1802 if (tokenStartLine() != lineNumber) {
1803 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1804 return false; // the directives cannot span over multiple lines
1805 }
1806
1807 // fetch the first token after the .pragma/.import directive
1808 lex();
1809 } while (_state.tokenKind == T_DOT);
1810
1811 return true;
1812}
1813
1814const Lexer::State &Lexer::state() const
1815{
1816 return _state;
1817}
1818void Lexer::setState(const Lexer::State &state)
1819{
1820 _state = state;
1821}
1822
1823int Lexer::parseModeFlags() const {
1824 int flags = 0;
1825 if (qmlMode())
1826 flags |= QmlMode|StaticIsKeyword;
1827 if (yieldIsKeyWord())
1828 flags |= YieldIsKeyword;
1829 if (_staticIsKeyword)
1830 flags |= StaticIsKeyword;
1831 return flags;
1832}
1833
1834namespace QQmlJS {
1835QDebug operator<<(QDebug dbg, const Lexer::State &s)
1836{
1837 dbg << "{\n"
1838 << " errorCode:" << int(s.errorCode) << ",\n"
1839 << " currentChar:" << s.currentChar << ",\n"
1840 << " tokenValue:" << s.tokenValue << ",\n"
1841 << " parenthesesState:" << s.parenthesesState << ",\n"
1842 << " parenthesesCount:" << s.parenthesesCount << ",\n"
1843 << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1844 << " bracesCount:" << s.bracesCount << ",\n"
1845 << " stackToken:" << s.stackToken << ",\n"
1846 << " patternFlags:" << s.patternFlags << ",\n"
1847 << " tokenKind:" << s.tokenKind << ",\n"
1848 << " importState:" << int(s.importState) << ",\n"
1849 << " validTokenText:" << s.validTokenText << ",\n"
1850 << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1851 << " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1852 << " terminator:" << s.terminator << ",\n"
1853 << " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1854 << " delimited:" << s.delimited << ",\n"
1855 << " handlingDirectives:" << s.handlingDirectives << ",\n"
1856 << " generatorLevel:" << s.generatorLevel << "\n}";
1857 return dbg;
1858}
1859}
1860
1861QT_END_NAMESPACE
1862

// Source: qtdeclarative/src/qml/parser/qqmljslexer.cpp