1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qqmljslexer_p.h"
5#include "qqmljsengine_p.h"
6#include "qqmljskeywords_p.h"
7
8#include <private/qqmljsdiagnosticmessage_p.h>
9#include <private/qqmljsmemorypool_p.h>
10#include <private/qlocale_tools_p.h>
11
12
13#include <QtCore/qcoreapplication.h>
14#include <QtCore/qvarlengtharray.h>
15#include <QtCore/qdebug.h>
16#include <QtCore/QScopedValueRollback>
17
18#include <optional>
19
20QT_BEGIN_NAMESPACE
21using namespace QQmlJS;
22
23static inline int regExpFlagFromChar(const QChar &ch)
24{
25 switch (ch.unicode()) {
26 case 'g': return Lexer::RegExp_Global;
27 case 'i': return Lexer::RegExp_IgnoreCase;
28 case 'm': return Lexer::RegExp_Multiline;
29 case 'u': return Lexer::RegExp_Unicode;
30 case 'y': return Lexer::RegExp_Sticky;
31 }
32 return 0;
33}
34
// Converts a single hexadecimal digit character to its numeric value.
// No validation is performed: anything that is neither '0'-'9' nor 'a'-'f'
// is computed as if it were an uppercase digit ('A' maps to 10).
static inline unsigned char convertHex(ushort c)
{
    if (c >= '0' && c <= '9')
        return c - '0';

    // letters: choose the base matching the letter's case, uppercase by default
    const ushort letterBase = (c >= 'a' && c <= 'f') ? 'a' : 'A';
    return c - letterBase + 10;
}
44
45static inline QChar convertHex(QChar c1, QChar c2)
46{
47 return QChar((convertHex(c: c1.unicode()) << 4) + convertHex(c: c2.unicode()));
48}
49
50Lexer::Lexer(Engine *engine, LexMode lexMode)
51 : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
52{
53 if (engine)
54 engine->setLexer(this);
55}
56
// Returns the QML mode flag: true after construction, afterwards whatever
// value was last passed to setCode().
57bool Lexer::qmlMode() const
58{
59 return _qmlMode;
60}
61
// Returns the source text stored by the most recent setCode() call.
62QString Lexer::code() const
63{
64 return _code;
65}
66
67void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
68 Lexer::CodeContinuation codeContinuation)
69{
70 if (codeContinuation == Lexer::CodeContinuation::Continue)
71 _currentOffset += _code.size();
72 else
73 _currentOffset = 0;
74 if (_engine)
75 _engine->setCode(code);
76
77 _qmlMode = qmlMode;
78 _code = code;
79 _skipLinefeed = false;
80
81 _tokenText.clear();
82 _tokenText.reserve(asize: 1024);
83 _errorMessage.clear();
84 _tokenSpell = QStringView();
85 _rawString = QStringView();
86
87 _codePtr = code.unicode();
88 _endPtr = _codePtr + code.size();
89 _tokenStartPtr = _codePtr;
90
91 if (lineno >= 0)
92 _currentLineNumber = lineno;
93 _currentColumnNumber = 0;
94 _tokenLine = _currentLineNumber;
95 _tokenColumn = 0;
96 _tokenLength = 0;
97
98 if (codeContinuation == Lexer::CodeContinuation::Reset)
99 _state = State {};
100}
101
102void Lexer::scanChar()
103{
104 if (_skipLinefeed) {
105 Q_ASSERT(*_codePtr == u'\n');
106 ++_codePtr;
107 _skipLinefeed = false;
108 }
109 _state.currentChar = *_codePtr++;
110 ++_currentColumnNumber;
111
112 if (isLineTerminator()) {
113 if (_state.currentChar == u'\r') {
114 if (_codePtr < _endPtr && *_codePtr == u'\n')
115 _skipLinefeed = true;
116 _state.currentChar = u'\n';
117 }
118 ++_currentLineNumber;
119 _currentColumnNumber = 0;
120 }
121}
122
123QChar Lexer::peekChar()
124{
125 auto peekPtr = _codePtr;
126 if (peekPtr < _endPtr)
127 return *peekPtr;
128 return QChar();
129}
130
131namespace {
132inline bool isBinop(int tok)
133{
134 switch (tok) {
135 case Lexer::T_AND:
136 case Lexer::T_AND_AND:
137 case Lexer::T_AND_EQ:
138 case Lexer::T_DIVIDE_:
139 case Lexer::T_DIVIDE_EQ:
140 case Lexer::T_EQ:
141 case Lexer::T_EQ_EQ:
142 case Lexer::T_EQ_EQ_EQ:
143 case Lexer::T_GE:
144 case Lexer::T_GT:
145 case Lexer::T_GT_GT:
146 case Lexer::T_GT_GT_EQ:
147 case Lexer::T_GT_GT_GT:
148 case Lexer::T_GT_GT_GT_EQ:
149 case Lexer::T_LE:
150 case Lexer::T_LT:
151 case Lexer::T_LT_LT:
152 case Lexer::T_LT_LT_EQ:
153 case Lexer::T_MINUS:
154 case Lexer::T_MINUS_EQ:
155 case Lexer::T_NOT_EQ:
156 case Lexer::T_NOT_EQ_EQ:
157 case Lexer::T_OR:
158 case Lexer::T_OR_EQ:
159 case Lexer::T_OR_OR:
160 case Lexer::T_PLUS:
161 case Lexer::T_PLUS_EQ:
162 case Lexer::T_REMAINDER:
163 case Lexer::T_REMAINDER_EQ:
164 case Lexer::T_RETURN:
165 case Lexer::T_STAR:
166 case Lexer::T_STAR_EQ:
167 case Lexer::T_XOR:
168 case Lexer::T_XOR_EQ:
169 return true;
170
171 default:
172 return false;
173 }
174}
175
176int hexDigit(QChar c)
177{
178 if (c >= u'0' && c <= u'9')
179 return c.unicode() - u'0';
180 if (c >= u'a' && c <= u'f')
181 return c.unicode() - u'a' + 10;
182 if (c >= u'A' && c <= u'F')
183 return c.unicode() - u'A' + 10;
184 return -1;
185}
186
187int octalDigit(QChar c)
188{
189 if (c >= u'0' && c <= u'7')
190 return c.unicode() - u'0';
191 return -1;
192}
193
194} // anonymous namespace
195
// Produces the next token kind and refreshes the lexer state that depends on it.
//
// On the first pass (and only when no token is parked in _state.stackToken) the
// token may come from continuing a multi-line comment left open by the previous
// call, or from continuing a partial string/template literal selected by the
// previous token kind. In every other case the token comes from scanToken().
//
// T_EOL, T_COMMENT and T_PARTIAL_COMMENT are returned without replacing
// _state.tokenKind. Every other kind is stored there, after which the
// delimited / restrictedKeyword / followsClosingBrace flags, the import state,
// the brace counter and the parentheses tracking are updated.
196int Lexer::lex()
197{
198 const int previousTokenKind = _state.tokenKind;
199 int tokenKind;
200 bool firstPass = true;
201
 // Re-entered from the T_RBRACE case below once the brace counter reaches 0.
202 again:
203 tokenKind = T_ERROR;
204 _tokenSpell = QStringView();
205 _rawString = QStringView();
206 if (firstPass && _state.stackToken == -1) {
207 firstPass = false;
208 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
209 return T_EOL;
210
211 if (_state.comments == CommentState::InMultilineComment) {
 // Continue a multi-line comment: look for the closing "*/" in the current code.
212 scanChar();
213 _tokenStartPtr = _codePtr - 1;
214 _tokenLine = _currentLineNumber;
215 _tokenColumn = _currentColumnNumber;
216 while (_codePtr <= _endPtr) {
217 if (_state.currentChar == u'*') {
218 scanChar();
219 if (_state.currentChar == u'/') {
220 scanChar();
221 if (_engine) {
222 _engine->addComment(pos: tokenOffset() + 2,
223 len: _codePtr - _tokenStartPtr - 1 - 4,
224 line: tokenStartLine(), col: tokenStartColumn() + 2);
225 }
226 tokenKind = T_COMMENT;
227 break;
228 }
229 } else {
230 scanChar();
231 }
232 }
 // No closing "*/" was found before the end: the comment is still open.
233 if (tokenKind == T_ERROR)
234 tokenKind = T_PARTIAL_COMMENT;
235 } else {
236 // handle multiline continuation
237 std::optional<ScanStringMode> scanMode;
238 switch (previousTokenKind) {
239 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
240 scanMode = ScanStringMode::SingleQuote;
241 break;
242 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
243 scanMode = ScanStringMode::DoubleQuote;
244 break;
245 case T_PARTIAL_TEMPLATE_HEAD:
246 scanMode = ScanStringMode::TemplateHead;
247 break;
248 case T_PARTIAL_TEMPLATE_MIDDLE:
249 scanMode = ScanStringMode::TemplateContinuation;
250 break;
251 default:
252 break;
253 }
254 if (scanMode) {
255 scanChar();
256 _tokenStartPtr = _codePtr - 1;
257 _tokenLine = _currentLineNumber;
258 _tokenColumn = _currentColumnNumber;
259 tokenKind = scanString(mode: *scanMode);
260 }
261 }
262 }
 // T_ERROR here means that no continuation produced a token: do a regular scan.
263 if (tokenKind == T_ERROR)
264 tokenKind = scanToken();
265 _tokenLength = _codePtr - _tokenStartPtr - 1;
266 switch (tokenKind) {
267 // end of line and comments should not "overwrite" the old token type...
268 case T_EOL:
269 return tokenKind;
270 case T_COMMENT:
271 _state.comments = CommentState::HadComment;
272 return tokenKind;
273 case T_PARTIAL_COMMENT:
274 _state.comments = CommentState::InMultilineComment;
275 return tokenKind;
276 default:
277 _state.comments = CommentState::NoComment;
278 break;
279 }
280 _state.tokenKind = tokenKind;
281
282 _state.delimited = false;
283 _state.restrictedKeyword = false;
284 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
285
286 // update the flags
287 switch (_state.tokenKind) {
288 case T_LBRACE:
 // Braces are only counted while the counter is positive.
289 if (_state.bracesCount > 0)
290 ++_state.bracesCount;
291 Q_FALLTHROUGH();
292 case T_SEMICOLON:
293 _state.importState = ImportState::NoQmlImport;
294 Q_FALLTHROUGH();
295 case T_QUESTION:
296 case T_COLON:
297 case T_TILDE:
298 _state.delimited = true;
299 break;
300 case T_AUTOMATIC_SEMICOLON:
301 case T_AS:
302 _state.importState = ImportState::NoQmlImport;
303 Q_FALLTHROUGH();
304 default:
305 if (isBinop(tok: _state.tokenKind))
306 _state.delimited = true;
307 break;
308
309 case T_IMPORT:
310 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
311 _state.importState = ImportState::SawImport;
312 if (isBinop(tok: _state.tokenKind))
313 _state.delimited = true;
314 break;
315
316 case T_IF:
317 case T_FOR:
318 case T_WHILE:
319 case T_WITH:
320 _state.parenthesesState = CountParentheses;
321 _state.parenthesesCount = 0;
322 break;
323
324 case T_ELSE:
325 case T_DO:
326 _state.parenthesesState = BalancedParentheses;
327 break;
328
329 case T_CONTINUE:
330 case T_BREAK:
331 case T_RETURN:
332 case T_YIELD:
333 case T_THROW:
334 _state.restrictedKeyword = true;
335 break;
336 case T_RBRACE:
337 if (_state.bracesCount > 0)
338 --_state.bracesCount;
 // With the counter at 0, scanToken() continues a template string, so the
 // '}' is not returned; jump back and scan that continuation instead.
339 if (_state.bracesCount == 0)
340 goto again;
341 } // switch
342
343 // update the parentheses state
344 switch (_state.parenthesesState) {
345 case IgnoreParentheses:
346 break;
347
348 case CountParentheses:
349 if (_state.tokenKind == T_RPAREN) {
350 --_state.parenthesesCount;
351 if (_state.parenthesesCount == 0)
352 _state.parenthesesState = BalancedParentheses;
353 } else if (_state.tokenKind == T_LPAREN) {
354 ++_state.parenthesesCount;
355 }
356 break;
357
358 case BalancedParentheses:
359 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
360 _state.parenthesesState = IgnoreParentheses;
361 break;
362 } // switch
363
364 return _state.tokenKind;
365}
366
367uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
368{
369 Q_ASSERT(_state.currentChar == u'u');
370 scanChar(); // skip u
371 constexpr int distanceFromFirstHexToLastHex = 3;
372 if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(c: _state.currentChar)) {
373 uint codePoint = 0;
374 for (int i = 0; i < 4; ++i) {
375 int digit = hexDigit(c: _state.currentChar);
376 if (digit < 0)
377 goto error;
378 codePoint *= 16;
379 codePoint += digit;
380 scanChar();
381 }
382
383 *ok = true;
384 return codePoint;
385 } else if (_codePtr < _endPtr && _state.currentChar == u'{') {
386 scanChar(); // skip '{'
387 uint codePoint = 0;
388 if (!isHexDigit(c: _state.currentChar))
389 // need at least one hex digit
390 goto error;
391
392 while (_codePtr <= _endPtr) {
393 int digit = hexDigit(c: _state.currentChar);
394 if (digit < 0)
395 break;
396 codePoint *= 16;
397 codePoint += digit;
398 if (codePoint > 0x10ffff)
399 goto error;
400 scanChar();
401 }
402
403 if (_state.currentChar != u'}')
404 goto error;
405
406 scanChar(); // skip '}'
407
408
409 *ok = true;
410 return codePoint;
411 }
412
413error:
414 _state.errorCode = IllegalUnicodeEscapeSequence;
415 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Illegal unicode escape sequence");
416
417 *ok = false;
418 return 0;
419}
420
421QChar Lexer::decodeHexEscapeCharacter(bool *ok)
422{
423 if (isHexDigit(c: _codePtr[0]) && isHexDigit(c: _codePtr[1])) {
424 scanChar();
425
426 const QChar c1 = _state.currentChar;
427 scanChar();
428
429 const QChar c2 = _state.currentChar;
430 scanChar();
431
432 if (ok)
433 *ok = true;
434
435 return convertHex(c1, c2);
436 }
437
438 *ok = false;
439 return QChar();
440}
441
442namespace QQmlJS {
443QDebug operator<<(QDebug dbg, const Lexer &l)
444{
445 dbg << "{\n"
446 << " engine:" << qsizetype(l._engine) << ",\n"
447 << " lexMode:" << int(l._lexMode) << ",\n"
448 << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
449 << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
450 << " qmlMode:" << l._qmlMode << ",\n"
451 << " staticIsKeyword:" << l._staticIsKeyword << ",\n"
452 << " currentLineNumber:" << l._currentLineNumber << ",\n"
453 << " currentColumnNumber:" << l._currentColumnNumber << ",\n"
454 << " currentOffset:" << l._currentOffset << ",\n"
455 << " tokenLength:" << l._tokenLength << ",\n"
456 << " tokenLine:" << l._tokenLine << ",\n"
457 << " tokenColumn:" << l._tokenColumn << ",\n"
458 << " tokenText:" << l._tokenText << ",\n"
459 << " skipLinefeed:" << l._skipLinefeed << ",\n"
460 << " errorMessage:" << l._errorMessage << ",\n"
461 << " tokenSpell:" << l._tokenSpell << ",\n"
462 << " rawString:" << l._rawString << ",\n";
463 if (l._codePtr)
464 dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
465 else
466 dbg << " codePtr: *null*,\n";
467 if (l._tokenStartPtr)
468 dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
469 else
470 dbg << " tokenStartPtr: *null*,\n";
471 dbg << " state:" << l._state << "\n}";
472 return dbg;
473}
474}
475
476static inline bool isIdentifierStart(uint ch)
477{
478 // fast path for ascii
479 if ((ch >= u'a' && ch <= u'z') ||
480 (ch >= u'A' && ch <= u'Z') ||
481 ch == u'$' || ch == u'_')
482 return true;
483
484 switch (QChar::category(ucs4: ch)) {
485 case QChar::Number_Letter:
486 case QChar::Letter_Uppercase:
487 case QChar::Letter_Lowercase:
488 case QChar::Letter_Titlecase:
489 case QChar::Letter_Modifier:
490 case QChar::Letter_Other:
491 return true;
492 default:
493 break;
494 }
495 return false;
496}
497
498static bool isIdentifierPart(uint ch)
499{
500 // fast path for ascii
501 if ((ch >= u'a' && ch <= u'z') ||
502 (ch >= u'A' && ch <= u'Z') ||
503 (ch >= u'0' && ch <= u'9') ||
504 ch == u'$' || ch == u'_' ||
505 ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
506 return true;
507
508 switch (QChar::category(ucs4: ch)) {
509 case QChar::Mark_NonSpacing:
510 case QChar::Mark_SpacingCombining:
511
512 case QChar::Number_DecimalDigit:
513 case QChar::Number_Letter:
514
515 case QChar::Letter_Uppercase:
516 case QChar::Letter_Lowercase:
517 case QChar::Letter_Titlecase:
518 case QChar::Letter_Modifier:
519 case QChar::Letter_Other:
520
521 case QChar::Punctuation_Connector:
522 return true;
523 default:
524 break;
525 }
526 return false;
527}
528
// Scans one token beginning at the current character and returns its kind.
//
// Order of work:
//  1. A token parked in _state.stackToken is returned (and cleared) first.
//  2. If _state.bracesCount is 0, the scan continues a template string.
//  3. Whitespace is skipped. A line terminator after a restricted keyword
//     yields T_SEMICOLON; otherwise it is recorded in _state.terminator
//     (in LineByLine mode only when it is not the last character).
//  4. The token start pointer/line/column are recorded and the token is
//     recognized from its first character.
//
// Comments and a leading "#!" line are reported to the engine (when present)
// and are either returned as T_COMMENT / T_PARTIAL_COMMENT in LineByLine mode
// or skipped by jumping back to the whitespace loop.
// Returns T_ERROR when the input does not start a token.
529int Lexer::scanToken()
530{
531 if (_state.stackToken != -1) {
532 int tk = _state.stackToken;
533 _state.stackToken = -1;
534 return tk;
535 }
536
537 if (_state.bracesCount == 0) {
538 // we're inside a Template string
539 return scanString(mode: TemplateContinuation);
540 }
541
542 if (_state.comments == CommentState::NoComment)
543 _state.terminator = false;
544
545again:
546 _state.validTokenText = false;
547
548 while (_state.currentChar.isSpace()) {
549 if (isLineTerminator()) {
550 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
551 if (_state.restrictedKeyword) {
552 // automatic semicolon insertion
553 _tokenLine = _currentLineNumber;
554 _tokenColumn = _currentColumnNumber;
555 _tokenStartPtr = _codePtr - 1;
556 return T_SEMICOLON;
557 } else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
558 _state.terminator = true;
559 syncProhibitAutomaticSemicolon();
560 } // else we will do the previous things at the start of next line...
561 }
562
563 scanChar();
564 }
565
 // _codePtr is one past the current character, hence the "- 1".
566 _tokenStartPtr = _codePtr - 1;
567 _tokenLine = _currentLineNumber;
568 _tokenColumn = _currentColumnNumber;
569
570 if (_codePtr >= _endPtr) {
571 if (_lexMode == LexMode::LineByLine) {
572 if (!_code.isEmpty()) {
573 _state.currentChar = *(_codePtr - 2);
574 return T_EOL;
575 } else {
576 return EOF_SYMBOL;
577 }
578 } else if (_codePtr > _endPtr) {
579 return EOF_SYMBOL;
580 }
581 }
582
 // ch is the first character of the token; _state.currentChar becomes the one after it.
583 const QChar ch = _state.currentChar;
584 scanChar();
585
586 switch (ch.unicode()) {
587 case u'~': return T_TILDE;
588 case u'}': return T_RBRACE;
589
590 case u'|':
591 if (_state.currentChar == u'|') {
592 scanChar();
593 return T_OR_OR;
594 } else if (_state.currentChar == u'=') {
595 scanChar();
596 return T_OR_EQ;
597 }
598 return T_OR;
599
600 case u'{': return T_LBRACE;
601
602 case u'^':
603 if (_state.currentChar == u'=') {
604 scanChar();
605 return T_XOR_EQ;
606 }
607 return T_XOR;
608
609 case u']': return T_RBRACKET;
610 case u'[': return T_LBRACKET;
611 case u'?': {
612 if (_state.currentChar == u'?') {
613 scanChar();
614 return T_QUESTION_QUESTION;
615 }
 // "?." followed by a digit is not treated as T_QUESTION_DOT.
616 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
617 scanChar();
618 return T_QUESTION_DOT;
619 }
620
621 return T_QUESTION;
622 }
623
624 case u'>':
625 if (_state.currentChar == u'>') {
626 scanChar();
627 if (_state.currentChar == u'>') {
628 scanChar();
629 if (_state.currentChar == u'=') {
630 scanChar();
631 return T_GT_GT_GT_EQ;
632 }
633 return T_GT_GT_GT;
634 } else if (_state.currentChar == u'=') {
635 scanChar();
636 return T_GT_GT_EQ;
637 }
638 return T_GT_GT;
639 } else if (_state.currentChar == u'=') {
640 scanChar();
641 return T_GE;
642 }
643 return T_GT;
644
645 case u'=':
646 if (_state.currentChar == u'=') {
647 scanChar();
648 if (_state.currentChar == u'=') {
649 scanChar();
650 return T_EQ_EQ_EQ;
651 }
652 return T_EQ_EQ;
653 } else if (_state.currentChar == u'>') {
654 scanChar();
655 return T_ARROW;
656 }
657 return T_EQ;
658
659 case u'<':
660 if (_state.currentChar == u'=') {
661 scanChar();
662 return T_LE;
663 } else if (_state.currentChar == u'<') {
664 scanChar();
665 if (_state.currentChar == u'=') {
666 scanChar();
667 return T_LT_LT_EQ;
668 }
669 return T_LT_LT;
670 }
671 return T_LT;
672
673 case u';': return T_SEMICOLON;
674 case u':': return T_COLON;
675
676 case u'/':
677 switch (_state.currentChar.unicode()) {
678 case u'*':
 // Block comment: scan until the closing "*/" or the end of the code.
679 scanChar();
680 while (_codePtr <= _endPtr) {
681 if (_state.currentChar == u'*') {
682 scanChar();
683 if (_state.currentChar == u'/') {
684 scanChar();
685 if (_engine) {
686 _engine->addComment(pos: tokenOffset() + 2,
687 len: _codePtr - _tokenStartPtr - 1 - 4, line: tokenStartLine(),
688 col: tokenStartColumn() + 2);
689 }
690 if (_lexMode == LexMode::LineByLine)
691 return T_COMMENT;
692 else
693 goto again;
694 }
695 } else {
696 scanChar();
697 }
698 }
 // The end was reached without finding "*/".
699 if (_lexMode == LexMode::LineByLine)
700 return T_PARTIAL_COMMENT;
701 else
702 goto again;
703 case u'/':
 // Line comment: runs up to the next line terminator or the end of the code.
704 while (_codePtr <= _endPtr && !isLineTerminator()) {
705 scanChar();
706 }
707 if (_engine) {
708 _engine->addComment(pos: tokenOffset() + 2, len: _codePtr - _tokenStartPtr - 1 - 2,
709 line: tokenStartLine(), col: tokenStartColumn() + 2);
710 }
711 if (_lexMode == LexMode::LineByLine)
712 return T_COMMENT;
713 else
714 goto again;
715 case u'=':
716 scanChar();
717 return T_DIVIDE_EQ;
718 default:
719 return T_DIVIDE_;
720 }
721 case u'.':
 // After "import" a dot is always T_DOT, never the start of a number.
722 if (_state.importState == ImportState::SawImport)
723 return T_DOT;
724 if (isDecimalDigit(c: _state.currentChar.unicode()))
725 return scanNumber(ch);
726 if (_state.currentChar == u'.') {
727 scanChar();
728 if (_state.currentChar == u'.') {
729 scanChar();
730 return T_ELLIPSIS;
731 } else {
732 _state.errorCode = IllegalCharacter;
733 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unexpected token '.'");
734 return T_ERROR;
735 }
736 }
737 return T_DOT;
738
739 case u'-':
740 if (_state.currentChar == u'=') {
741 scanChar();
742 return T_MINUS_EQ;
743 } else if (_state.currentChar == u'-') {
744 scanChar();
745
 // "--" after a line break: emit T_SEMICOLON now and park T_MINUS_MINUS
 // in stackToken so that the next scanToken() call returns it.
746 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
747 && _state.tokenKind != T_LPAREN) {
748 _state.stackToken = T_MINUS_MINUS;
749 return T_SEMICOLON;
750 }
751
752 return T_MINUS_MINUS;
753 }
754 return T_MINUS;
755
756 case u',': return T_COMMA;
757
758 case u'+':
759 if (_state.currentChar == u'=') {
760 scanChar();
761 return T_PLUS_EQ;
762 } else if (_state.currentChar == u'+') {
763 scanChar();
764
 // Same handling as for "--" above, with T_PLUS_PLUS parked instead.
765 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
766 && _state.tokenKind != T_LPAREN) {
767 _state.stackToken = T_PLUS_PLUS;
768 return T_SEMICOLON;
769 }
770
771 return T_PLUS_PLUS;
772 }
773 return T_PLUS;
774
775 case u'*':
776 if (_state.currentChar == u'=') {
777 scanChar();
778 return T_STAR_EQ;
779 } else if (_state.currentChar == u'*') {
780 scanChar();
781 if (_state.currentChar == u'=') {
782 scanChar();
783 return T_STAR_STAR_EQ;
784 }
785 return T_STAR_STAR;
786 }
787 return T_STAR;
788
789 case u')': return T_RPAREN;
790 case u'(': return T_LPAREN;
791
792 case u'@': return T_AT;
793
794 case u'&':
795 if (_state.currentChar == u'=') {
796 scanChar();
797 return T_AND_EQ;
798 } else if (_state.currentChar == u'&') {
799 scanChar();
800 return T_AND_AND;
801 }
802 return T_AND;
803
804 case u'%':
805 if (_state.currentChar == u'=') {
806 scanChar();
807 return T_REMAINDER_EQ;
808 }
809 return T_REMAINDER;
810
811 case u'!':
812 if (_state.currentChar == u'=') {
813 scanChar();
814 if (_state.currentChar == u'=') {
815 scanChar();
816 return T_NOT_EQ_EQ;
817 }
818 return T_NOT_EQ;
819 }
820 return T_NOT;
821
822 case u'`':
 // Remember the current brace count; scanString() pops it again when the
 // template's closing backtick is reached.
823 _state.outerTemplateBraceCount.push(t: _state.bracesCount);
824 Q_FALLTHROUGH();
825 case u'\'':
826 case u'"':
 // The quote character itself is converted to the ScanStringMode.
827 return scanString(mode: ScanStringMode(ch.unicode()));
828 case u'0':
829 case u'1':
830 case u'2':
831 case u'3':
832 case u'4':
833 case u'5':
834 case u'6':
835 case u'7':
836 case u'8':
837 case u'9':
838 if (_state.importState == ImportState::SawImport)
839 return scanVersionNumber(ch);
840 else
841 return scanNumber(ch);
842
843 case '#':
 // Only a '#' at line 1, column 2 (the column counter after the '#' has been
 // consumed) starts a shebang line; any other '#' falls through to the default case.
844 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
845 // shebang support
846 while (_codePtr <= _endPtr && !isLineTerminator()) {
847 scanChar();
848 }
849 if (_engine) {
850 _engine->addComment(pos: tokenOffset(), len: _codePtr - _tokenStartPtr - 1, line: tokenStartLine(),
851 col: tokenStartColumn());
852 }
853 if (_lexMode == LexMode::LineByLine)
854 return T_COMMENT;
855 else
856 goto again;
857 }
858 Q_FALLTHROUGH();
859
860 default: {
 // Identifier or keyword. c holds the first code point, which is built from a
 // surrogate pair or decoded from a \u escape where necessary.
861 uint c = ch.unicode();
862 bool identifierWithEscapeChars = false;
863 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _state.currentChar.unicode())) {
864 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
865 scanChar();
866 } else if (c == '\\' && _state.currentChar == u'u') {
867 identifierWithEscapeChars = true;
868 bool ok = false;
869 c = decodeUnicodeEscapeCharacter(ok: &ok);
870 if (!ok)
871 return T_ERROR;
872 }
873 if (isIdentifierStart(ch: c)) {
874 if (identifierWithEscapeChars) {
875 _tokenText.resize(size: 0);
876 if (QChar::requiresSurrogates(ucs4: c)) {
877 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
878 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
879 } else {
880 _tokenText += QChar(c);
881 }
882 _state.validTokenText = true;
883 }
884 while (_codePtr <= _endPtr) {
885 c = _state.currentChar.unicode();
886 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _codePtr->unicode())) {
887 scanChar();
888 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
889 } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
 // First escape inside the identifier: from here on the decoded spelling is
 // collected in _tokenText, seeded with the characters scanned so far.
890 if (!identifierWithEscapeChars) {
891 identifierWithEscapeChars = true;
892 _tokenText.resize(size: 0);
893 _tokenText.insert(i: 0, uc: _tokenStartPtr, len: _codePtr - _tokenStartPtr - 1);
894 _state.validTokenText = true;
895 }
896
897 scanChar(); // skip '\\'
898 bool ok = false;
899 c = decodeUnicodeEscapeCharacter(ok: &ok);
900 if (!ok)
901 return T_ERROR;
902
903 if (!isIdentifierPart(ch: c))
904 break;
905
906 if (QChar::requiresSurrogates(ucs4: c)) {
907 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
908 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
909 } else {
910 _tokenText += QChar(c);
911 }
912 continue;
913 }
914
915 if (!isIdentifierPart(ch: c))
916 break;
917
918 if (identifierWithEscapeChars) {
919 if (QChar::requiresSurrogates(ucs4: c)) {
920 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
921 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
922 } else {
923 _tokenText += QChar(c);
924 }
925 }
926 scanChar();
927 }
928
929 _tokenLength = _codePtr - _tokenStartPtr - 1;
930
931 int kind = T_IDENTIFIER;
932
 // An identifier spelled with escapes is never classified as a keyword.
933 if (!identifierWithEscapeChars)
934 kind = classify(s: _tokenStartPtr, n: _tokenLength, parseModeFlags: parseModeFlags());
935
936 if (_engine) {
937 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
938 _tokenSpell = _engine->newStringRef(text: _tokenText);
939 else
940 _tokenSpell = _engine->midRef(position: _tokenStartPtr - _code.unicode(), size: _tokenLength);
941 }
942
943 return kind;
944 }
945 }
946
947 break;
948 }
949
950 return T_ERROR;
951}
952
// Scans a string literal or a piece of a template string and returns the token kind.
//
// mode selects the terminating quote: TemplateContinuation ends at the same
// character as TemplateHead; every other mode uses its own value as the quote
// character.
//
// With an engine present, a first loop looks for the closing quote in text that
// contains no backslash, no '$' inside a template and no line terminator; such
// a token refers directly to the source text. In all other cases the scan is
// rewound by one character and a second loop builds the decoded text in
// _tokenText, handling escape sequences and "${" substitutions.
//
// Returns T_STRING_LITERAL, T_NO_SUBSTITUTION_TEMPLATE, T_TEMPLATE_HEAD,
// T_TEMPLATE_MIDDLE or T_TEMPLATE_TAIL on success. If the code ends first, a
// T_PARTIAL_* kind is returned in LineByLine mode with non-empty code;
// otherwise T_ERROR is returned with UnclosedStringLiteral set. Invalid escape
// sequences and, outside QML mode, a line break in a quoted string also yield
// T_ERROR.
953int Lexer::scanString(ScanStringMode mode)
954{
955 QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode);
956 // we actually use T_STRING_LITERAL also for multiline strings, should we want to
957 // change that we should set it to:
958 // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL ||
959 // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
960 // here and uncomment the multilineStringLiteral = true below.
961 bool multilineStringLiteral = false;
962
963 const QChar *startCode = _codePtr - 1;
964 // in case we just parsed a \r, we need to reset this flag to get things working
965 // correctly in the loop below and afterwards
966 _skipLinefeed = false;
967 bool first = true;
968
969 if (_engine) {
 // Fast path: stop at the first character that needs the decoding loop below.
970 while (_codePtr <= _endPtr) {
971 if (isLineTerminator()) {
972 if ((quote == u'`' || qmlMode())) {
973 if (first)
974 --_currentLineNumber; // will be read again in scanChar()
975 break;
976 }
977 _state.errorCode = IllegalCharacter;
978 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
979 key: "Stray newline in string literal");
980 return T_ERROR;
981 } else if (_state.currentChar == u'\\') {
982 break;
983 } else if (_state.currentChar == u'$' && quote == u'`') {
984 break;
985 } else if (_state.currentChar == quote) {
 // Closing quote found: the spelling and the raw string both refer to the source text.
986 _tokenSpell =
987 _engine->midRef(position: startCode - _code.unicode(), size: _codePtr - startCode - 1);
988 _rawString = _tokenSpell;
989 scanChar();
990
991 if (quote == u'`')
992 _state.bracesCount = _state.outerTemplateBraceCount.pop();
993 if (mode == TemplateHead)
994 return T_NO_SUBSTITUTION_TEMPLATE;
995 else if (mode == TemplateContinuation)
996 return T_TEMPLATE_TAIL;
997 else if (multilineStringLiteral)
998 return T_MULTILINE_STRING_LITERAL;
999 else
1000 return T_STRING_LITERAL;
1001 }
1002 // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1003 _state.currentChar = *_codePtr++;
1004 ++_currentColumnNumber;
1005 first = false;
1006 }
1007 }
1008
1009 // rewind by one char, so things gets scanned correctly
1010 --_codePtr;
1011 --_currentColumnNumber;
1012
 // Seed the decoded text with everything scanned so far.
1013 _state.validTokenText = true;
1014 _tokenText = QString(startCode, _codePtr - startCode);
1015
 // Stores the raw text from startCode up to one character before end, with
 // "\r\n" and lone '\r' replaced by '\n'. It is only called while _engine is non-null.
1016 auto setRawString = [&](const QChar *end) {
1017 QString raw(startCode, end - startCode - 1);
1018 raw.replace(before: QLatin1String("\r\n"), after: QLatin1String("\n"));
1019 raw.replace(before: u'\r', after: u'\n');
1020 _rawString = _engine->newStringRef(text: raw);
1021 };
1022
1023 scanChar();
1024
1025 while (_codePtr <= _endPtr) {
1026 if (_state.currentChar == quote) {
1027 scanChar();
1028
1029 if (_engine) {
1030 _tokenSpell = _engine->newStringRef(text: _tokenText);
1031 if (quote == u'`')
1032 setRawString(_codePtr - 1);
1033 }
1034
 // Leaving the template: restore the brace count saved when it was opened.
1035 if (quote == u'`')
1036 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1037
1038 if (mode == TemplateContinuation)
1039 return T_TEMPLATE_TAIL;
1040 else if (mode == TemplateHead)
1041 return T_NO_SUBSTITUTION_TEMPLATE;
1042
1043 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1044 } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
 // "${" starts a substitution: consume both characters and start counting braces at 1.
1045 scanChar();
1046 scanChar();
1047 _state.bracesCount = 1;
1048 if (_engine) {
1049 _tokenSpell = _engine->newStringRef(text: _tokenText);
1050 setRawString(_codePtr - 2);
1051 }
1052
1053 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1054 } else if (_state.currentChar == u'\\') {
1055 scanChar();
1056 if (_codePtr > _endPtr) {
1057 _state.errorCode = IllegalEscapeSequence;
1058 _errorMessage = QCoreApplication::translate(
1059 context: "QQmlParser", key: "End of file reached at escape sequence");
1060 return T_ERROR;
1061 }
1062
 // u receives the decoded character and is appended to _tokenText after the switch.
1063 QChar u;
1064
1065 switch (_state.currentChar.unicode()) {
1066 // unicode escape sequence
1067 case u'u': {
1068 bool ok = false;
1069 uint codePoint = decodeUnicodeEscapeCharacter(ok: &ok);
1070 if (!ok)
1071 return T_ERROR;
1072 if (QChar::requiresSurrogates(ucs4: codePoint)) {
1073 // need to use a surrogate pair
1074 _tokenText += QChar(QChar::highSurrogate(ucs4: codePoint));
1075 u = QChar::lowSurrogate(ucs4: codePoint);
1076 } else {
1077 u = QChar(codePoint);
1078 }
1079 } break;
1080
1081 // hex escape sequence
1082 case u'x': {
1083 bool ok = false;
1084 u = decodeHexEscapeCharacter(ok: &ok);
1085 if (!ok) {
1086 _state.errorCode = IllegalHexadecimalEscapeSequence;
1087 _errorMessage = QCoreApplication::translate(
1088 context: "QQmlParser", key: "Illegal hexadecimal escape sequence");
1089 return T_ERROR;
1090 }
1091 } break;
1092
1093 // single character escape sequence
1094 case u'\\': u = u'\\'; scanChar(); break;
1095 case u'\'': u = u'\''; scanChar(); break;
1096 case u'\"': u = u'\"'; scanChar(); break;
1097 case u'b': u = u'\b'; scanChar(); break;
1098 case u'f': u = u'\f'; scanChar(); break;
1099 case u'n': u = u'\n'; scanChar(); break;
1100 case u'r': u = u'\r'; scanChar(); break;
1101 case u't': u = u'\t'; scanChar(); break;
1102 case u'v': u = u'\v'; scanChar(); break;
1103
1104 case u'0':
 // "\0" not followed by a digit is the NUL character; otherwise it is
 // rejected as an octal escape below.
1105 if (!_codePtr->isDigit()) {
1106 scanChar();
1107 u = u'\0';
1108 break;
1109 }
1110 Q_FALLTHROUGH();
1111 case u'1':
1112 case u'2':
1113 case u'3':
1114 case u'4':
1115 case u'5':
1116 case u'6':
1117 case u'7':
1118 case u'8':
1119 case u'9':
1120 _state.errorCode = IllegalEscapeSequence;
1121 _errorMessage = QCoreApplication::translate(
1122 context: "QQmlParser", key: "Octal escape sequences are not allowed");
1123 return T_ERROR;
1124
 // Backslash followed by a line terminator: nothing is appended to the token text.
1125 case u'\r':
1126 case u'\n':
1127 case 0x2028u:
1128 case 0x2029u:
1129 // uncomment the following to use T_MULTILINE_STRING_LITERAL
1130 // multilineStringLiteral = true;
1131 scanChar();
1132 continue;
1133
1134 default:
1135 // non escape character
1136 u = _state.currentChar;
1137 scanChar();
1138 }
1139
1140 _tokenText += u;
1141 } else {
1142 _tokenText += _state.currentChar;
1143 scanChar();
1144 }
1145 }
 // The code ended before the closing quote was found.
1146 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1147 if (mode == TemplateContinuation)
1148 return T_PARTIAL_TEMPLATE_MIDDLE;
1149 else if (mode == TemplateHead)
1150 return T_PARTIAL_TEMPLATE_HEAD;
1151 else if (mode == SingleQuote)
1152 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1153 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1154 }
1155 _state.errorCode = UnclosedStringLiteral;
1156 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unclosed string at end of line");
1157 return T_ERROR;
1158}
1159
1160int Lexer::scanNumber(QChar ch)
1161{
1162 auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){
1163 if (_state.currentChar == u'_') {
1164 if (peekChar() == u'_') {
1165 _state.errorCode = IllegalNumber;
1166 _errorMessage = QCoreApplication::translate(
1167 context: "QQmlParser",
1168 key: "There can be at most one numeric separator between digits"
1169 );
1170 return false;
1171 }
1172
1173 if (!isNextCharacterValid()) {
1174 _state.errorCode = IllegalNumber;
1175 _errorMessage = QCoreApplication::translate(
1176 context: "QQmlParser",
1177 key: "A trailing numeric separator is not allowed in numeric literals"
1178 );
1179 return false;
1180 }
1181
1182 scanChar();
1183 }
1184
1185 return true;
1186 };
1187
1188 if (ch == u'0') {
1189 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1190 ch = _state.currentChar; // remember the x or X to use it in the error message below.
1191
1192 // parse hex integer literal
1193 scanChar(); // consume 'x'
1194
1195 if (!isHexDigit(c: _state.currentChar)) {
1196 _state.errorCode = IllegalNumber;
1197 _errorMessage = QCoreApplication::translate(
1198 context: "QQmlParser",
1199 key: "At least one hexadecimal digit is required after '0%1'")
1200 .arg(a: ch);
1201 return T_ERROR;
1202 }
1203
1204 double d = 0.;
1205 while (1) {
1206 int digit = ::hexDigit(c: _state.currentChar);
1207 if (digit < 0)
1208 break;
1209 d *= 16;
1210 d += digit;
1211 scanChar();
1212
1213 if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); }))
1214 return T_ERROR;
1215 }
1216
1217 _state.tokenValue = d;
1218 return T_NUMERIC_LITERAL;
1219 } else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1220 ch = _state.currentChar; // remember the o or O to use it in the error message below.
1221
1222 // parse octal integer literal
1223 scanChar(); // consume 'o'
1224
1225 if (!isOctalDigit(c: _state.currentChar.unicode())) {
1226 _state.errorCode = IllegalNumber;
1227 _errorMessage =
1228 QCoreApplication::translate(
1229 context: "QQmlParser", key: "At least one octal digit is required after '0%1'")
1230 .arg(a: ch);
1231 return T_ERROR;
1232 }
1233
1234 double d = 0.;
1235 while (1) {
1236 int digit = ::octalDigit(c: _state.currentChar);
1237 if (digit < 0)
1238 break;
1239 d *= 8;
1240 d += digit;
1241 scanChar();
1242
1243 if (!scanOptionalNumericSeparator([this](){
1244 return isOctalDigit(c: peekChar().unicode());
1245 })) {
1246 return T_ERROR;
1247 }
1248 }
1249
1250 _state.tokenValue = d;
1251 return T_NUMERIC_LITERAL;
1252 } else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1253 ch = _state.currentChar; // remember the b or B to use it in the error message below.
1254
1255 // parse binary integer literal
1256 scanChar(); // consume 'b'
1257
1258 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1259 _state.errorCode = IllegalNumber;
1260 _errorMessage =
1261 QCoreApplication::translate(
1262 context: "QQmlParser", key: "At least one binary digit is required after '0%1'")
1263 .arg(a: ch);
1264 return T_ERROR;
1265 }
1266
1267 double d = 0.;
1268 while (1) {
1269 int digit = 0;
1270 if (_state.currentChar.unicode() == u'1')
1271 digit = 1;
1272 else if (_state.currentChar.unicode() != u'0')
1273 break;
1274 d *= 2;
1275 d += digit;
1276 scanChar();
1277
1278 if (!scanOptionalNumericSeparator([this](){
1279 return peekChar().unicode() == u'0' || peekChar().unicode() == u'1';
1280 })) {
1281 return T_ERROR;
1282 }
1283 }
1284
1285 _state.tokenValue = d;
1286 return T_NUMERIC_LITERAL;
1287 } else if (_state.currentChar.isDigit() && !qmlMode()) {
1288 _state.errorCode = IllegalCharacter;
1289 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
1290 key: "Decimal numbers can't start with '0'");
1291 return T_ERROR;
1292 }
1293 }
1294
1295 // decimal integer literal
1296 QVarLengthArray<char,32> chars;
1297 chars.append(t: ch.unicode());
1298
1299 if (ch != u'.') {
1300 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1301 return T_ERROR;
1302
1303 while (_state.currentChar.isDigit()) {
1304 chars.append(t: _state.currentChar.unicode());
1305 scanChar(); // consume the digit
1306
1307 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1308 return T_ERROR;
1309 }
1310
1311 if (_state.currentChar == u'.') {
1312 chars.append(t: _state.currentChar.unicode());
1313 scanChar(); // consume `.'
1314 }
1315 }
1316
1317 while (_state.currentChar.isDigit()) {
1318 chars.append(t: _state.currentChar.unicode());
1319 scanChar();
1320
1321 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1322 return T_ERROR;
1323 }
1324
1325 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1326 if (_codePtr[0].isDigit()
1327 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1328
1329 chars.append(t: _state.currentChar.unicode());
1330 scanChar(); // consume `e'
1331
1332 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1333 chars.append(t: _state.currentChar.unicode());
1334 scanChar(); // consume the sign
1335 }
1336
1337 while (_state.currentChar.isDigit()) {
1338 chars.append(t: _state.currentChar.unicode());
1339 scanChar();
1340
1341 if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1342 return T_ERROR;
1343 }
1344 }
1345 }
1346
1347 const char *begin = chars.constData();
1348 const char *end = nullptr;
1349 bool ok = false;
1350
1351 _state.tokenValue = qstrntod(s00: begin, len: chars.size(), se: &end, ok: &ok);
1352
1353 if (end - begin != chars.size()) {
1354 _state.errorCode = IllegalExponentIndicator;
1355 _errorMessage =
1356 QCoreApplication::translate(context: "QQmlParser", key: "Illegal syntax for exponential number");
1357 return T_ERROR;
1358 }
1359
1360 return T_NUMERIC_LITERAL;
1361}
1362
1363int Lexer::scanVersionNumber(QChar ch)
1364{
1365 if (ch == u'0') {
1366 _state.tokenValue = 0;
1367 return T_VERSION_NUMBER;
1368 }
1369
1370 int acc = 0;
1371 acc += ch.digitValue();
1372
1373 while (_state.currentChar.isDigit()) {
1374 acc *= 10;
1375 acc += _state.currentChar.digitValue();
1376 scanChar(); // consume the digit
1377 }
1378
1379 _state.tokenValue = acc;
1380 return T_VERSION_NUMBER;
1381}
1382
1383bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1384{
1385 _tokenText.resize(size: 0);
1386 _state.validTokenText = true;
1387 _state.patternFlags = 0;
1388
1389 if (prefix == EqualPrefix)
1390 _tokenText += u'=';
1391
1392 while (true) {
1393 switch (_state.currentChar.unicode()) {
1394 case u'/':
1395 scanChar();
1396
1397 // scan the flags
1398 _state.patternFlags = 0;
1399 while (isIdentLetter(c: _state.currentChar)) {
1400 int flag = regExpFlagFromChar(ch: _state.currentChar);
1401 if (flag == 0 || _state.patternFlags & flag) {
1402 _errorMessage = QCoreApplication::translate(
1403 context: "QQmlParser", key: "Invalid regular expression flag '%0'")
1404 .arg(a: QChar(_state.currentChar));
1405 return false;
1406 }
1407 _state.patternFlags |= flag;
1408 scanChar();
1409 }
1410
1411 _tokenLength = _codePtr - _tokenStartPtr - 1;
1412 return true;
1413
1414 case u'\\':
1415 // regular expression backslash sequence
1416 _tokenText += _state.currentChar;
1417 scanChar();
1418
1419 if (_codePtr > _endPtr || isLineTerminator()) {
1420 _errorMessage = QCoreApplication::translate(
1421 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1422 return false;
1423 }
1424
1425 _tokenText += _state.currentChar;
1426 scanChar();
1427 break;
1428
1429 case u'[':
1430 // regular expression class
1431 _tokenText += _state.currentChar;
1432 scanChar();
1433
1434 while (_codePtr <= _endPtr && !isLineTerminator()) {
1435 if (_state.currentChar == u']')
1436 break;
1437 else if (_state.currentChar == u'\\') {
1438 // regular expression backslash sequence
1439 _tokenText += _state.currentChar;
1440 scanChar();
1441
1442 if (_codePtr > _endPtr || isLineTerminator()) {
1443 _errorMessage = QCoreApplication::translate(
1444 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1445 return false;
1446 }
1447
1448 _tokenText += _state.currentChar;
1449 scanChar();
1450 } else {
1451 _tokenText += _state.currentChar;
1452 scanChar();
1453 }
1454 }
1455
1456 if (_state.currentChar != u']') {
1457 _errorMessage = QCoreApplication::translate(
1458 context: "QQmlParser", key: "Unterminated regular expression class");
1459 return false;
1460 }
1461
1462 _tokenText += _state.currentChar;
1463 scanChar(); // skip ]
1464 break;
1465
1466 default:
1467 if (_codePtr > _endPtr || isLineTerminator()) {
1468 _errorMessage = QCoreApplication::translate(
1469 context: "QQmlParser", key: "Unterminated regular expression literal");
1470 return false;
1471 } else {
1472 _tokenText += _state.currentChar;
1473 scanChar();
1474 }
1475 } // switch
1476 } // while
1477
1478 return false;
1479}
1480
1481bool Lexer::isLineTerminator() const
1482{
1483 const ushort unicode = _state.currentChar.unicode();
1484 return unicode == 0x000Au
1485 || unicode == 0x000Du
1486 || unicode == 0x2028u
1487 || unicode == 0x2029u;
1488}
1489
1490unsigned Lexer::isLineTerminatorSequence() const
1491{
1492 switch (_state.currentChar.unicode()) {
1493 case 0x000Au:
1494 case 0x2028u:
1495 case 0x2029u:
1496 return 1;
1497 case 0x000Du:
1498 if (_codePtr->unicode() == 0x000Au)
1499 return 2;
1500 else
1501 return 1;
1502 default:
1503 return 0;
1504 }
1505}
1506
1507bool Lexer::isIdentLetter(QChar ch)
1508{
1509 // ASCII-biased, since all reserved words are ASCII, aand hence the
1510 // bulk of content to be parsed.
1511 if ((ch >= u'a' && ch <= u'z')
1512 || (ch >= u'A' && ch <= u'Z')
1513 || ch == u'$' || ch == u'_')
1514 return true;
1515 if (ch.unicode() < 128)
1516 return false;
1517 return ch.isLetterOrNumber();
1518}
1519
1520bool Lexer::isDecimalDigit(ushort c)
1521{
1522 return (c >= u'0' && c <= u'9');
1523}
1524
1525bool Lexer::isHexDigit(QChar c)
1526{
1527 return ((c >= u'0' && c <= u'9')
1528 || (c >= u'a' && c <= u'f')
1529 || (c >= u'A' && c <= u'F'));
1530}
1531
1532bool Lexer::isOctalDigit(ushort c)
1533{
1534 return (c >= u'0' && c <= u'7');
1535}
1536
1537QString Lexer::tokenText() const
1538{
1539 if (_state.validTokenText)
1540 return _tokenText;
1541
1542 if (_state.tokenKind == T_STRING_LITERAL)
1543 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1544
1545 return QString(_tokenStartPtr, _tokenLength);
1546}
1547
1548Lexer::Error Lexer::errorCode() const
1549{
1550 return _state.errorCode;
1551}
1552
1553QString Lexer::errorMessage() const
1554{
1555 return _errorMessage;
1556}
1557
1558void Lexer::syncProhibitAutomaticSemicolon()
1559{
1560 if (_state.parenthesesState == BalancedParentheses) {
1561 // we have seen something like "if (foo)", which means we should
1562 // never insert an automatic semicolon at this point, since it would
1563 // then be expanded into an empty statement (ECMA-262 7.9.1)
1564 _state.prohibitAutomaticSemicolon = true;
1565 _state.parenthesesState = IgnoreParentheses;
1566 } else {
1567 _state.prohibitAutomaticSemicolon = false;
1568 }
1569}
1570
1571bool Lexer::prevTerminator() const
1572{
1573 return _state.terminator;
1574}
1575
1576bool Lexer::followsClosingBrace() const
1577{
1578 return _state.followsClosingBrace;
1579}
1580
1581bool Lexer::canInsertAutomaticSemicolon(int token) const
1582{
1583 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1584 || _state.followsClosingBrace;
1585}
1586
1587static const int uriTokens[] = {
1588 QQmlJSGrammar::T_IDENTIFIER,
1589 QQmlJSGrammar::T_PROPERTY,
1590 QQmlJSGrammar::T_SIGNAL,
1591 QQmlJSGrammar::T_READONLY,
1592 QQmlJSGrammar::T_ON,
1593 QQmlJSGrammar::T_BREAK,
1594 QQmlJSGrammar::T_CASE,
1595 QQmlJSGrammar::T_CATCH,
1596 QQmlJSGrammar::T_CONTINUE,
1597 QQmlJSGrammar::T_DEFAULT,
1598 QQmlJSGrammar::T_DELETE,
1599 QQmlJSGrammar::T_DO,
1600 QQmlJSGrammar::T_ELSE,
1601 QQmlJSGrammar::T_FALSE,
1602 QQmlJSGrammar::T_FINALLY,
1603 QQmlJSGrammar::T_FOR,
1604 QQmlJSGrammar::T_FUNCTION,
1605 QQmlJSGrammar::T_IF,
1606 QQmlJSGrammar::T_IN,
1607 QQmlJSGrammar::T_OF,
1608 QQmlJSGrammar::T_INSTANCEOF,
1609 QQmlJSGrammar::T_NEW,
1610 QQmlJSGrammar::T_NULL,
1611 QQmlJSGrammar::T_RETURN,
1612 QQmlJSGrammar::T_SWITCH,
1613 QQmlJSGrammar::T_THIS,
1614 QQmlJSGrammar::T_THROW,
1615 QQmlJSGrammar::T_TRUE,
1616 QQmlJSGrammar::T_TRY,
1617 QQmlJSGrammar::T_TYPEOF,
1618 QQmlJSGrammar::T_VAR,
1619 QQmlJSGrammar::T_VOID,
1620 QQmlJSGrammar::T_WHILE,
1621 QQmlJSGrammar::T_CONST,
1622 QQmlJSGrammar::T_DEBUGGER,
1623 QQmlJSGrammar::T_RESERVED_WORD,
1624 QQmlJSGrammar::T_WITH,
1625
1626 QQmlJSGrammar::EOF_SYMBOL
1627};
1628static inline bool isUriToken(int token)
1629{
1630 const int *current = uriTokens;
1631 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1632 if (*current == token)
1633 return true;
1634 ++current;
1635 }
1636 return false;
1637}
1638
1639bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error)
1640{
1641 auto setError = [error, this](QString message) {
1642 error->message = std::move(message);
1643 error->loc.startLine = tokenStartLine();
1644 error->loc.startColumn = tokenStartColumn();
1645 };
1646
1647 QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1648 Q_ASSERT(!_qmlMode);
1649
1650 lex(); // fetch the first token
1651
1652 if (_state.tokenKind != T_DOT)
1653 return true;
1654
1655 do {
1656 const int lineNumber = tokenStartLine();
1657 const int column = tokenStartColumn();
1658
1659 lex(); // skip T_DOT
1660
1661 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1662 return true; // expected a valid QML/JS directive
1663
1664 const QString directiveName = tokenText();
1665
1666 if (! (directiveName == QLatin1String("pragma") ||
1667 directiveName == QLatin1String("import"))) {
1668 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1669 return false; // not a valid directive name
1670 }
1671
1672 // it must be a pragma or an import directive.
1673 if (directiveName == QLatin1String("pragma")) {
1674 // .pragma library
1675 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1676 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1677 return false; // expected `library
1678 }
1679
1680 // we found a .pragma library directive
1681 directives->pragmaLibrary();
1682
1683 } else {
1684 Q_ASSERT(directiveName == QLatin1String("import"));
1685 lex(); // skip .import
1686
1687 QString pathOrUri;
1688 QString version;
1689 bool fileImport = false; // file or uri import
1690
1691 if (_state.tokenKind == T_STRING_LITERAL) {
1692 // .import T_STRING_LITERAL as T_IDENTIFIER
1693
1694 fileImport = true;
1695 pathOrUri = tokenText();
1696
1697 if (!pathOrUri.endsWith(s: QLatin1String("js"))) {
1698 setError(QCoreApplication::translate(context: "QQmlParser",key: "Imported file must be a script"));
1699 return false;
1700 }
1701 lex();
1702
1703 } else if (_state.tokenKind == T_IDENTIFIER) {
1704 // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER
1705 while (true) {
1706 if (!isUriToken(token: _state.tokenKind)) {
1707 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1708 return false;
1709 }
1710
1711 pathOrUri.append(s: tokenText());
1712
1713 lex();
1714 if (tokenStartLine() != lineNumber) {
1715 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1716 return false;
1717 }
1718 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1719 break;
1720
1721 pathOrUri.append(c: u'.');
1722
1723 lex();
1724 if (tokenStartLine() != lineNumber) {
1725 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1726 return false;
1727 }
1728 }
1729
1730 if (_state.tokenKind == T_VERSION_NUMBER) {
1731 version = tokenText();
1732 lex();
1733 if (_state.tokenKind == T_DOT) {
1734 version += u'.';
1735 lex();
1736 if (_state.tokenKind != T_VERSION_NUMBER) {
1737 setError(QCoreApplication::translate(
1738 context: "QQmlParser", key: "Incomplete version number (dot but no minor)"));
1739 return false; // expected the module version number
1740 }
1741 version += tokenText();
1742 lex();
1743 }
1744 }
1745 }
1746
1747 //
1748 // recognize the mandatory `as' followed by the module name
1749 //
1750 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1751 if (fileImport)
1752 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1753 else
1754 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1755 if (tokenStartLine() != lineNumber) {
1756 error->loc.startLine = lineNumber;
1757 error->loc.startColumn = column;
1758 }
1759 return false; // expected `as'
1760 }
1761
1762 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1763 if (fileImport)
1764 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1765 else
1766 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1767 return false; // expected module name
1768 }
1769
1770 const QString module = tokenText();
1771 if (!module.at(i: 0).isUpper()) {
1772 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid import qualifier"));
1773 return false;
1774 }
1775
1776 if (fileImport)
1777 directives->importFile(jsfile: pathOrUri, module, line: lineNumber, column);
1778 else
1779 directives->importModule(uri: pathOrUri, version, module, line: lineNumber, column);
1780 }
1781
1782 if (tokenStartLine() != lineNumber) {
1783 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1784 return false; // the directives cannot span over multiple lines
1785 }
1786
1787 // fetch the first token after the .pragma/.import directive
1788 lex();
1789 } while (_state.tokenKind == T_DOT);
1790
1791 return true;
1792}
1793
1794const Lexer::State &Lexer::state() const
1795{
1796 return _state;
1797}
1798void Lexer::setState(const Lexer::State &state)
1799{
1800 _state = state;
1801}
1802
1803int Lexer::parseModeFlags() const {
1804 int flags = 0;
1805 if (qmlMode())
1806 flags |= QmlMode|StaticIsKeyword;
1807 if (yieldIsKeyWord())
1808 flags |= YieldIsKeyword;
1809 if (_staticIsKeyword)
1810 flags |= StaticIsKeyword;
1811 return flags;
1812}
1813
1814namespace QQmlJS {
1815QDebug operator<<(QDebug dbg, const Lexer::State &s)
1816{
1817 dbg << "{\n"
1818 << " errorCode:" << int(s.errorCode) << ",\n"
1819 << " currentChar:" << s.currentChar << ",\n"
1820 << " tokenValue:" << s.tokenValue << ",\n"
1821 << " parenthesesState:" << s.parenthesesState << ",\n"
1822 << " parenthesesCount:" << s.parenthesesCount << ",\n"
1823 << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1824 << " bracesCount:" << s.bracesCount << ",\n"
1825 << " stackToken:" << s.stackToken << ",\n"
1826 << " patternFlags:" << s.patternFlags << ",\n"
1827 << " tokenKind:" << s.tokenKind << ",\n"
1828 << " importState:" << int(s.importState) << ",\n"
1829 << " validTokenText:" << s.validTokenText << ",\n"
1830 << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1831 << " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1832 << " terminator:" << s.terminator << ",\n"
1833 << " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1834 << " delimited:" << s.delimited << ",\n"
1835 << " handlingDirectives:" << s.handlingDirectives << ",\n"
1836 << " generatorLevel:" << s.generatorLevel << "\n}";
1837 return dbg;
1838}
1839}
1840
1841QT_END_NAMESPACE
1842

Provided by KDAB

Privacy Policy
Start learning QML with our Intro Training
Find out more

source code of qtdeclarative/src/qml/parser/qqmljslexer.cpp