1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4#include "qqmljslexer_p.h"
5#include "qqmljsengine_p.h"
6#include "qqmljskeywords_p.h"
7
8#include <private/qqmljsdiagnosticmessage_p.h>
9#include <private/qqmljsmemorypool_p.h>
10#include <private/qlocale_tools_p.h>
11
12
13#include <QtCore/qcoreapplication.h>
14#include <QtCore/qvarlengtharray.h>
15#include <QtCore/qdebug.h>
16#include <QtCore/QScopedValueRollback>
17
18#include <optional>
19
20QT_BEGIN_NAMESPACE
21using namespace QQmlJS;
22
23static inline int regExpFlagFromChar(const QChar &ch)
24{
25 switch (ch.unicode()) {
26 case 'g': return Lexer::RegExp_Global;
27 case 'i': return Lexer::RegExp_IgnoreCase;
28 case 'm': return Lexer::RegExp_Multiline;
29 case 'u': return Lexer::RegExp_Unicode;
30 case 'y': return Lexer::RegExp_Sticky;
31 }
32 return 0;
33}
34
// Converts a single hexadecimal digit character to its numeric value (0..15).
// The input is not validated: anything that is neither a decimal digit nor in
// 'a'..'f' is handled as if it were an upper-case hex letter.
static inline unsigned char convertHex(ushort c)
{
    const bool isDecimal = (c >= '0' && c <= '9');
    if (isDecimal)
        return c - '0';

    const bool isLowerCase = (c >= 'a' && c <= 'f');
    const char letterBase = isLowerCase ? 'a' : 'A';
    return c - letterBase + 10;
}
44
45static inline QChar convertHex(QChar c1, QChar c2)
46{
47 return QChar((convertHex(c: c1.unicode()) << 4) + convertHex(c: c2.unicode()));
48}
49
50Lexer::Lexer(Engine *engine, LexMode lexMode)
51 : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
52{
53 if (engine)
54 engine->setLexer(this);
55}
56
57bool Lexer::qmlMode() const
58{
59 return _qmlMode;
60}
61
62QString Lexer::code() const
63{
64 return _code;
65}
66
67void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
68 Lexer::CodeContinuation codeContinuation)
69{
70 if (codeContinuation == Lexer::CodeContinuation::Continue)
71 _currentOffset += _code.size();
72 else
73 _currentOffset = 0;
74 if (_engine)
75 _engine->setCode(code);
76
77 _qmlMode = qmlMode;
78 _code = code;
79 _skipLinefeed = false;
80
81 _tokenText.clear();
82 _tokenText.reserve(asize: 1024);
83 _errorMessage.clear();
84 _tokenSpell = QStringView();
85 _rawString = QStringView();
86
87 _codePtr = code.unicode();
88 _endPtr = _codePtr + code.size();
89 _tokenStartPtr = _codePtr;
90
91 if (lineno >= 0)
92 _currentLineNumber = lineno;
93 _currentColumnNumber = 0;
94 _tokenLine = _currentLineNumber;
95 _tokenColumn = 0;
96 _tokenLength = 0;
97
98 if (codeContinuation == Lexer::CodeContinuation::Reset)
99 _state = State {};
100}
101
102void Lexer::scanChar()
103{
104 if (_skipLinefeed) {
105 Q_ASSERT(*_codePtr == u'\n');
106 ++_codePtr;
107 _skipLinefeed = false;
108 }
109 _state.currentChar = *_codePtr++;
110 ++_currentColumnNumber;
111
112 if (isLineTerminator()) {
113 if (_state.currentChar == u'\r') {
114 if (_codePtr < _endPtr && *_codePtr == u'\n')
115 _skipLinefeed = true;
116 _state.currentChar = u'\n';
117 }
118 ++_currentLineNumber;
119 _currentColumnNumber = 0;
120 }
121}
122
123QChar Lexer::peekChar()
124{
125 auto peekPtr = _codePtr;
126 if (peekPtr < _endPtr)
127 return *peekPtr;
128 return QChar();
129}
130
131namespace {
132inline bool isBinop(int tok)
133{
134 switch (tok) {
135 case Lexer::T_AND:
136 case Lexer::T_AND_AND:
137 case Lexer::T_AND_EQ:
138 case Lexer::T_DIVIDE_:
139 case Lexer::T_DIVIDE_EQ:
140 case Lexer::T_EQ:
141 case Lexer::T_EQ_EQ:
142 case Lexer::T_EQ_EQ_EQ:
143 case Lexer::T_GE:
144 case Lexer::T_GT:
145 case Lexer::T_GT_GT:
146 case Lexer::T_GT_GT_EQ:
147 case Lexer::T_GT_GT_GT:
148 case Lexer::T_GT_GT_GT_EQ:
149 case Lexer::T_LE:
150 case Lexer::T_LT:
151 case Lexer::T_LT_LT:
152 case Lexer::T_LT_LT_EQ:
153 case Lexer::T_MINUS:
154 case Lexer::T_MINUS_EQ:
155 case Lexer::T_NOT_EQ:
156 case Lexer::T_NOT_EQ_EQ:
157 case Lexer::T_OR:
158 case Lexer::T_OR_EQ:
159 case Lexer::T_OR_OR:
160 case Lexer::T_PLUS:
161 case Lexer::T_PLUS_EQ:
162 case Lexer::T_REMAINDER:
163 case Lexer::T_REMAINDER_EQ:
164 case Lexer::T_RETURN:
165 case Lexer::T_STAR:
166 case Lexer::T_STAR_EQ:
167 case Lexer::T_XOR:
168 case Lexer::T_XOR_EQ:
169 return true;
170
171 default:
172 return false;
173 }
174}
175
176int hexDigit(QChar c)
177{
178 if (c >= u'0' && c <= u'9')
179 return c.unicode() - u'0';
180 if (c >= u'a' && c <= u'f')
181 return c.unicode() - u'a' + 10;
182 if (c >= u'A' && c <= u'F')
183 return c.unicode() - u'A' + 10;
184 return -1;
185}
186
187int octalDigit(QChar c)
188{
189 if (c >= u'0' && c <= u'7')
190 return c.unicode() - u'0';
191 return -1;
192}
193
194} // anonymous namespace
195
// Returns the kind of the next token and updates the lexer state that depends on it.
//
// On the first pass, when no token is stacked, an unfinished multi-line comment
// (_state.comments == CommentState::InMultilineComment) or an unfinished
// string/template (the previous token kind is one of the T_PARTIAL_* string
// kinds) is resumed; in every other case the token comes from scanToken().
// T_EOL, T_COMMENT and T_PARTIAL_COMMENT are returned without being stored in
// _state.tokenKind. For any other token the delimited / restrictedKeyword /
// followsClosingBrace flags, the import state, the brace counter and the
// parentheses state are refreshed before the kind is returned.
196int Lexer::lex()
197{
198 const int previousTokenKind = _state.tokenKind;
199 int tokenKind;
200 bool firstPass = true;
201
202 again:
203 tokenKind = T_ERROR;
204 _tokenSpell = QStringView();
205 _rawString = QStringView();
206 if (firstPass && _state.stackToken == -1) {
207 firstPass = false;
 // line-by-line mode with the read position already past the end of non-empty code
208 if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
209 return T_EOL;
210
 // a previous call returned T_PARTIAL_COMMENT: keep looking for the closing "*/"
211 if (_state.comments == CommentState::InMultilineComment) {
212 scanChar();
213 _tokenStartPtr = _codePtr - 1;
214 _tokenLine = _currentLineNumber;
215 _tokenColumn = _currentColumnNumber;
216 while (_codePtr <= _endPtr) {
217 if (_state.currentChar == u'*') {
218 scanChar();
219 if (_state.currentChar == u'/') {
220 scanChar();
221 if (_engine) {
222 _engine->addComment(pos: tokenOffset() + 2,
223 len: _codePtr - _tokenStartPtr - 1 - 4,
224 line: tokenStartLine(), col: tokenStartColumn() + 2);
225 }
226 tokenKind = T_COMMENT;
227 break;
228 }
229 } else {
230 scanChar();
231 }
232 }
 // the input ended before "*/" was found: the comment is still open
233 if (tokenKind == T_ERROR)
234 tokenKind = T_PARTIAL_COMMENT;
235 } else {
236 // handle multiline continuation
237 std::optional<ScanStringMode> scanMode;
238 switch (previousTokenKind) {
239 case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
240 scanMode = ScanStringMode::SingleQuote;
241 break;
242 case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
243 scanMode = ScanStringMode::DoubleQuote;
244 break;
245 case T_PARTIAL_TEMPLATE_HEAD:
246 scanMode = ScanStringMode::TemplateHead;
247 break;
248 case T_PARTIAL_TEMPLATE_MIDDLE:
249 scanMode = ScanStringMode::TemplateContinuation;
250 break;
251 default:
252 break;
253 }
254 if (scanMode) {
255 scanChar();
256 _tokenStartPtr = _codePtr - 1;
257 _tokenLine = _currentLineNumber;
258 _tokenColumn = _currentColumnNumber;
259 tokenKind = scanString(mode: *scanMode);
260 }
261 }
262 }
 // nothing was resumed above (tokenKind is still T_ERROR): scan a regular token
263 if (tokenKind == T_ERROR)
264 tokenKind = scanToken();
265 _tokenLength = _codePtr - _tokenStartPtr - 1;
266 switch (tokenKind) {
267 // end of line and comments should not "overwrite" the old token type...
268 case T_EOL:
269 return tokenKind;
270 case T_COMMENT:
271 _state.comments = CommentState::HadComment;
272 return tokenKind;
273 case T_PARTIAL_COMMENT:
274 _state.comments = CommentState::InMultilineComment;
275 return tokenKind;
276 default:
277 _state.comments = CommentState::NoComment;
278 break;
279 }
280 _state.tokenKind = tokenKind;
281
282 _state.delimited = false;
283 _state.restrictedKeyword = false;
284 _state.followsClosingBrace = (previousTokenKind == T_RBRACE);
285
286 // update the flags
287 switch (_state.tokenKind) {
288 case T_LBRACE:
 // braces are only counted while the counter is already positive
289 if (_state.bracesCount > 0)
290 ++_state.bracesCount;
291 Q_FALLTHROUGH();
292 case T_SEMICOLON:
293 _state.importState = ImportState::NoQmlImport;
294 Q_FALLTHROUGH();
295 case T_QUESTION:
296 case T_COLON:
297 case T_TILDE:
298 _state.delimited = true;
299 break;
300 case T_AUTOMATIC_SEMICOLON:
301 case T_AS:
302 _state.importState = ImportState::NoQmlImport;
303 Q_FALLTHROUGH();
304 default:
305 if (isBinop(tok: _state.tokenKind))
306 _state.delimited = true;
307 break;
308
309 case T_IMPORT:
310 if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT))
311 _state.importState = ImportState::SawImport;
 // NOTE(review): isBinop() in this file has no case for T_IMPORT, so this
 // check looks like it can never set the flag - confirm before removing.
312 if (isBinop(tok: _state.tokenKind))
313 _state.delimited = true;
314 break;
315
316 case T_IF:
317 case T_FOR:
318 case T_WHILE:
319 case T_WITH:
320 _state.parenthesesState = CountParentheses;
321 _state.parenthesesCount = 0;
322 break;
323
324 case T_ELSE:
325 case T_DO:
326 _state.parenthesesState = BalancedParentheses;
327 break;
328
329 case T_CONTINUE:
330 case T_BREAK:
331 case T_RETURN:
332 case T_YIELD:
333 case T_THROW:
 // scanToken() returns T_SEMICOLON at the next line terminator while this flag is set
334 _state.restrictedKeyword = true;
335 break;
336 case T_RBRACE:
337 if (_state.bracesCount > 0)
338 --_state.bracesCount;
 // with the counter at 0, scanToken() continues with
 // scanString(TemplateContinuation), so scan again instead of returning the brace
339 if (_state.bracesCount == 0)
340 goto again;
341 } // switch
342
343 // update the parentheses state
344 switch (_state.parenthesesState) {
345 case IgnoreParentheses:
346 break;
347
348 case CountParentheses:
349 if (_state.tokenKind == T_RPAREN) {
350 --_state.parenthesesCount;
351 if (_state.parenthesesCount == 0)
352 _state.parenthesesState = BalancedParentheses;
353 } else if (_state.tokenKind == T_LPAREN) {
354 ++_state.parenthesesCount;
355 }
356 break;
357
358 case BalancedParentheses:
359 if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
360 _state.parenthesesState = IgnoreParentheses;
361 break;
362 } // switch
363
364 return _state.tokenKind;
365}
366
367uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
368{
369 Q_ASSERT(_state.currentChar == u'u');
370 scanChar(); // skip u
371 if (_codePtr + 4 <= _endPtr && isHexDigit(c: _state.currentChar)) {
372 uint codePoint = 0;
373 for (int i = 0; i < 4; ++i) {
374 int digit = hexDigit(c: _state.currentChar);
375 if (digit < 0)
376 goto error;
377 codePoint *= 16;
378 codePoint += digit;
379 scanChar();
380 }
381
382 *ok = true;
383 return codePoint;
384 } else if (_codePtr < _endPtr && _state.currentChar == u'{') {
385 scanChar(); // skip '{'
386 uint codePoint = 0;
387 if (!isHexDigit(c: _state.currentChar))
388 // need at least one hex digit
389 goto error;
390
391 while (_codePtr <= _endPtr) {
392 int digit = hexDigit(c: _state.currentChar);
393 if (digit < 0)
394 break;
395 codePoint *= 16;
396 codePoint += digit;
397 if (codePoint > 0x10ffff)
398 goto error;
399 scanChar();
400 }
401
402 if (_state.currentChar != u'}')
403 goto error;
404
405 scanChar(); // skip '}'
406
407
408 *ok = true;
409 return codePoint;
410 }
411
412error:
413 _state.errorCode = IllegalUnicodeEscapeSequence;
414 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Illegal unicode escape sequence");
415
416 *ok = false;
417 return 0;
418}
419
420QChar Lexer::decodeHexEscapeCharacter(bool *ok)
421{
422 if (isHexDigit(c: _codePtr[0]) && isHexDigit(c: _codePtr[1])) {
423 scanChar();
424
425 const QChar c1 = _state.currentChar;
426 scanChar();
427
428 const QChar c2 = _state.currentChar;
429 scanChar();
430
431 if (ok)
432 *ok = true;
433
434 return convertHex(c1, c2);
435 }
436
437 *ok = false;
438 return QChar();
439}
440
441namespace QQmlJS {
442QDebug operator<<(QDebug dbg, const Lexer &l)
443{
444 dbg << "{\n"
445 << " engine:" << qsizetype(l._engine) << ",\n"
446 << " lexMode:" << int(l._lexMode) << ",\n"
447 << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
448 << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
449 << " qmlMode:" << l._qmlMode << ",\n"
450 << " staticIsKeyword:" << l._staticIsKeyword << ",\n"
451 << " currentLineNumber:" << l._currentLineNumber << ",\n"
452 << " currentColumnNumber:" << l._currentColumnNumber << ",\n"
453 << " currentOffset:" << l._currentOffset << ",\n"
454 << " tokenLength:" << l._tokenLength << ",\n"
455 << " tokenLine:" << l._tokenLine << ",\n"
456 << " tokenColumn:" << l._tokenColumn << ",\n"
457 << " tokenText:" << l._tokenText << ",\n"
458 << " skipLinefeed:" << l._skipLinefeed << ",\n"
459 << " errorMessage:" << l._errorMessage << ",\n"
460 << " tokenSpell:" << l._tokenSpell << ",\n"
461 << " rawString:" << l._rawString << ",\n";
462 if (l._codePtr)
463 dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
464 else
465 dbg << " codePtr: *null*,\n";
466 if (l._tokenStartPtr)
467 dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
468 else
469 dbg << " tokenStartPtr: *null*,\n";
470 dbg << " state:" << l._state << "\n}";
471 return dbg;
472}
473}
474
475static inline bool isIdentifierStart(uint ch)
476{
477 // fast path for ascii
478 if ((ch >= u'a' && ch <= u'z') ||
479 (ch >= u'A' && ch <= u'Z') ||
480 ch == u'$' || ch == u'_')
481 return true;
482
483 switch (QChar::category(ucs4: ch)) {
484 case QChar::Number_Letter:
485 case QChar::Letter_Uppercase:
486 case QChar::Letter_Lowercase:
487 case QChar::Letter_Titlecase:
488 case QChar::Letter_Modifier:
489 case QChar::Letter_Other:
490 return true;
491 default:
492 break;
493 }
494 return false;
495}
496
497static bool isIdentifierPart(uint ch)
498{
499 // fast path for ascii
500 if ((ch >= u'a' && ch <= u'z') ||
501 (ch >= u'A' && ch <= u'Z') ||
502 (ch >= u'0' && ch <= u'9') ||
503 ch == u'$' || ch == u'_' ||
504 ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */)
505 return true;
506
507 switch (QChar::category(ucs4: ch)) {
508 case QChar::Mark_NonSpacing:
509 case QChar::Mark_SpacingCombining:
510
511 case QChar::Number_DecimalDigit:
512 case QChar::Number_Letter:
513
514 case QChar::Letter_Uppercase:
515 case QChar::Letter_Lowercase:
516 case QChar::Letter_Titlecase:
517 case QChar::Letter_Modifier:
518 case QChar::Letter_Other:
519
520 case QChar::Punctuation_Connector:
521 return true;
522 default:
523 break;
524 }
525 return false;
526}
527
// Scans one token starting at _state.currentChar and returns its kind.
//
// Order of work:
//  1. a token queued in _state.stackToken is handed out first;
//  2. with _state.bracesCount == 0 the scan continues a template string;
//  3. whitespace is skipped; a line terminator after a restricted keyword
//     yields T_SEMICOLON, otherwise it is remembered in _state.terminator;
//  4. end of input yields T_EOL (line-by-line mode, non-empty code) or EOF_SYMBOL;
//  5. the first character selects the token: punctuators, comments, strings,
//     numbers, or identifiers/keywords in the default branch.
// In LexMode::LineByLine comments are returned as T_COMMENT / T_PARTIAL_COMMENT;
// otherwise they are reported to the engine (if any) and scanning restarts.
// T_ERROR is returned for anything that cannot start a token.
528int Lexer::scanToken()
529{
 // e.g. the "++" / "--" that was held back behind an inserted T_SEMICOLON below
530 if (_state.stackToken != -1) {
531 int tk = _state.stackToken;
532 _state.stackToken = -1;
533 return tk;
534 }
535
536 if (_state.bracesCount == 0) {
537 // we're inside a Template string
538 return scanString(mode: TemplateContinuation);
539 }
540
541 if (_state.comments == CommentState::NoComment)
542 _state.terminator = false;
543
544again:
545 _state.validTokenText = false;
546
547 while (_state.currentChar.isSpace()) {
548 if (isLineTerminator()) {
 // true when this terminator (plus the pending '\n' of a "\r\n" pair) ends the code
549 bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr;
550 if (_state.restrictedKeyword) {
551 // automatic semicolon insertion
552 _tokenLine = _currentLineNumber;
553 _tokenColumn = _currentColumnNumber;
554 _tokenStartPtr = _codePtr - 1;
555 return T_SEMICOLON;
556 } else if (_lexMode == LexMode::WholeCode || !isAtEnd) {
557 _state.terminator = true;
558 syncProhibitAutomaticSemicolon();
559 } // else we will do the previous things at the start of next line...
560 }
561
562 scanChar();
563 }
564
 // the token starts at the current character, one position behind _codePtr
565 _tokenStartPtr = _codePtr - 1;
566 _tokenLine = _currentLineNumber;
567 _tokenColumn = _currentColumnNumber;
568
569 if (_codePtr >= _endPtr) {
570 if (_lexMode == LexMode::LineByLine) {
571 if (!_code.isEmpty()) {
 // NOTE(review): reads the character two positions behind _codePtr; presumably
 // at least two characters have been consumed at this point - confirm.
572 _state.currentChar = *(_codePtr - 2);
573 return T_EOL;
574 } else {
575 return EOF_SYMBOL;
576 }
577 } else if (_codePtr > _endPtr) {
578 return EOF_SYMBOL;
579 }
580 }
581
 // ch is the first character of the token; _state.currentChar becomes the lookahead
582 const QChar ch = _state.currentChar;
583 scanChar();
584
585 switch (ch.unicode()) {
586 case u'~': return T_TILDE;
587 case u'}': return T_RBRACE;
588
589 case u'|':
590 if (_state.currentChar == u'|') {
591 scanChar();
592 return T_OR_OR;
593 } else if (_state.currentChar == u'=') {
594 scanChar();
595 return T_OR_EQ;
596 }
597 return T_OR;
598
599 case u'{': return T_LBRACE;
600
601 case u'^':
602 if (_state.currentChar == u'=') {
603 scanChar();
604 return T_XOR_EQ;
605 }
606 return T_XOR;
607
608 case u']': return T_RBRACKET;
609 case u'[': return T_LBRACKET;
610 case u'?': {
611 if (_state.currentChar == u'?') {
612 scanChar();
613 return T_QUESTION_QUESTION;
614 }
 // "?." only counts as T_QUESTION_DOT when no digit follows the dot
615 if (_state.currentChar == u'.' && !peekChar().isDigit()) {
616 scanChar();
617 return T_QUESTION_DOT;
618 }
619
620 return T_QUESTION;
621 }
622
623 case u'>':
624 if (_state.currentChar == u'>') {
625 scanChar();
626 if (_state.currentChar == u'>') {
627 scanChar();
628 if (_state.currentChar == u'=') {
629 scanChar();
630 return T_GT_GT_GT_EQ;
631 }
632 return T_GT_GT_GT;
633 } else if (_state.currentChar == u'=') {
634 scanChar();
635 return T_GT_GT_EQ;
636 }
637 return T_GT_GT;
638 } else if (_state.currentChar == u'=') {
639 scanChar();
640 return T_GE;
641 }
642 return T_GT;
643
644 case u'=':
645 if (_state.currentChar == u'=') {
646 scanChar();
647 if (_state.currentChar == u'=') {
648 scanChar();
649 return T_EQ_EQ_EQ;
650 }
651 return T_EQ_EQ;
652 } else if (_state.currentChar == u'>') {
653 scanChar();
654 return T_ARROW;
655 }
656 return T_EQ;
657
658 case u'<':
659 if (_state.currentChar == u'=') {
660 scanChar();
661 return T_LE;
662 } else if (_state.currentChar == u'<') {
663 scanChar();
664 if (_state.currentChar == u'=') {
665 scanChar();
666 return T_LT_LT_EQ;
667 }
668 return T_LT_LT;
669 }
670 return T_LT;
671
672 case u';': return T_SEMICOLON;
673 case u':': return T_COLON;
674
 // '/' starts a block comment, a line comment, "/=" or a plain T_DIVIDE_
675 case u'/':
676 switch (_state.currentChar.unicode()) {
677 case u'*':
678 scanChar();
679 while (_codePtr <= _endPtr) {
680 if (_state.currentChar == u'*') {
681 scanChar();
682 if (_state.currentChar == u'/') {
683 scanChar();
684 if (_engine) {
685 _engine->addComment(pos: tokenOffset() + 2,
686 len: _codePtr - _tokenStartPtr - 1 - 4, line: tokenStartLine(),
687 col: tokenStartColumn() + 2);
688 }
689 if (_lexMode == LexMode::LineByLine)
690 return T_COMMENT;
691 else
692 goto again;
693 }
694 } else {
695 scanChar();
696 }
697 }
 // the input ended before the closing "*/" was seen
698 if (_lexMode == LexMode::LineByLine)
699 return T_PARTIAL_COMMENT;
700 else
701 goto again;
702 case u'/':
703 while (_codePtr <= _endPtr && !isLineTerminator()) {
704 scanChar();
705 }
706 if (_engine) {
707 _engine->addComment(pos: tokenOffset() + 2, len: _codePtr - _tokenStartPtr - 1 - 2,
708 line: tokenStartLine(), col: tokenStartColumn() + 2);
709 }
710 if (_lexMode == LexMode::LineByLine)
711 return T_COMMENT;
712 else
713 goto again;
714 case u'=':
715 scanChar();
716 return T_DIVIDE_EQ;
717 default:
718 return T_DIVIDE_;
719 }
720 case u'.':
 // while the import state is SawImport a '.' is always returned as T_DOT
721 if (_state.importState == ImportState::SawImport)
722 return T_DOT;
723 if (isDecimalDigit(c: _state.currentChar.unicode()))
724 return scanNumber(ch);
725 if (_state.currentChar == u'.') {
726 scanChar();
727 if (_state.currentChar == u'.') {
728 scanChar();
729 return T_ELLIPSIS;
730 } else {
731 _state.errorCode = IllegalCharacter;
732 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unexpected token '.'");
733 return T_ERROR;
734 }
735 }
736 return T_DOT;
737
738 case u'-':
739 if (_state.currentChar == u'=') {
740 scanChar();
741 return T_MINUS_EQ;
742 } else if (_state.currentChar == u'-') {
743 scanChar();
744
 // a line terminator was seen before "--": return T_SEMICOLON now and
 // queue T_MINUS_MINUS for the next call
745 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
746 && _state.tokenKind != T_LPAREN) {
747 _state.stackToken = T_MINUS_MINUS;
748 return T_SEMICOLON;
749 }
750
751 return T_MINUS_MINUS;
752 }
753 return T_MINUS;
754
755 case u',': return T_COMMA;
756
757 case u'+':
758 if (_state.currentChar == u'=') {
759 scanChar();
760 return T_PLUS_EQ;
761 } else if (_state.currentChar == u'+') {
762 scanChar();
763
 // same handling as for "--" above, with T_PLUS_PLUS queued instead
764 if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
765 && _state.tokenKind != T_LPAREN) {
766 _state.stackToken = T_PLUS_PLUS;
767 return T_SEMICOLON;
768 }
769
770 return T_PLUS_PLUS;
771 }
772 return T_PLUS;
773
774 case u'*':
775 if (_state.currentChar == u'=') {
776 scanChar();
777 return T_STAR_EQ;
778 } else if (_state.currentChar == u'*') {
779 scanChar();
780 if (_state.currentChar == u'=') {
781 scanChar();
782 return T_STAR_STAR_EQ;
783 }
784 return T_STAR_STAR;
785 }
786 return T_STAR;
787
788 case u')': return T_RPAREN;
789 case u'(': return T_LPAREN;
790
791 case u'@': return T_AT;
792
793 case u'&':
794 if (_state.currentChar == u'=') {
795 scanChar();
796 return T_AND_EQ;
797 } else if (_state.currentChar == u'&') {
798 scanChar();
799 return T_AND_AND;
800 }
801 return T_AND;
802
803 case u'%':
804 if (_state.currentChar == u'=') {
805 scanChar();
806 return T_REMAINDER_EQ;
807 }
808 return T_REMAINDER;
809
810 case u'!':
811 if (_state.currentChar == u'=') {
812 scanChar();
813 if (_state.currentChar == u'=') {
814 scanChar();
815 return T_NOT_EQ_EQ;
816 }
817 return T_NOT_EQ;
818 }
819 return T_NOT;
820
821 case u'`':
 // remember the current brace count; scanString() pops it again when the template is closed
822 _state.outerTemplateBraceCount.push(t: _state.bracesCount);
823 Q_FALLTHROUGH();
824 case u'\'':
825 case u'"':
 // the quote character itself is converted to the ScanStringMode
826 return scanString(mode: ScanStringMode(ch.unicode()));
827 case u'0':
828 case u'1':
829 case u'2':
830 case u'3':
831 case u'4':
832 case u'5':
833 case u'6':
834 case u'7':
835 case u'8':
836 case u'9':
837 if (_state.importState == ImportState::SawImport)
838 return scanVersionNumber(ch);
839 else
840 return scanNumber(ch);
841
842 case '#':
 // only a '#' at line 1, column 2 is accepted as the start of a shebang line;
 // any other '#' falls through to the default branch
843 if (_currentLineNumber == 1 && _currentColumnNumber == 2) {
844 // shebang support
845 while (_codePtr <= _endPtr && !isLineTerminator()) {
846 scanChar();
847 }
848 if (_engine) {
849 _engine->addComment(pos: tokenOffset(), len: _codePtr - _tokenStartPtr - 1, line: tokenStartLine(),
850 col: tokenStartColumn());
851 }
852 if (_lexMode == LexMode::LineByLine)
853 return T_COMMENT;
854 else
855 goto again;
856 }
857 Q_FALLTHROUGH();
858
 // identifiers and keywords; the first code point may be a surrogate pair or a \u escape
859 default: {
860 uint c = ch.unicode();
861 bool identifierWithEscapeChars = false;
862 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _state.currentChar.unicode())) {
863 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
864 scanChar();
865 } else if (c == '\\' && _state.currentChar == u'u') {
866 identifierWithEscapeChars = true;
867 bool ok = false;
868 c = decodeUnicodeEscapeCharacter(ok: &ok);
869 if (!ok)
870 return T_ERROR;
871 }
872 if (isIdentifierStart(ch: c)) {
 // with escapes the decoded spelling is collected in _tokenText
873 if (identifierWithEscapeChars) {
874 _tokenText.resize(size: 0);
875 if (QChar::requiresSurrogates(ucs4: c)) {
876 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
877 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
878 } else {
879 _tokenText += QChar(c);
880 }
881 _state.validTokenText = true;
882 }
883 while (_codePtr <= _endPtr) {
884 c = _state.currentChar.unicode();
885 if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _codePtr->unicode())) {
886 scanChar();
887 c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
888 } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') {
 // first escape in this identifier: copy what was scanned so far into _tokenText
889 if (!identifierWithEscapeChars) {
890 identifierWithEscapeChars = true;
891 _tokenText.resize(size: 0);
892 _tokenText.insert(i: 0, uc: _tokenStartPtr, len: _codePtr - _tokenStartPtr - 1);
893 _state.validTokenText = true;
894 }
895
896 scanChar(); // skip '\\'
897 bool ok = false;
898 c = decodeUnicodeEscapeCharacter(ok: &ok);
899 if (!ok)
900 return T_ERROR;
901
902 if (!isIdentifierPart(ch: c))
903 break;
904
905 if (QChar::requiresSurrogates(ucs4: c)) {
906 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
907 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
908 } else {
909 _tokenText += QChar(c);
910 }
911 continue;
912 }
913
914 if (!isIdentifierPart(ch: c))
915 break;
916
917 if (identifierWithEscapeChars) {
918 if (QChar::requiresSurrogates(ucs4: c)) {
919 _tokenText += QChar(QChar::highSurrogate(ucs4: c));
920 _tokenText += QChar(QChar::lowSurrogate(ucs4: c));
921 } else {
922 _tokenText += QChar(c);
923 }
924 }
925 scanChar();
926 }
927
928 _tokenLength = _codePtr - _tokenStartPtr - 1;
929
930 int kind = T_IDENTIFIER;
931
 // identifiers written with escapes are never classified as keywords
932 if (!identifierWithEscapeChars)
933 kind = classify(s: _tokenStartPtr, n: _tokenLength, parseModeFlags: parseModeFlags());
934
 // after "function", look past whitespace and comments for a '*',
 // which turns the token into T_FUNCTION_STAR
935 if (kind == T_FUNCTION) {
936 continue_skipping:
937 while (_codePtr < _endPtr && _state.currentChar.isSpace())
938 scanChar();
939 if (_state.currentChar == u'*') {
940 _tokenLength = _codePtr - _tokenStartPtr - 1;
941 kind = T_FUNCTION_STAR;
942 scanChar();
943 } else if (_state.currentChar == u'/') {
944 scanChar();
945 switch (_state.currentChar.unicode()) {
946 case u'*':
947 scanChar();
948 while (_codePtr <= _endPtr) {
949 if (_state.currentChar == u'*') {
950 scanChar();
951 if (_state.currentChar == u'/') {
952 scanChar();
953 if (_engine) {
954 _engine->addComment(pos: tokenOffset() + 2,
955 len: _codePtr - _tokenStartPtr - 1 - 4,
956 line: tokenStartLine(),
957 col: tokenStartColumn() + 2);
958 }
959 if (_lexMode == LexMode::LineByLine)
960 return T_COMMENT;
961 goto continue_skipping;
962 }
963 } else {
964 scanChar();
965 }
966 }
967 if (_lexMode == LexMode::LineByLine)
968 return T_PARTIAL_COMMENT;
969 else
970 goto continue_skipping;
971 case u'/':
972 while (_codePtr <= _endPtr && !isLineTerminator()) {
973 scanChar();
974 }
975 if (_engine) {
976 _engine->addComment(pos: tokenOffset() + 2,
977 len: _codePtr - _tokenStartPtr - 1 - 2,
978 line: tokenStartLine(), col: tokenStartColumn() + 2);
979 }
980 if (_lexMode == LexMode::LineByLine)
981 return T_COMMENT;
982 else
983 goto continue_skipping;
984 default:
985 break;
986 }
987 }
988 }
989
 // spelling: the decoded text for escaped identifiers, otherwise a slice of the source
990 if (_engine) {
991 if (kind == T_IDENTIFIER && identifierWithEscapeChars)
992 _tokenSpell = _engine->newStringRef(text: _tokenText);
993 else
994 _tokenSpell = _engine->midRef(position: _tokenStartPtr - _code.unicode(), size: _tokenLength);
995 }
996
997 return kind;
998 }
999 }
1000
1001 break;
1002 }
1003
1004 return T_ERROR;
1005}
1006
// Scans a string literal or one chunk of a template string and returns the
// resulting token kind.
//
// \a mode selects the terminating quote: TemplateContinuation ends at the same
// quote as TemplateHead, every other mode is converted to the quote character
// directly (scanToken() builds the mode from the quote character it read).
//
// Two phases:
//  - fast path (engine present): characters are consumed until the closing
//    quote; if no escape, no "$" inside a template and no line break gets in the
//    way, _tokenSpell and _rawString reference the source text directly;
//  - slow path: the text is rebuilt in _tokenText while escape sequences are
//    decoded and "${" ends the current template chunk.
// If the input ends first, line-by-line mode with non-empty code returns the
// matching T_PARTIAL_* kind; otherwise UnclosedStringLiteral is recorded and
// T_ERROR is returned. Malformed escapes and, outside QML mode and templates,
// stray newlines also yield T_ERROR.
1007int Lexer::scanString(ScanStringMode mode)
1008{
1009 QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode);
1010 // we actually use T_STRING_LITERAL also for multiline strings, should we want to
1011 // change that we should set it to:
1012 // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL ||
1013 // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
1014 // here and uncomment the multilineStringLiteral = true below.
1015 bool multilineStringLiteral = false;
1016
 // startCode points at the current character, i.e. the first character of the content
1017 const QChar *startCode = _codePtr - 1;
1018 // in case we just parsed a \r, we need to reset this flag to get things working
1019 // correctly in the loop below and afterwards
1020 _skipLinefeed = false;
1021 bool first = true;
1022
 // fast path: needs the engine, because the result is a reference into its copy of the code
1023 if (_engine) {
1024 while (_codePtr <= _endPtr) {
1025 if (isLineTerminator()) {
 // line breaks are accepted in template strings and in QML mode: leave the fast path
1026 if ((quote == u'`' || qmlMode())) {
1027 if (first)
1028 --_currentLineNumber; // will be read again in scanChar()
1029 break;
1030 }
1031 _state.errorCode = IllegalCharacter;
1032 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
1033 key: "Stray newline in string literal");
1034 return T_ERROR;
1035 } else if (_state.currentChar == u'\\') {
1036 break;
1037 } else if (_state.currentChar == u'$' && quote == u'`') {
1038 break;
1039 } else if (_state.currentChar == quote) {
1040 _tokenSpell =
1041 _engine->midRef(position: startCode - _code.unicode(), size: _codePtr - startCode - 1);
1042 _rawString = _tokenSpell;
1043 scanChar();
1044
 // closing a template: restore the brace count saved when it was opened
1045 if (quote == u'`')
1046 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1047 if (mode == TemplateHead)
1048 return T_NO_SUBSTITUTION_TEMPLATE;
1049 else if (mode == TemplateContinuation)
1050 return T_TEMPLATE_TAIL;
1051 else if (multilineStringLiteral)
1052 return T_MULTILINE_STRING_LITERAL;
1053 else
1054 return T_STRING_LITERAL;
1055 }
1056 // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1057 _state.currentChar = *_codePtr++;
1058 ++_currentColumnNumber;
1059 first = false;
1060 }
1061 }
1062
1063 // rewind by one char, so things gets scanned correctly
1064 --_codePtr;
1065 --_currentColumnNumber;
1066
 // slow path: start _tokenText with everything between startCode and the rewound position
1067 _state.validTokenText = true;
1068 _tokenText = QString(startCode, _codePtr - startCode);
1069
 // stores the source text from startCode up to one character before \a end as the
 // raw string, with "\r\n" and '\r' replaced by '\n'; only called when _engine is set
1070 auto setRawString = [&](const QChar *end) {
1071 QString raw(startCode, end - startCode - 1);
1072 raw.replace(before: QLatin1String("\r\n"), after: QLatin1String("\n"));
1073 raw.replace(before: u'\r', after: u'\n');
1074 _rawString = _engine->newStringRef(text: raw);
1075 };
1076
1077 scanChar();
1078
1079 while (_codePtr <= _endPtr) {
1080 if (_state.currentChar == quote) {
1081 scanChar();
1082
1083 if (_engine) {
1084 _tokenSpell = _engine->newStringRef(text: _tokenText);
1085 if (quote == u'`')
1086 setRawString(_codePtr - 1);
1087 }
1088
1089 if (quote == u'`')
1090 _state.bracesCount = _state.outerTemplateBraceCount.pop();
1091
1092 if (mode == TemplateContinuation)
1093 return T_TEMPLATE_TAIL;
1094 else if (mode == TemplateHead)
1095 return T_NO_SUBSTITUTION_TEMPLATE;
1096
1097 return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1098 } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') {
 // "${" ends this template chunk; the brace count restarts at 1 for the substitution
1099 scanChar();
1100 scanChar();
1101 _state.bracesCount = 1;
1102 if (_engine) {
1103 _tokenSpell = _engine->newStringRef(text: _tokenText);
1104 setRawString(_codePtr - 2);
1105 }
1106
1107 return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1108 } else if (_state.currentChar == u'\\') {
1109 scanChar();
1110 if (_codePtr > _endPtr) {
1111 _state.errorCode = IllegalEscapeSequence;
1112 _errorMessage = QCoreApplication::translate(
1113 context: "QQmlParser", key: "End of file reached at escape sequence");
1114 return T_ERROR;
1115 }
1116
 // u receives the decoded character and is appended to _tokenText after the switch
1117 QChar u;
1118
1119 switch (_state.currentChar.unicode()) {
1120 // unicode escape sequence
1121 case u'u': {
1122 bool ok = false;
1123 uint codePoint = decodeUnicodeEscapeCharacter(ok: &ok);
1124 if (!ok)
1125 return T_ERROR;
1126 if (QChar::requiresSurrogates(ucs4: codePoint)) {
1127 // need to use a surrogate pair
1128 _tokenText += QChar(QChar::highSurrogate(ucs4: codePoint));
1129 u = QChar::lowSurrogate(ucs4: codePoint);
1130 } else {
1131 u = QChar(codePoint);
1132 }
1133 } break;
1134
1135 // hex escape sequence
1136 case u'x': {
1137 bool ok = false;
1138 u = decodeHexEscapeCharacter(ok: &ok);
1139 if (!ok) {
1140 _state.errorCode = IllegalHexadecimalEscapeSequence;
1141 _errorMessage = QCoreApplication::translate(
1142 context: "QQmlParser", key: "Illegal hexadecimal escape sequence");
1143 return T_ERROR;
1144 }
1145 } break;
1146
1147 // single character escape sequence
1148 case u'\\': u = u'\\'; scanChar(); break;
1149 case u'\'': u = u'\''; scanChar(); break;
1150 case u'\"': u = u'\"'; scanChar(); break;
1151 case u'b': u = u'\b'; scanChar(); break;
1152 case u'f': u = u'\f'; scanChar(); break;
1153 case u'n': u = u'\n'; scanChar(); break;
1154 case u'r': u = u'\r'; scanChar(); break;
1155 case u't': u = u'\t'; scanChar(); break;
1156 case u'v': u = u'\v'; scanChar(); break;
1157
 // "\0" not followed by another digit is the NUL character; any other
 // backslash-digit sequence is rejected as an octal escape
1158 case u'0':
1159 if (!_codePtr->isDigit()) {
1160 scanChar();
1161 u = u'\0';
1162 break;
1163 }
1164 Q_FALLTHROUGH();
1165 case u'1':
1166 case u'2':
1167 case u'3':
1168 case u'4':
1169 case u'5':
1170 case u'6':
1171 case u'7':
1172 case u'8':
1173 case u'9':
1174 _state.errorCode = IllegalEscapeSequence;
1175 _errorMessage = QCoreApplication::translate(
1176 context: "QQmlParser", key: "Octal escape sequences are not allowed");
1177 return T_ERROR;
1178
 // backslash followed by a line terminator: nothing is appended to _tokenText
1179 case u'\r':
1180 case u'\n':
1181 case 0x2028u:
1182 case 0x2029u:
1183 // uncomment the following to use T_MULTILINE_STRING_LITERAL
1184 // multilineStringLiteral = true;
1185 scanChar();
1186 continue;
1187
1188 default:
1189 // non escape character
1190 u = _state.currentChar;
1191 scanChar();
1192 }
1193
1194 _tokenText += u;
1195 } else {
1196 _tokenText += _state.currentChar;
1197 scanChar();
1198 }
1199 }
 // the input ended before the closing quote was found
1200 if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1201 if (mode == TemplateContinuation)
1202 return T_PARTIAL_TEMPLATE_MIDDLE;
1203 else if (mode == TemplateHead)
1204 return T_PARTIAL_TEMPLATE_HEAD;
1205 else if (mode == SingleQuote)
1206 return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1207 return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1208 }
1209 _state.errorCode = UnclosedStringLiteral;
1210 _errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unclosed string at end of line");
1211 return T_ERROR;
1212}
1213
1214int Lexer::scanNumber(QChar ch)
1215{
1216 if (ch == u'0') {
1217 if (_state.currentChar == u'x' || _state.currentChar == u'X') {
1218 ch = _state.currentChar; // remember the x or X to use it in the error message below.
1219
1220 // parse hex integer literal
1221 scanChar(); // consume 'x'
1222
1223 if (!isHexDigit(c: _state.currentChar)) {
1224 _state.errorCode = IllegalNumber;
1225 _errorMessage = QCoreApplication::translate(
1226 context: "QQmlParser",
1227 key: "At least one hexadecimal digit is required after '0%1'")
1228 .arg(a: ch);
1229 return T_ERROR;
1230 }
1231
1232 double d = 0.;
1233 while (1) {
1234 int digit = ::hexDigit(c: _state.currentChar);
1235 if (digit < 0)
1236 break;
1237 d *= 16;
1238 d += digit;
1239 scanChar();
1240 }
1241
1242 _state.tokenValue = d;
1243 return T_NUMERIC_LITERAL;
1244 } else if (_state.currentChar == u'o' || _state.currentChar == u'O') {
1245 ch = _state.currentChar; // remember the o or O to use it in the error message below.
1246
1247 // parse octal integer literal
1248 scanChar(); // consume 'o'
1249
1250 if (!isOctalDigit(c: _state.currentChar.unicode())) {
1251 _state.errorCode = IllegalNumber;
1252 _errorMessage =
1253 QCoreApplication::translate(
1254 context: "QQmlParser", key: "At least one octal digit is required after '0%1'")
1255 .arg(a: ch);
1256 return T_ERROR;
1257 }
1258
1259 double d = 0.;
1260 while (1) {
1261 int digit = ::octalDigit(c: _state.currentChar);
1262 if (digit < 0)
1263 break;
1264 d *= 8;
1265 d += digit;
1266 scanChar();
1267 }
1268
1269 _state.tokenValue = d;
1270 return T_NUMERIC_LITERAL;
1271 } else if (_state.currentChar == u'b' || _state.currentChar == u'B') {
1272 ch = _state.currentChar; // remember the b or B to use it in the error message below.
1273
1274 // parse binary integer literal
1275 scanChar(); // consume 'b'
1276
1277 if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') {
1278 _state.errorCode = IllegalNumber;
1279 _errorMessage =
1280 QCoreApplication::translate(
1281 context: "QQmlParser", key: "At least one binary digit is required after '0%1'")
1282 .arg(a: ch);
1283 return T_ERROR;
1284 }
1285
1286 double d = 0.;
1287 while (1) {
1288 int digit = 0;
1289 if (_state.currentChar.unicode() == u'1')
1290 digit = 1;
1291 else if (_state.currentChar.unicode() != u'0')
1292 break;
1293 d *= 2;
1294 d += digit;
1295 scanChar();
1296 }
1297
1298 _state.tokenValue = d;
1299 return T_NUMERIC_LITERAL;
1300 } else if (_state.currentChar.isDigit() && !qmlMode()) {
1301 _state.errorCode = IllegalCharacter;
1302 _errorMessage = QCoreApplication::translate(context: "QQmlParser",
1303 key: "Decimal numbers can't start with '0'");
1304 return T_ERROR;
1305 }
1306 }
1307
1308 // decimal integer literal
1309 QVarLengthArray<char,32> chars;
1310 chars.append(t: ch.unicode());
1311
1312 if (ch != u'.') {
1313 while (_state.currentChar.isDigit()) {
1314 chars.append(t: _state.currentChar.unicode());
1315 scanChar(); // consume the digit
1316 }
1317
1318 if (_state.currentChar == u'.') {
1319 chars.append(t: _state.currentChar.unicode());
1320 scanChar(); // consume `.'
1321 }
1322 }
1323
1324 while (_state.currentChar.isDigit()) {
1325 chars.append(t: _state.currentChar.unicode());
1326 scanChar();
1327 }
1328
1329 if (_state.currentChar == u'e' || _state.currentChar == u'E') {
1330 if (_codePtr[0].isDigit()
1331 || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) {
1332
1333 chars.append(t: _state.currentChar.unicode());
1334 scanChar(); // consume `e'
1335
1336 if (_state.currentChar == u'+' || _state.currentChar == u'-') {
1337 chars.append(t: _state.currentChar.unicode());
1338 scanChar(); // consume the sign
1339 }
1340
1341 while (_state.currentChar.isDigit()) {
1342 chars.append(t: _state.currentChar.unicode());
1343 scanChar();
1344 }
1345 }
1346 }
1347
1348 const char *begin = chars.constData();
1349 const char *end = nullptr;
1350 bool ok = false;
1351
1352 _state.tokenValue = qstrntod(s00: begin, len: chars.size(), se: &end, ok: &ok);
1353
1354 if (end - begin != chars.size()) {
1355 _state.errorCode = IllegalExponentIndicator;
1356 _errorMessage =
1357 QCoreApplication::translate(context: "QQmlParser", key: "Illegal syntax for exponential number");
1358 return T_ERROR;
1359 }
1360
1361 return T_NUMERIC_LITERAL;
1362}
1363
1364int Lexer::scanVersionNumber(QChar ch)
1365{
1366 if (ch == u'0') {
1367 _state.tokenValue = 0;
1368 return T_VERSION_NUMBER;
1369 }
1370
1371 int acc = 0;
1372 acc += ch.digitValue();
1373
1374 while (_state.currentChar.isDigit()) {
1375 acc *= 10;
1376 acc += _state.currentChar.digitValue();
1377 scanChar(); // consume the digit
1378 }
1379
1380 _state.tokenValue = acc;
1381 return T_VERSION_NUMBER;
1382}
1383
1384bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1385{
1386 _tokenText.resize(size: 0);
1387 _state.validTokenText = true;
1388 _state.patternFlags = 0;
1389
1390 if (prefix == EqualPrefix)
1391 _tokenText += u'=';
1392
1393 while (true) {
1394 switch (_state.currentChar.unicode()) {
1395 case u'/':
1396 scanChar();
1397
1398 // scan the flags
1399 _state.patternFlags = 0;
1400 while (isIdentLetter(c: _state.currentChar)) {
1401 int flag = regExpFlagFromChar(ch: _state.currentChar);
1402 if (flag == 0 || _state.patternFlags & flag) {
1403 _errorMessage = QCoreApplication::translate(
1404 context: "QQmlParser", key: "Invalid regular expression flag '%0'")
1405 .arg(a: QChar(_state.currentChar));
1406 return false;
1407 }
1408 _state.patternFlags |= flag;
1409 scanChar();
1410 }
1411
1412 _tokenLength = _codePtr - _tokenStartPtr - 1;
1413 return true;
1414
1415 case u'\\':
1416 // regular expression backslash sequence
1417 _tokenText += _state.currentChar;
1418 scanChar();
1419
1420 if (_codePtr > _endPtr || isLineTerminator()) {
1421 _errorMessage = QCoreApplication::translate(
1422 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1423 return false;
1424 }
1425
1426 _tokenText += _state.currentChar;
1427 scanChar();
1428 break;
1429
1430 case u'[':
1431 // regular expression class
1432 _tokenText += _state.currentChar;
1433 scanChar();
1434
1435 while (_codePtr <= _endPtr && !isLineTerminator()) {
1436 if (_state.currentChar == u']')
1437 break;
1438 else if (_state.currentChar == u'\\') {
1439 // regular expression backslash sequence
1440 _tokenText += _state.currentChar;
1441 scanChar();
1442
1443 if (_codePtr > _endPtr || isLineTerminator()) {
1444 _errorMessage = QCoreApplication::translate(
1445 context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1446 return false;
1447 }
1448
1449 _tokenText += _state.currentChar;
1450 scanChar();
1451 } else {
1452 _tokenText += _state.currentChar;
1453 scanChar();
1454 }
1455 }
1456
1457 if (_state.currentChar != u']') {
1458 _errorMessage = QCoreApplication::translate(
1459 context: "QQmlParser", key: "Unterminated regular expression class");
1460 return false;
1461 }
1462
1463 _tokenText += _state.currentChar;
1464 scanChar(); // skip ]
1465 break;
1466
1467 default:
1468 if (_codePtr > _endPtr || isLineTerminator()) {
1469 _errorMessage = QCoreApplication::translate(
1470 context: "QQmlParser", key: "Unterminated regular expression literal");
1471 return false;
1472 } else {
1473 _tokenText += _state.currentChar;
1474 scanChar();
1475 }
1476 } // switch
1477 } // while
1478
1479 return false;
1480}
1481
1482bool Lexer::isLineTerminator() const
1483{
1484 const ushort unicode = _state.currentChar.unicode();
1485 return unicode == 0x000Au
1486 || unicode == 0x000Du
1487 || unicode == 0x2028u
1488 || unicode == 0x2029u;
1489}
1490
1491unsigned Lexer::isLineTerminatorSequence() const
1492{
1493 switch (_state.currentChar.unicode()) {
1494 case 0x000Au:
1495 case 0x2028u:
1496 case 0x2029u:
1497 return 1;
1498 case 0x000Du:
1499 if (_codePtr->unicode() == 0x000Au)
1500 return 2;
1501 else
1502 return 1;
1503 default:
1504 return 0;
1505 }
1506}
1507
1508bool Lexer::isIdentLetter(QChar ch)
1509{
1510 // ASCII-biased, since all reserved words are ASCII, aand hence the
1511 // bulk of content to be parsed.
1512 if ((ch >= u'a' && ch <= u'z')
1513 || (ch >= u'A' && ch <= u'Z')
1514 || ch == u'$' || ch == u'_')
1515 return true;
1516 if (ch.unicode() < 128)
1517 return false;
1518 return ch.isLetterOrNumber();
1519}
1520
1521bool Lexer::isDecimalDigit(ushort c)
1522{
1523 return (c >= u'0' && c <= u'9');
1524}
1525
1526bool Lexer::isHexDigit(QChar c)
1527{
1528 return ((c >= u'0' && c <= u'9')
1529 || (c >= u'a' && c <= u'f')
1530 || (c >= u'A' && c <= u'F'));
1531}
1532
1533bool Lexer::isOctalDigit(ushort c)
1534{
1535 return (c >= u'0' && c <= u'7');
1536}
1537
1538QString Lexer::tokenText() const
1539{
1540 if (_state.validTokenText)
1541 return _tokenText;
1542
1543 if (_state.tokenKind == T_STRING_LITERAL)
1544 return QString(_tokenStartPtr + 1, _tokenLength - 2);
1545
1546 return QString(_tokenStartPtr, _tokenLength);
1547}
1548
1549Lexer::Error Lexer::errorCode() const
1550{
1551 return _state.errorCode;
1552}
1553
1554QString Lexer::errorMessage() const
1555{
1556 return _errorMessage;
1557}
1558
1559void Lexer::syncProhibitAutomaticSemicolon()
1560{
1561 if (_state.parenthesesState == BalancedParentheses) {
1562 // we have seen something like "if (foo)", which means we should
1563 // never insert an automatic semicolon at this point, since it would
1564 // then be expanded into an empty statement (ECMA-262 7.9.1)
1565 _state.prohibitAutomaticSemicolon = true;
1566 _state.parenthesesState = IgnoreParentheses;
1567 } else {
1568 _state.prohibitAutomaticSemicolon = false;
1569 }
1570}
1571
1572bool Lexer::prevTerminator() const
1573{
1574 return _state.terminator;
1575}
1576
1577bool Lexer::followsClosingBrace() const
1578{
1579 return _state.followsClosingBrace;
1580}
1581
1582bool Lexer::canInsertAutomaticSemicolon(int token) const
1583{
1584 return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator
1585 || _state.followsClosingBrace;
1586}
1587
1588static const int uriTokens[] = {
1589 QQmlJSGrammar::T_IDENTIFIER,
1590 QQmlJSGrammar::T_PROPERTY,
1591 QQmlJSGrammar::T_SIGNAL,
1592 QQmlJSGrammar::T_READONLY,
1593 QQmlJSGrammar::T_ON,
1594 QQmlJSGrammar::T_BREAK,
1595 QQmlJSGrammar::T_CASE,
1596 QQmlJSGrammar::T_CATCH,
1597 QQmlJSGrammar::T_CONTINUE,
1598 QQmlJSGrammar::T_DEFAULT,
1599 QQmlJSGrammar::T_DELETE,
1600 QQmlJSGrammar::T_DO,
1601 QQmlJSGrammar::T_ELSE,
1602 QQmlJSGrammar::T_FALSE,
1603 QQmlJSGrammar::T_FINALLY,
1604 QQmlJSGrammar::T_FOR,
1605 QQmlJSGrammar::T_FUNCTION,
1606 QQmlJSGrammar::T_FUNCTION_STAR,
1607 QQmlJSGrammar::T_IF,
1608 QQmlJSGrammar::T_IN,
1609 QQmlJSGrammar::T_OF,
1610 QQmlJSGrammar::T_INSTANCEOF,
1611 QQmlJSGrammar::T_NEW,
1612 QQmlJSGrammar::T_NULL,
1613 QQmlJSGrammar::T_RETURN,
1614 QQmlJSGrammar::T_SWITCH,
1615 QQmlJSGrammar::T_THIS,
1616 QQmlJSGrammar::T_THROW,
1617 QQmlJSGrammar::T_TRUE,
1618 QQmlJSGrammar::T_TRY,
1619 QQmlJSGrammar::T_TYPEOF,
1620 QQmlJSGrammar::T_VAR,
1621 QQmlJSGrammar::T_VOID,
1622 QQmlJSGrammar::T_WHILE,
1623 QQmlJSGrammar::T_CONST,
1624 QQmlJSGrammar::T_DEBUGGER,
1625 QQmlJSGrammar::T_RESERVED_WORD,
1626 QQmlJSGrammar::T_WITH,
1627
1628 QQmlJSGrammar::EOF_SYMBOL
1629};
1630static inline bool isUriToken(int token)
1631{
1632 const int *current = uriTokens;
1633 while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1634 if (*current == token)
1635 return true;
1636 ++current;
1637 }
1638 return false;
1639}
1640
1641bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error)
1642{
1643 auto setError = [error, this](QString message) {
1644 error->message = std::move(message);
1645 error->loc.startLine = tokenStartLine();
1646 error->loc.startColumn = tokenStartColumn();
1647 };
1648
1649 QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1650 Q_ASSERT(!_qmlMode);
1651
1652 lex(); // fetch the first token
1653
1654 if (_state.tokenKind != T_DOT)
1655 return true;
1656
1657 do {
1658 const int lineNumber = tokenStartLine();
1659 const int column = tokenStartColumn();
1660
1661 lex(); // skip T_DOT
1662
1663 if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT))
1664 return true; // expected a valid QML/JS directive
1665
1666 const QString directiveName = tokenText();
1667
1668 if (! (directiveName == QLatin1String("pragma") ||
1669 directiveName == QLatin1String("import"))) {
1670 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1671 return false; // not a valid directive name
1672 }
1673
1674 // it must be a pragma or an import directive.
1675 if (directiveName == QLatin1String("pragma")) {
1676 // .pragma library
1677 if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1678 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1679 return false; // expected `library
1680 }
1681
1682 // we found a .pragma library directive
1683 directives->pragmaLibrary();
1684
1685 } else {
1686 Q_ASSERT(directiveName == QLatin1String("import"));
1687 lex(); // skip .import
1688
1689 QString pathOrUri;
1690 QString version;
1691 bool fileImport = false; // file or uri import
1692
1693 if (_state.tokenKind == T_STRING_LITERAL) {
1694 // .import T_STRING_LITERAL as T_IDENTIFIER
1695
1696 fileImport = true;
1697 pathOrUri = tokenText();
1698
1699 if (!pathOrUri.endsWith(s: QLatin1String("js"))) {
1700 setError(QCoreApplication::translate(context: "QQmlParser",key: "Imported file must be a script"));
1701 return false;
1702 }
1703 lex();
1704
1705 } else if (_state.tokenKind == T_IDENTIFIER) {
1706 // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER
1707 while (true) {
1708 if (!isUriToken(token: _state.tokenKind)) {
1709 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1710 return false;
1711 }
1712
1713 pathOrUri.append(s: tokenText());
1714
1715 lex();
1716 if (tokenStartLine() != lineNumber) {
1717 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1718 return false;
1719 }
1720 if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1721 break;
1722
1723 pathOrUri.append(c: u'.');
1724
1725 lex();
1726 if (tokenStartLine() != lineNumber) {
1727 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1728 return false;
1729 }
1730 }
1731
1732 if (_state.tokenKind == T_VERSION_NUMBER) {
1733 version = tokenText();
1734 lex();
1735 if (_state.tokenKind == T_DOT) {
1736 version += u'.';
1737 lex();
1738 if (_state.tokenKind != T_VERSION_NUMBER) {
1739 setError(QCoreApplication::translate(
1740 context: "QQmlParser", key: "Incomplete version number (dot but no minor)"));
1741 return false; // expected the module version number
1742 }
1743 version += tokenText();
1744 lex();
1745 }
1746 }
1747 }
1748
1749 //
1750 // recognize the mandatory `as' followed by the module name
1751 //
1752 if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1753 if (fileImport)
1754 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1755 else
1756 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1757 if (tokenStartLine() != lineNumber) {
1758 error->loc.startLine = lineNumber;
1759 error->loc.startColumn = column;
1760 }
1761 return false; // expected `as'
1762 }
1763
1764 if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) {
1765 if (fileImport)
1766 setError(QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1767 else
1768 setError(QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1769 return false; // expected module name
1770 }
1771
1772 const QString module = tokenText();
1773 if (!module.at(i: 0).isUpper()) {
1774 setError(QCoreApplication::translate(context: "QQmlParser",key: "Invalid import qualifier"));
1775 return false;
1776 }
1777
1778 if (fileImport)
1779 directives->importFile(jsfile: pathOrUri, module, line: lineNumber, column);
1780 else
1781 directives->importModule(uri: pathOrUri, version, module, line: lineNumber, column);
1782 }
1783
1784 if (tokenStartLine() != lineNumber) {
1785 setError(QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1786 return false; // the directives cannot span over multiple lines
1787 }
1788
1789 // fetch the first token after the .pragma/.import directive
1790 lex();
1791 } while (_state.tokenKind == T_DOT);
1792
1793 return true;
1794}
1795
1796const Lexer::State &Lexer::state() const
1797{
1798 return _state;
1799}
1800void Lexer::setState(const Lexer::State &state)
1801{
1802 _state = state;
1803}
1804
1805namespace QQmlJS {
1806QDebug operator<<(QDebug dbg, const Lexer::State &s)
1807{
1808 dbg << "{\n"
1809 << " errorCode:" << int(s.errorCode) << ",\n"
1810 << " currentChar:" << s.currentChar << ",\n"
1811 << " tokenValue:" << s.tokenValue << ",\n"
1812 << " parenthesesState:" << s.parenthesesState << ",\n"
1813 << " parenthesesCount:" << s.parenthesesCount << ",\n"
1814 << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1815 << " bracesCount:" << s.bracesCount << ",\n"
1816 << " stackToken:" << s.stackToken << ",\n"
1817 << " patternFlags:" << s.patternFlags << ",\n"
1818 << " tokenKind:" << s.tokenKind << ",\n"
1819 << " importState:" << int(s.importState) << ",\n"
1820 << " validTokenText:" << s.validTokenText << ",\n"
1821 << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1822 << " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1823 << " terminator:" << s.terminator << ",\n"
1824 << " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1825 << " delimited:" << s.delimited << ",\n"
1826 << " handlingDirectives:" << s.handlingDirectives << ",\n"
1827 << " generatorLevel:" << s.generatorLevel << "\n}";
1828 return dbg;
1829}
1830}
1831
1832QT_END_NAMESPACE
1833

// Source: qtdeclarative/src/qml/parser/qqmljslexer.cpp