1 | // Copyright (C) 2016 The Qt Company Ltd. |
2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
3 | |
4 | #include "qqmljslexer_p.h" |
5 | #include "qqmljsengine_p.h" |
6 | #include "qqmljskeywords_p.h" |
7 | |
8 | #include <private/qqmljsdiagnosticmessage_p.h> |
9 | #include <private/qqmljsmemorypool_p.h> |
10 | #include <private/qlocale_tools_p.h> |
11 | |
12 | |
13 | #include <QtCore/qcoreapplication.h> |
14 | #include <QtCore/qvarlengtharray.h> |
15 | #include <QtCore/qdebug.h> |
16 | #include <QtCore/QScopedValueRollback> |
17 | |
18 | #include <optional> |
19 | |
20 | QT_BEGIN_NAMESPACE |
21 | using namespace QQmlJS; |
22 | |
23 | static inline int regExpFlagFromChar(const QChar &ch) |
24 | { |
25 | switch (ch.unicode()) { |
26 | case 'g': return Lexer::RegExp_Global; |
27 | case 'i': return Lexer::RegExp_IgnoreCase; |
28 | case 'm': return Lexer::RegExp_Multiline; |
29 | case 'u': return Lexer::RegExp_Unicode; |
30 | case 'y': return Lexer::RegExp_Sticky; |
31 | } |
32 | return 0; |
33 | } |
34 | |
// Converts one hexadecimal digit character to its numeric value.
// '0'..'9' map to 0..9 and 'a'..'f' map to 10..15. Every other input is
// treated as an upper-case digit and mapped relative to 'A' without any
// validation, so callers are expected to pass hex digits only.
static inline unsigned char convertHex(ushort c)
{
    const bool isDecimal = (c >= '0' && c <= '9');
    if (isDecimal)
        return (c - '0');

    const bool isLowerCase = (c >= 'a' && c <= 'f');
    const ushort letterBase = isLowerCase ? 'a' : 'A';
    return (c - letterBase + 10);
}
44 | |
45 | static inline QChar convertHex(QChar c1, QChar c2) |
46 | { |
47 | return QChar((convertHex(c: c1.unicode()) << 4) + convertHex(c: c2.unicode())); |
48 | } |
49 | |
50 | Lexer::Lexer(Engine *engine, LexMode lexMode) |
51 | : _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true) |
52 | { |
53 | if (engine) |
54 | engine->setLexer(this); |
55 | } |
56 | |
57 | bool Lexer::qmlMode() const |
58 | { |
59 | return _qmlMode; |
60 | } |
61 | |
62 | QString Lexer::code() const |
63 | { |
64 | return _code; |
65 | } |
66 | |
67 | void Lexer::setCode(const QString &code, int lineno, bool qmlMode, |
68 | Lexer::CodeContinuation codeContinuation) |
69 | { |
70 | if (codeContinuation == Lexer::CodeContinuation::Continue) |
71 | _currentOffset += _code.size(); |
72 | else |
73 | _currentOffset = 0; |
74 | if (_engine) |
75 | _engine->setCode(code); |
76 | |
77 | _qmlMode = qmlMode; |
78 | _code = code; |
79 | _skipLinefeed = false; |
80 | |
81 | _tokenText.clear(); |
82 | _tokenText.reserve(asize: 1024); |
83 | _errorMessage.clear(); |
84 | _tokenSpell = QStringView(); |
85 | _rawString = QStringView(); |
86 | |
87 | _codePtr = code.unicode(); |
88 | _endPtr = _codePtr + code.size(); |
89 | _tokenStartPtr = _codePtr; |
90 | |
91 | if (lineno >= 0) |
92 | _currentLineNumber = lineno; |
93 | _currentColumnNumber = 0; |
94 | _tokenLine = _currentLineNumber; |
95 | _tokenColumn = 0; |
96 | _tokenLength = 0; |
97 | |
98 | if (codeContinuation == Lexer::CodeContinuation::Reset) |
99 | _state = State {}; |
100 | } |
101 | |
102 | void Lexer::scanChar() |
103 | { |
104 | if (_skipLinefeed) { |
105 | Q_ASSERT(*_codePtr == u'\n'); |
106 | ++_codePtr; |
107 | _skipLinefeed = false; |
108 | } |
109 | _state.currentChar = *_codePtr++; |
110 | ++_currentColumnNumber; |
111 | |
112 | if (isLineTerminator()) { |
113 | if (_state.currentChar == u'\r') { |
114 | if (_codePtr < _endPtr && *_codePtr == u'\n') |
115 | _skipLinefeed = true; |
116 | _state.currentChar = u'\n'; |
117 | } |
118 | ++_currentLineNumber; |
119 | _currentColumnNumber = 0; |
120 | } |
121 | } |
122 | |
123 | QChar Lexer::peekChar() |
124 | { |
125 | auto peekPtr = _codePtr; |
126 | if (peekPtr < _endPtr) |
127 | return *peekPtr; |
128 | return QChar(); |
129 | } |
130 | |
131 | namespace { |
132 | inline bool isBinop(int tok) |
133 | { |
134 | switch (tok) { |
135 | case Lexer::T_AND: |
136 | case Lexer::T_AND_AND: |
137 | case Lexer::T_AND_EQ: |
138 | case Lexer::T_DIVIDE_: |
139 | case Lexer::T_DIVIDE_EQ: |
140 | case Lexer::T_EQ: |
141 | case Lexer::T_EQ_EQ: |
142 | case Lexer::T_EQ_EQ_EQ: |
143 | case Lexer::T_GE: |
144 | case Lexer::T_GT: |
145 | case Lexer::T_GT_GT: |
146 | case Lexer::T_GT_GT_EQ: |
147 | case Lexer::T_GT_GT_GT: |
148 | case Lexer::T_GT_GT_GT_EQ: |
149 | case Lexer::T_LE: |
150 | case Lexer::T_LT: |
151 | case Lexer::T_LT_LT: |
152 | case Lexer::T_LT_LT_EQ: |
153 | case Lexer::T_MINUS: |
154 | case Lexer::T_MINUS_EQ: |
155 | case Lexer::T_NOT_EQ: |
156 | case Lexer::T_NOT_EQ_EQ: |
157 | case Lexer::T_OR: |
158 | case Lexer::T_OR_EQ: |
159 | case Lexer::T_OR_OR: |
160 | case Lexer::T_PLUS: |
161 | case Lexer::T_PLUS_EQ: |
162 | case Lexer::T_REMAINDER: |
163 | case Lexer::T_REMAINDER_EQ: |
164 | case Lexer::T_RETURN: |
165 | case Lexer::T_STAR: |
166 | case Lexer::T_STAR_EQ: |
167 | case Lexer::T_XOR: |
168 | case Lexer::T_XOR_EQ: |
169 | return true; |
170 | |
171 | default: |
172 | return false; |
173 | } |
174 | } |
175 | |
176 | int hexDigit(QChar c) |
177 | { |
178 | if (c >= u'0' && c <= u'9') |
179 | return c.unicode() - u'0'; |
180 | if (c >= u'a' && c <= u'f') |
181 | return c.unicode() - u'a' + 10; |
182 | if (c >= u'A' && c <= u'F') |
183 | return c.unicode() - u'A' + 10; |
184 | return -1; |
185 | } |
186 | |
187 | int octalDigit(QChar c) |
188 | { |
189 | if (c >= u'0' && c <= u'7') |
190 | return c.unicode() - u'0'; |
191 | return -1; |
192 | } |
193 | |
194 | } // anonymous namespace |
195 | |
// Returns the kind of the next token and updates the per-token bookkeeping
// kept in _state (token kind, delimiter/restricted-keyword flags, import
// state, brace and parenthesis tracking).
//
// Before scanning a fresh token with scanToken(), the first pass resumes
// anything the previous call left unfinished: an unterminated block comment,
// or a partial string/template literal. T_EOL, T_COMMENT and
// T_PARTIAL_COMMENT are returned without replacing _state.tokenKind.
196 | int Lexer::lex() |
197 | { |
198 | const int previousTokenKind = _state.tokenKind; |
199 | int tokenKind; |
200 | bool firstPass = true; |
201 | |
// Also the target of the jump in the T_RBRACE case further down.
202 | again: |
203 | tokenKind = T_ERROR; |
204 | _tokenSpell = QStringView(); |
205 | _rawString = QStringView(); |
// The continuation handling below runs at most once per call and only when
// no token is waiting in _state.stackToken.
206 | if (firstPass && _state.stackToken == -1) { |
207 | firstPass = false; |
// Line-by-line mode: once the read pointer is past the end of a non-empty
// buffer, only T_EOL is reported.
208 | if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty()) |
209 | return T_EOL; |
210 | |
// The previous call ended inside an unterminated block comment: keep
// consuming until the closing star-slash pair or the end of the code.
211 | if (_state.comments == CommentState::InMultilineComment) { |
212 | scanChar(); |
213 | _tokenStartPtr = _codePtr - 1; |
214 | _tokenLine = _currentLineNumber; |
215 | _tokenColumn = _currentColumnNumber; |
216 | while (_codePtr <= _endPtr) { |
217 | if (_state.currentChar == u'*') { |
218 | scanChar(); |
219 | if (_state.currentChar == u'/') { |
220 | scanChar(); |
221 | if (_engine) { |
222 | _engine->addComment(pos: tokenOffset() + 2, |
223 | len: _codePtr - _tokenStartPtr - 1 - 4, |
224 | line: tokenStartLine(), col: tokenStartColumn() + 2); |
225 | } |
226 | tokenKind = T_COMMENT; |
227 | break; |
228 | } |
229 | } else { |
230 | scanChar(); |
231 | } |
232 | } |
// Loop ended without finding the terminator: the comment is still open.
233 | if (tokenKind == T_ERROR) |
234 | tokenKind = T_PARTIAL_COMMENT; |
235 | } else { |
236 | // handle multiline continuation |
// A partial string/template token returned by the previous call selects the
// scanString() mode used to resume it; any other previous token leaves
// scanMode empty.
237 | std::optional<ScanStringMode> scanMode; |
238 | switch (previousTokenKind) { |
239 | case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL: |
240 | scanMode = ScanStringMode::SingleQuote; |
241 | break; |
242 | case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL: |
243 | scanMode = ScanStringMode::DoubleQuote; |
244 | break; |
245 | case T_PARTIAL_TEMPLATE_HEAD: |
246 | scanMode = ScanStringMode::TemplateHead; |
247 | break; |
248 | case T_PARTIAL_TEMPLATE_MIDDLE: |
249 | scanMode = ScanStringMode::TemplateContinuation; |
250 | break; |
251 | default: |
252 | break; |
253 | } |
254 | if (scanMode) { |
255 | scanChar(); |
256 | _tokenStartPtr = _codePtr - 1; |
257 | _tokenLine = _currentLineNumber; |
258 | _tokenColumn = _currentColumnNumber; |
259 | tokenKind = scanString(mode: *scanMode); |
260 | } |
261 | } |
262 | } |
// tokenKind is still T_ERROR when nothing was resumed above: scan a fresh token.
263 | if (tokenKind == T_ERROR) |
264 | tokenKind = scanToken(); |
265 | _tokenLength = _codePtr - _tokenStartPtr - 1; |
266 | switch (tokenKind) { |
267 | // end of line and comments should not "overwrite" the old token type... |
268 | case T_EOL: |
269 | return tokenKind; |
270 | case T_COMMENT: |
271 | _state.comments = CommentState::HadComment; |
272 | return tokenKind; |
273 | case T_PARTIAL_COMMENT: |
274 | _state.comments = CommentState::InMultilineComment; |
275 | return tokenKind; |
276 | default: |
277 | _state.comments = CommentState::NoComment; |
278 | break; |
279 | } |
280 | _state.tokenKind = tokenKind; |
281 | |
// Flags are cleared for every regular token and set again by the switch below.
282 | _state.delimited = false; |
283 | _state.restrictedKeyword = false; |
284 | _state.followsClosingBrace = (previousTokenKind == T_RBRACE); |
285 | |
286 | // update the flags |
// Note the fallthrough chain: T_LBRACE -> T_SEMICOLON -> delimiter tokens,
// and that `default` sits in the middle of the switch; the cases after it
// are ordinary cases.
287 | switch (_state.tokenKind) { |
288 | case T_LBRACE: |
// Braces are only counted while bracesCount is already positive.
289 | if (_state.bracesCount > 0) |
290 | ++_state.bracesCount; |
291 | Q_FALLTHROUGH(); |
292 | case T_SEMICOLON: |
293 | _state.importState = ImportState::NoQmlImport; |
294 | Q_FALLTHROUGH(); |
295 | case T_QUESTION: |
296 | case T_COLON: |
297 | case T_TILDE: |
298 | _state.delimited = true; |
299 | break; |
300 | case T_AUTOMATIC_SEMICOLON: |
301 | case T_AS: |
302 | _state.importState = ImportState::NoQmlImport; |
303 | Q_FALLTHROUGH(); |
304 | default: |
305 | if (isBinop(tok: _state.tokenKind)) |
306 | _state.delimited = true; |
307 | break; |
308 | |
309 | case T_IMPORT: |
// `import` starts an import statement in QML mode, or when directives are
// being handled and the previous token was a dot.
310 | if (qmlMode() || (_state.handlingDirectives && previousTokenKind == T_DOT)) |
311 | _state.importState = ImportState::SawImport; |
312 | if (isBinop(tok: _state.tokenKind)) |
313 | _state.delimited = true; |
314 | break; |
315 | |
316 | case T_IF: |
317 | case T_FOR: |
318 | case T_WHILE: |
319 | case T_WITH: |
// Start counting the parentheses that follow these keywords.
320 | _state.parenthesesState = CountParentheses; |
321 | _state.parenthesesCount = 0; |
322 | break; |
323 | |
324 | case T_ELSE: |
325 | case T_DO: |
326 | _state.parenthesesState = BalancedParentheses; |
327 | break; |
328 | |
329 | case T_CONTINUE: |
330 | case T_BREAK: |
331 | case T_RETURN: |
332 | case T_YIELD: |
333 | case T_THROW: |
// scanToken() inserts a semicolon when a line terminator follows one of these.
334 | _state.restrictedKeyword = true; |
335 | break; |
336 | case T_RBRACE: |
// When the count reaches 0 (or already was 0), scan again; scanToken() then
// takes its bracesCount == 0 path and continues the template string.
337 | if (_state.bracesCount > 0) |
338 | --_state.bracesCount; |
339 | if (_state.bracesCount == 0) |
340 | goto again; |
341 | } // switch |
342 | |
343 | // update the parentheses state |
344 | switch (_state.parenthesesState) { |
345 | case IgnoreParentheses: |
346 | break; |
347 | |
348 | case CountParentheses: |
// Track nesting; when the count returns to 0 the parentheses are balanced.
349 | if (_state.tokenKind == T_RPAREN) { |
350 | --_state.parenthesesCount; |
351 | if (_state.parenthesesCount == 0) |
352 | _state.parenthesesState = BalancedParentheses; |
353 | } else if (_state.tokenKind == T_LPAREN) { |
354 | ++_state.parenthesesCount; |
355 | } |
356 | break; |
357 | |
358 | case BalancedParentheses: |
// The balanced state is dropped by any token other than `do` or `else`.
359 | if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE) |
360 | _state.parenthesesState = IgnoreParentheses; |
361 | break; |
362 | } // switch |
363 | |
364 | return _state.tokenKind; |
365 | } |
366 | |
367 | uint Lexer::decodeUnicodeEscapeCharacter(bool *ok) |
368 | { |
369 | Q_ASSERT(_state.currentChar == u'u'); |
370 | scanChar(); // skip u |
371 | if (_codePtr + 4 <= _endPtr && isHexDigit(c: _state.currentChar)) { |
372 | uint codePoint = 0; |
373 | for (int i = 0; i < 4; ++i) { |
374 | int digit = hexDigit(c: _state.currentChar); |
375 | if (digit < 0) |
376 | goto error; |
377 | codePoint *= 16; |
378 | codePoint += digit; |
379 | scanChar(); |
380 | } |
381 | |
382 | *ok = true; |
383 | return codePoint; |
384 | } else if (_codePtr < _endPtr && _state.currentChar == u'{') { |
385 | scanChar(); // skip '{' |
386 | uint codePoint = 0; |
387 | if (!isHexDigit(c: _state.currentChar)) |
388 | // need at least one hex digit |
389 | goto error; |
390 | |
391 | while (_codePtr <= _endPtr) { |
392 | int digit = hexDigit(c: _state.currentChar); |
393 | if (digit < 0) |
394 | break; |
395 | codePoint *= 16; |
396 | codePoint += digit; |
397 | if (codePoint > 0x10ffff) |
398 | goto error; |
399 | scanChar(); |
400 | } |
401 | |
402 | if (_state.currentChar != u'}') |
403 | goto error; |
404 | |
405 | scanChar(); // skip '}' |
406 | |
407 | |
408 | *ok = true; |
409 | return codePoint; |
410 | } |
411 | |
412 | error: |
413 | _state.errorCode = IllegalUnicodeEscapeSequence; |
414 | _errorMessage = QCoreApplication::translate(context: "QQmlParser" , key: "Illegal unicode escape sequence" ); |
415 | |
416 | *ok = false; |
417 | return 0; |
418 | } |
419 | |
420 | QChar Lexer::decodeHexEscapeCharacter(bool *ok) |
421 | { |
422 | if (isHexDigit(c: _codePtr[0]) && isHexDigit(c: _codePtr[1])) { |
423 | scanChar(); |
424 | |
425 | const QChar c1 = _state.currentChar; |
426 | scanChar(); |
427 | |
428 | const QChar c2 = _state.currentChar; |
429 | scanChar(); |
430 | |
431 | if (ok) |
432 | *ok = true; |
433 | |
434 | return convertHex(c1, c2); |
435 | } |
436 | |
437 | *ok = false; |
438 | return QChar(); |
439 | } |
440 | |
441 | namespace QQmlJS { |
// Debug streaming for Lexer: writes the lexer's members as a brace-delimited,
// one-member-per-line dump and returns the stream.
// The engine pointer and the address of the code buffer are printed as
// integers; the scanning pointers are printed as differences to other
// pointers rather than as raw addresses.
442 | QDebug operator<<(QDebug dbg, const Lexer &l) |
443 | { |
444 | dbg << "{\n" |
445 | << " engine:" << qsizetype(l._engine) << ",\n" |
446 | << " lexMode:" << int(l._lexMode) << ",\n" |
// Despite the label, the first value is the buffer address; the size follows the "+".
447 | << " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n" |
448 | << " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n" |
449 | << " qmlMode:" << l._qmlMode << ",\n" |
450 | << " staticIsKeyword:" << l._staticIsKeyword << ",\n" |
451 | << " currentLineNumber:" << l._currentLineNumber << ",\n" |
452 | << " currentColumnNumber:" << l._currentColumnNumber << ",\n" |
453 | << " currentOffset:" << l._currentOffset << ",\n" |
454 | << " tokenLength:" << l._tokenLength << ",\n" |
455 | << " tokenLine:" << l._tokenLine << ",\n" |
456 | << " tokenColumn:" << l._tokenColumn << ",\n" |
457 | << " tokenText:" << l._tokenText << ",\n" |
458 | << " skipLinefeed:" << l._skipLinefeed << ",\n" |
459 | << " errorMessage:" << l._errorMessage << ",\n" |
460 | << " tokenSpell:" << l._tokenSpell << ",\n" |
461 | << " rawString:" << l._rawString << ",\n" ; |
// codePtr is reported as an offset from the start of the code buffer.
462 | if (l._codePtr) |
463 | dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n" ; |
464 | else |
465 | dbg << " codePtr: *null*,\n" ; |
// tokenStartPtr is reported relative to codePtr.
466 | if (l._tokenStartPtr) |
467 | dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n" ; |
468 | else |
469 | dbg << " tokenStartPtr: *null*,\n" ; |
470 | dbg << " state:" << l._state << "\n}" ; |
471 | return dbg; |
472 | } |
473 | } |
474 | |
475 | static inline bool isIdentifierStart(uint ch) |
476 | { |
477 | // fast path for ascii |
478 | if ((ch >= u'a' && ch <= u'z') || |
479 | (ch >= u'A' && ch <= u'Z') || |
480 | ch == u'$' || ch == u'_') |
481 | return true; |
482 | |
483 | switch (QChar::category(ucs4: ch)) { |
484 | case QChar::Number_Letter: |
485 | case QChar::Letter_Uppercase: |
486 | case QChar::Letter_Lowercase: |
487 | case QChar::Letter_Titlecase: |
488 | case QChar::Letter_Modifier: |
489 | case QChar::Letter_Other: |
490 | return true; |
491 | default: |
492 | break; |
493 | } |
494 | return false; |
495 | } |
496 | |
497 | static bool isIdentifierPart(uint ch) |
498 | { |
499 | // fast path for ascii |
500 | if ((ch >= u'a' && ch <= u'z') || |
501 | (ch >= u'A' && ch <= u'Z') || |
502 | (ch >= u'0' && ch <= u'9') || |
503 | ch == u'$' || ch == u'_' || |
504 | ch == 0x200c /* ZWNJ */ || ch == 0x200d /* ZWJ */) |
505 | return true; |
506 | |
507 | switch (QChar::category(ucs4: ch)) { |
508 | case QChar::Mark_NonSpacing: |
509 | case QChar::Mark_SpacingCombining: |
510 | |
511 | case QChar::Number_DecimalDigit: |
512 | case QChar::Number_Letter: |
513 | |
514 | case QChar::Letter_Uppercase: |
515 | case QChar::Letter_Lowercase: |
516 | case QChar::Letter_Titlecase: |
517 | case QChar::Letter_Modifier: |
518 | case QChar::Letter_Other: |
519 | |
520 | case QChar::Punctuation_Connector: |
521 | return true; |
522 | default: |
523 | break; |
524 | } |
525 | return false; |
526 | } |
527 | |
// Scans one token starting at _state.currentChar and returns its kind.
//
// Order of work:
//  - a token previously parked in _state.stackToken is returned first;
//  - with bracesCount == 0 the scan continues a template string;
//  - whitespace is skipped, inserting a semicolon when a line terminator
//    follows a restricted keyword;
//  - end of input yields T_EOL (line-by-line mode) or EOF_SYMBOL;
//  - otherwise the token is a punctuator, comment, string, number or
//    identifier/keyword.
// In whole-code mode comments are skipped (after being reported to the
// engine); in line-by-line mode they are returned as T_COMMENT or
// T_PARTIAL_COMMENT. Anything unrecognized yields T_ERROR.
528 | int Lexer::scanToken() |
529 | { |
530 | if (_state.stackToken != -1) { |
531 | int tk = _state.stackToken; |
532 | _state.stackToken = -1; |
533 | return tk; |
534 | } |
535 | |
536 | if (_state.bracesCount == 0) { |
537 | // we're inside a Template string |
538 | return scanString(mode: TemplateContinuation); |
539 | } |
540 | |
// The terminator flag survives across comment tokens and is reset otherwise.
541 | if (_state.comments == CommentState::NoComment) |
542 | _state.terminator = false; |
543 | |
// Restart point used after a comment or shebang line was skipped.
544 | again: |
545 | _state.validTokenText = false; |
546 | |
547 | while (_state.currentChar.isSpace()) { |
548 | if (isLineTerminator()) { |
// True when this line terminator (including a pending '\n' of "\r\n") is
// the last thing in the code.
549 | bool isAtEnd = (_codePtr + (_skipLinefeed ? 1 : 0)) == _endPtr; |
550 | if (_state.restrictedKeyword) { |
551 | // automatic semicolon insertion |
552 | _tokenLine = _currentLineNumber; |
553 | _tokenColumn = _currentColumnNumber; |
554 | _tokenStartPtr = _codePtr - 1; |
555 | return T_SEMICOLON; |
556 | } else if (_lexMode == LexMode::WholeCode || !isAtEnd) { |
557 | _state.terminator = true; |
558 | syncProhibitAutomaticSemicolon(); |
559 | } // else we will do the previous things at the start of next line... |
560 | } |
561 | |
562 | scanChar(); |
563 | } |
564 | |
// The token starts at the current character, which sits one before _codePtr.
565 | _tokenStartPtr = _codePtr - 1; |
566 | _tokenLine = _currentLineNumber; |
567 | _tokenColumn = _currentColumnNumber; |
568 | |
// End-of-input handling: line-by-line mode reports T_EOL for non-empty code
// and restores the character before the current one as current character;
// whole-code mode reports EOF only once _codePtr has moved past _endPtr.
569 | if (_codePtr >= _endPtr) { |
570 | if (_lexMode == LexMode::LineByLine) { |
571 | if (!_code.isEmpty()) { |
572 | _state.currentChar = *(_codePtr - 2); |
573 | return T_EOL; |
574 | } else { |
575 | return EOF_SYMBOL; |
576 | } |
577 | } else if (_codePtr > _endPtr) { |
578 | return EOF_SYMBOL; |
579 | } |
580 | } |
581 | |
// ch is the first character of the token; after scanChar() the current
// character is the one following it and serves as lookahead below.
582 | const QChar ch = _state.currentChar; |
583 | scanChar(); |
584 | |
585 | switch (ch.unicode()) { |
586 | case u'~': return T_TILDE; |
587 | case u'}': return T_RBRACE; |
588 | |
589 | case u'|': |
590 | if (_state.currentChar == u'|') { |
591 | scanChar(); |
592 | return T_OR_OR; |
593 | } else if (_state.currentChar == u'=') { |
594 | scanChar(); |
595 | return T_OR_EQ; |
596 | } |
597 | return T_OR; |
598 | |
599 | case u'{': return T_LBRACE; |
600 | |
601 | case u'^': |
602 | if (_state.currentChar == u'=') { |
603 | scanChar(); |
604 | return T_XOR_EQ; |
605 | } |
606 | return T_XOR; |
607 | |
608 | case u']': return T_RBRACKET; |
609 | case u'[': return T_LBRACKET; |
610 | case u'?': { |
611 | if (_state.currentChar == u'?') { |
612 | scanChar(); |
613 | return T_QUESTION_QUESTION; |
614 | } |
// A question mark followed by a dot is only T_QUESTION_DOT when no digit
// follows the dot.
615 | if (_state.currentChar == u'.' && !peekChar().isDigit()) { |
616 | scanChar(); |
617 | return T_QUESTION_DOT; |
618 | } |
619 | |
620 | return T_QUESTION; |
621 | } |
622 | |
623 | case u'>': |
624 | if (_state.currentChar == u'>') { |
625 | scanChar(); |
626 | if (_state.currentChar == u'>') { |
627 | scanChar(); |
628 | if (_state.currentChar == u'=') { |
629 | scanChar(); |
630 | return T_GT_GT_GT_EQ; |
631 | } |
632 | return T_GT_GT_GT; |
633 | } else if (_state.currentChar == u'=') { |
634 | scanChar(); |
635 | return T_GT_GT_EQ; |
636 | } |
637 | return T_GT_GT; |
638 | } else if (_state.currentChar == u'=') { |
639 | scanChar(); |
640 | return T_GE; |
641 | } |
642 | return T_GT; |
643 | |
644 | case u'=': |
645 | if (_state.currentChar == u'=') { |
646 | scanChar(); |
647 | if (_state.currentChar == u'=') { |
648 | scanChar(); |
649 | return T_EQ_EQ_EQ; |
650 | } |
651 | return T_EQ_EQ; |
652 | } else if (_state.currentChar == u'>') { |
653 | scanChar(); |
654 | return T_ARROW; |
655 | } |
656 | return T_EQ; |
657 | |
658 | case u'<': |
659 | if (_state.currentChar == u'=') { |
660 | scanChar(); |
661 | return T_LE; |
662 | } else if (_state.currentChar == u'<') { |
663 | scanChar(); |
664 | if (_state.currentChar == u'=') { |
665 | scanChar(); |
666 | return T_LT_LT_EQ; |
667 | } |
668 | return T_LT_LT; |
669 | } |
670 | return T_LT; |
671 | |
672 | case u';': return T_SEMICOLON; |
673 | case u':': return T_COLON; |
674 | |
// Slash: block comment, line comment, divide-assign or plain divide.
675 | case u'/': |
676 | switch (_state.currentChar.unicode()) { |
677 | case u'*': |
678 | scanChar(); |
679 | while (_codePtr <= _endPtr) { |
680 | if (_state.currentChar == u'*') { |
681 | scanChar(); |
682 | if (_state.currentChar == u'/') { |
683 | scanChar(); |
// The comment is reported without its two-character delimiters on each side.
684 | if (_engine) { |
685 | _engine->addComment(pos: tokenOffset() + 2, |
686 | len: _codePtr - _tokenStartPtr - 1 - 4, line: tokenStartLine(), |
687 | col: tokenStartColumn() + 2); |
688 | } |
689 | if (_lexMode == LexMode::LineByLine) |
690 | return T_COMMENT; |
691 | else |
692 | goto again; |
693 | } |
694 | } else { |
695 | scanChar(); |
696 | } |
697 | } |
// The code ended before the block comment was closed.
698 | if (_lexMode == LexMode::LineByLine) |
699 | return T_PARTIAL_COMMENT; |
700 | else |
701 | goto again; |
702 | case u'/': |
703 | while (_codePtr <= _endPtr && !isLineTerminator()) { |
704 | scanChar(); |
705 | } |
706 | if (_engine) { |
707 | _engine->addComment(pos: tokenOffset() + 2, len: _codePtr - _tokenStartPtr - 1 - 2, |
708 | line: tokenStartLine(), col: tokenStartColumn() + 2); |
709 | } |
710 | if (_lexMode == LexMode::LineByLine) |
711 | return T_COMMENT; |
712 | else |
713 | goto again; |
714 | case u'=': |
715 | scanChar(); |
716 | return T_DIVIDE_EQ; |
717 | default: |
718 | return T_DIVIDE_; |
719 | } |
720 | case u'.': |
// After `import` a dot is always T_DOT, even when a digit or dot follows.
721 | if (_state.importState == ImportState::SawImport) |
722 | return T_DOT; |
723 | if (isDecimalDigit(c: _state.currentChar.unicode())) |
724 | return scanNumber(ch); |
725 | if (_state.currentChar == u'.') { |
726 | scanChar(); |
727 | if (_state.currentChar == u'.') { |
728 | scanChar(); |
729 | return T_ELLIPSIS; |
730 | } else { |
731 | _state.errorCode = IllegalCharacter; |
732 | _errorMessage = QCoreApplication::translate(context: "QQmlParser" , key: "Unexpected token '.'" ); |
733 | return T_ERROR; |
734 | } |
735 | } |
736 | return T_DOT; |
737 | |
738 | case u'-': |
739 | if (_state.currentChar == u'=') { |
740 | scanChar(); |
741 | return T_MINUS_EQ; |
742 | } else if (_state.currentChar == u'-') { |
743 | scanChar(); |
744 | |
// Decrement operator after a line terminator: return a semicolon now and
// park the operator in stackToken for the next call.
745 | if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon |
746 | && _state.tokenKind != T_LPAREN) { |
747 | _state.stackToken = T_MINUS_MINUS; |
748 | return T_SEMICOLON; |
749 | } |
750 | |
751 | return T_MINUS_MINUS; |
752 | } |
753 | return T_MINUS; |
754 | |
755 | case u',': return T_COMMA; |
756 | |
757 | case u'+': |
758 | if (_state.currentChar == u'=') { |
759 | scanChar(); |
760 | return T_PLUS_EQ; |
761 | } else if (_state.currentChar == u'+') { |
762 | scanChar(); |
763 | |
// Same semicolon insertion as for the decrement operator above.
764 | if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon |
765 | && _state.tokenKind != T_LPAREN) { |
766 | _state.stackToken = T_PLUS_PLUS; |
767 | return T_SEMICOLON; |
768 | } |
769 | |
770 | return T_PLUS_PLUS; |
771 | } |
772 | return T_PLUS; |
773 | |
774 | case u'*': |
775 | if (_state.currentChar == u'=') { |
776 | scanChar(); |
777 | return T_STAR_EQ; |
778 | } else if (_state.currentChar == u'*') { |
779 | scanChar(); |
780 | if (_state.currentChar == u'=') { |
781 | scanChar(); |
782 | return T_STAR_STAR_EQ; |
783 | } |
784 | return T_STAR_STAR; |
785 | } |
786 | return T_STAR; |
787 | |
788 | case u')': return T_RPAREN; |
789 | case u'(': return T_LPAREN; |
790 | |
791 | case u'@': return T_AT; |
792 | |
793 | case u'&': |
794 | if (_state.currentChar == u'=') { |
795 | scanChar(); |
796 | return T_AND_EQ; |
797 | } else if (_state.currentChar == u'&') { |
798 | scanChar(); |
799 | return T_AND_AND; |
800 | } |
801 | return T_AND; |
802 | |
803 | case u'%': |
804 | if (_state.currentChar == u'=') { |
805 | scanChar(); |
806 | return T_REMAINDER_EQ; |
807 | } |
808 | return T_REMAINDER; |
809 | |
810 | case u'!': |
811 | if (_state.currentChar == u'=') { |
812 | scanChar(); |
813 | if (_state.currentChar == u'=') { |
814 | scanChar(); |
815 | return T_NOT_EQ_EQ; |
816 | } |
817 | return T_NOT_EQ; |
818 | } |
819 | return T_NOT; |
820 | |
821 | case u'`': |
// Remember the enclosing brace count; scanString() pops it again when the
// template ends.
822 | _state.outerTemplateBraceCount.push(t: _state.bracesCount); |
823 | Q_FALLTHROUGH(); |
824 | case u'\'': |
825 | case u'"': |
// The quote character itself is converted to the ScanStringMode value.
826 | return scanString(mode: ScanStringMode(ch.unicode())); |
827 | case u'0': |
828 | case u'1': |
829 | case u'2': |
830 | case u'3': |
831 | case u'4': |
832 | case u'5': |
833 | case u'6': |
834 | case u'7': |
835 | case u'8': |
836 | case u'9': |
// Numbers inside an import statement are scanned as version numbers.
837 | if (_state.importState == ImportState::SawImport) |
838 | return scanVersionNumber(ch); |
839 | else |
840 | return scanNumber(ch); |
841 | |
842 | case '#': |
// NOTE(review): line 1 / column 2 presumably identifies a hash sign that is
// the very first character, the lookahead having been scanned already -
// confirm against the column numbering convention.
843 | if (_currentLineNumber == 1 && _currentColumnNumber == 2) { |
844 | // shebang support |
845 | while (_codePtr <= _endPtr && !isLineTerminator()) { |
846 | scanChar(); |
847 | } |
848 | if (_engine) { |
849 | _engine->addComment(pos: tokenOffset(), len: _codePtr - _tokenStartPtr - 1, line: tokenStartLine(), |
850 | col: tokenStartColumn()); |
851 | } |
852 | if (_lexMode == LexMode::LineByLine) |
853 | return T_COMMENT; |
854 | else |
855 | goto again; |
856 | } |
857 | Q_FALLTHROUGH(); |
858 | |
// Everything else: identifiers and keywords, or an error.
859 | default: { |
860 | uint c = ch.unicode(); |
861 | bool identifierWithEscapeChars = false; |
// Combine a surrogate pair into one code point, or decode a leading
// backslash-u escape.
862 | if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _state.currentChar.unicode())) { |
863 | c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode()); |
864 | scanChar(); |
865 | } else if (c == '\\' && _state.currentChar == u'u') { |
866 | identifierWithEscapeChars = true; |
867 | bool ok = false; |
868 | c = decodeUnicodeEscapeCharacter(ok: &ok); |
869 | if (!ok) |
870 | return T_ERROR; |
871 | } |
872 | if (isIdentifierStart(ch: c)) { |
// Once an escape has been seen the decoded identifier is accumulated in
// _tokenText, because it no longer matches the source text.
873 | if (identifierWithEscapeChars) { |
874 | _tokenText.resize(size: 0); |
875 | if (QChar::requiresSurrogates(ucs4: c)) { |
876 | _tokenText += QChar(QChar::highSurrogate(ucs4: c)); |
877 | _tokenText += QChar(QChar::lowSurrogate(ucs4: c)); |
878 | } else { |
879 | _tokenText += QChar(c); |
880 | } |
881 | _state.validTokenText = true; |
882 | } |
// Consume the remaining identifier characters.
883 | while (_codePtr <= _endPtr) { |
884 | c = _state.currentChar.unicode(); |
885 | if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _codePtr->unicode())) { |
886 | scanChar(); |
887 | c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode()); |
888 | } else if (_state.currentChar == u'\\' && _codePtr[0] == u'u') { |
// First escape inside the identifier: seed _tokenText with the characters
// scanned so far.
889 | if (!identifierWithEscapeChars) { |
890 | identifierWithEscapeChars = true; |
891 | _tokenText.resize(size: 0); |
892 | _tokenText.insert(i: 0, uc: _tokenStartPtr, len: _codePtr - _tokenStartPtr - 1); |
893 | _state.validTokenText = true; |
894 | } |
895 | |
896 | scanChar(); // skip '\\' |
897 | bool ok = false; |
898 | c = decodeUnicodeEscapeCharacter(ok: &ok); |
899 | if (!ok) |
900 | return T_ERROR; |
901 | |
902 | if (!isIdentifierPart(ch: c)) |
903 | break; |
904 | |
905 | if (QChar::requiresSurrogates(ucs4: c)) { |
906 | _tokenText += QChar(QChar::highSurrogate(ucs4: c)); |
907 | _tokenText += QChar(QChar::lowSurrogate(ucs4: c)); |
908 | } else { |
909 | _tokenText += QChar(c); |
910 | } |
// The escape has been consumed by the decoder already, so skip the
// scanChar() at the end of the loop body.
911 | continue; |
912 | } |
913 | |
914 | if (!isIdentifierPart(ch: c)) |
915 | break; |
916 | |
917 | if (identifierWithEscapeChars) { |
918 | if (QChar::requiresSurrogates(ucs4: c)) { |
919 | _tokenText += QChar(QChar::highSurrogate(ucs4: c)); |
920 | _tokenText += QChar(QChar::lowSurrogate(ucs4: c)); |
921 | } else { |
922 | _tokenText += QChar(c); |
923 | } |
924 | } |
925 | scanChar(); |
926 | } |
927 | |
928 | _tokenLength = _codePtr - _tokenStartPtr - 1; |
929 | |
930 | int kind = T_IDENTIFIER; |
931 | |
// Identifiers containing escapes are never classified as keywords.
932 | if (!identifierWithEscapeChars) |
933 | kind = classify(s: _tokenStartPtr, n: _tokenLength, parseModeFlags: parseModeFlags()); |
934 | |
// After `function`, look past whitespace and comments for a star, which
// turns the token into T_FUNCTION_STAR.
935 | if (kind == T_FUNCTION) { |
936 | continue_skipping: |
937 | while (_codePtr < _endPtr && _state.currentChar.isSpace()) |
938 | scanChar(); |
939 | if (_state.currentChar == u'*') { |
940 | _tokenLength = _codePtr - _tokenStartPtr - 1; |
941 | kind = T_FUNCTION_STAR; |
942 | scanChar(); |
943 | } else if (_state.currentChar == u'/') { |
944 | scanChar(); |
945 | switch (_state.currentChar.unicode()) { |
946 | case u'*': |
947 | scanChar(); |
948 | while (_codePtr <= _endPtr) { |
949 | if (_state.currentChar == u'*') { |
950 | scanChar(); |
951 | if (_state.currentChar == u'/') { |
952 | scanChar(); |
953 | if (_engine) { |
954 | _engine->addComment(pos: tokenOffset() + 2, |
955 | len: _codePtr - _tokenStartPtr - 1 - 4, |
956 | line: tokenStartLine(), |
957 | col: tokenStartColumn() + 2); |
958 | } |
959 | if (_lexMode == LexMode::LineByLine) |
960 | return T_COMMENT; |
961 | goto continue_skipping; |
962 | } |
963 | } else { |
964 | scanChar(); |
965 | } |
966 | } |
967 | if (_lexMode == LexMode::LineByLine) |
968 | return T_PARTIAL_COMMENT; |
969 | else |
970 | goto continue_skipping; |
971 | case u'/': |
972 | while (_codePtr <= _endPtr && !isLineTerminator()) { |
973 | scanChar(); |
974 | } |
975 | if (_engine) { |
976 | _engine->addComment(pos: tokenOffset() + 2, |
977 | len: _codePtr - _tokenStartPtr - 1 - 2, |
978 | line: tokenStartLine(), col: tokenStartColumn() + 2); |
979 | } |
980 | if (_lexMode == LexMode::LineByLine) |
981 | return T_COMMENT; |
982 | else |
983 | goto continue_skipping; |
984 | default: |
985 | break; |
986 | } |
987 | } |
988 | } |
989 | |
// The spelling is the decoded text for escaped identifiers and a slice of
// the source code otherwise.
990 | if (_engine) { |
991 | if (kind == T_IDENTIFIER && identifierWithEscapeChars) |
992 | _tokenSpell = _engine->newStringRef(text: _tokenText); |
993 | else |
994 | _tokenSpell = _engine->midRef(position: _tokenStartPtr - _code.unicode(), size: _tokenLength); |
995 | } |
996 | |
997 | return kind; |
998 | } |
999 | } |
1000 | |
1001 | break; |
1002 | } |
1003 | |
// Reached when the character does not start any known token.
1004 | return T_ERROR; |
1005 | } |
1006 | |
1007 | int Lexer::scanString(ScanStringMode mode) |
1008 | { |
1009 | QChar quote = (mode == TemplateContinuation) ? QChar(TemplateHead) : QChar(mode); |
1010 | // we actually use T_STRING_LITERAL also for multiline strings, should we want to |
1011 | // change that we should set it to: |
1012 | // _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL || |
1013 | // _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL |
1014 | // here and uncomment the multilineStringLiteral = true below. |
1015 | bool multilineStringLiteral = false; |
1016 | |
1017 | const QChar *startCode = _codePtr - 1; |
1018 | // in case we just parsed a \r, we need to reset this flag to get things working |
1019 | // correctly in the loop below and afterwards |
1020 | _skipLinefeed = false; |
1021 | bool first = true; |
1022 | |
1023 | if (_engine) { |
1024 | while (_codePtr <= _endPtr) { |
1025 | if (isLineTerminator()) { |
1026 | if ((quote == u'`' || qmlMode())) { |
1027 | if (first) |
1028 | --_currentLineNumber; // will be read again in scanChar() |
1029 | break; |
1030 | } |
1031 | _state.errorCode = IllegalCharacter; |
1032 | _errorMessage = QCoreApplication::translate(context: "QQmlParser" , |
1033 | key: "Stray newline in string literal" ); |
1034 | return T_ERROR; |
1035 | } else if (_state.currentChar == u'\\') { |
1036 | break; |
1037 | } else if (_state.currentChar == u'$' && quote == u'`') { |
1038 | break; |
1039 | } else if (_state.currentChar == quote) { |
1040 | _tokenSpell = |
1041 | _engine->midRef(position: startCode - _code.unicode(), size: _codePtr - startCode - 1); |
1042 | _rawString = _tokenSpell; |
1043 | scanChar(); |
1044 | |
1045 | if (quote == u'`') |
1046 | _state.bracesCount = _state.outerTemplateBraceCount.pop(); |
1047 | if (mode == TemplateHead) |
1048 | return T_NO_SUBSTITUTION_TEMPLATE; |
1049 | else if (mode == TemplateContinuation) |
1050 | return T_TEMPLATE_TAIL; |
1051 | else if (multilineStringLiteral) |
1052 | return T_MULTILINE_STRING_LITERAL; |
1053 | else |
1054 | return T_STRING_LITERAL; |
1055 | } |
1056 | // don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result |
1057 | _state.currentChar = *_codePtr++; |
1058 | ++_currentColumnNumber; |
1059 | first = false; |
1060 | } |
1061 | } |
1062 | |
1063 | // rewind by one char, so things gets scanned correctly |
1064 | --_codePtr; |
1065 | --_currentColumnNumber; |
1066 | |
1067 | _state.validTokenText = true; |
1068 | _tokenText = QString(startCode, _codePtr - startCode); |
1069 | |
1070 | auto setRawString = [&](const QChar *end) { |
1071 | QString raw(startCode, end - startCode - 1); |
1072 | raw.replace(before: QLatin1String("\r\n" ), after: QLatin1String("\n" )); |
1073 | raw.replace(before: u'\r', after: u'\n'); |
1074 | _rawString = _engine->newStringRef(text: raw); |
1075 | }; |
1076 | |
1077 | scanChar(); |
1078 | |
1079 | while (_codePtr <= _endPtr) { |
1080 | if (_state.currentChar == quote) { |
1081 | scanChar(); |
1082 | |
1083 | if (_engine) { |
1084 | _tokenSpell = _engine->newStringRef(text: _tokenText); |
1085 | if (quote == u'`') |
1086 | setRawString(_codePtr - 1); |
1087 | } |
1088 | |
1089 | if (quote == u'`') |
1090 | _state.bracesCount = _state.outerTemplateBraceCount.pop(); |
1091 | |
1092 | if (mode == TemplateContinuation) |
1093 | return T_TEMPLATE_TAIL; |
1094 | else if (mode == TemplateHead) |
1095 | return T_NO_SUBSTITUTION_TEMPLATE; |
1096 | |
1097 | return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL; |
1098 | } else if (quote == u'`' && _state.currentChar == u'$' && *_codePtr == u'{') { |
1099 | scanChar(); |
1100 | scanChar(); |
1101 | _state.bracesCount = 1; |
1102 | if (_engine) { |
1103 | _tokenSpell = _engine->newStringRef(text: _tokenText); |
1104 | setRawString(_codePtr - 2); |
1105 | } |
1106 | |
1107 | return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE); |
1108 | } else if (_state.currentChar == u'\\') { |
1109 | scanChar(); |
1110 | if (_codePtr > _endPtr) { |
1111 | _state.errorCode = IllegalEscapeSequence; |
1112 | _errorMessage = QCoreApplication::translate( |
1113 | context: "QQmlParser" , key: "End of file reached at escape sequence" ); |
1114 | return T_ERROR; |
1115 | } |
1116 | |
1117 | QChar u; |
1118 | |
1119 | switch (_state.currentChar.unicode()) { |
1120 | // unicode escape sequence |
1121 | case u'u': { |
1122 | bool ok = false; |
1123 | uint codePoint = decodeUnicodeEscapeCharacter(ok: &ok); |
1124 | if (!ok) |
1125 | return T_ERROR; |
1126 | if (QChar::requiresSurrogates(ucs4: codePoint)) { |
1127 | // need to use a surrogate pair |
1128 | _tokenText += QChar(QChar::highSurrogate(ucs4: codePoint)); |
1129 | u = QChar::lowSurrogate(ucs4: codePoint); |
1130 | } else { |
1131 | u = QChar(codePoint); |
1132 | } |
1133 | } break; |
1134 | |
1135 | // hex escape sequence |
1136 | case u'x': { |
1137 | bool ok = false; |
1138 | u = decodeHexEscapeCharacter(ok: &ok); |
1139 | if (!ok) { |
1140 | _state.errorCode = IllegalHexadecimalEscapeSequence; |
1141 | _errorMessage = QCoreApplication::translate( |
1142 | context: "QQmlParser" , key: "Illegal hexadecimal escape sequence" ); |
1143 | return T_ERROR; |
1144 | } |
1145 | } break; |
1146 | |
1147 | // single character escape sequence |
1148 | case u'\\': u = u'\\'; scanChar(); break; |
1149 | case u'\'': u = u'\''; scanChar(); break; |
1150 | case u'\"': u = u'\"'; scanChar(); break; |
1151 | case u'b': u = u'\b'; scanChar(); break; |
1152 | case u'f': u = u'\f'; scanChar(); break; |
1153 | case u'n': u = u'\n'; scanChar(); break; |
1154 | case u'r': u = u'\r'; scanChar(); break; |
1155 | case u't': u = u'\t'; scanChar(); break; |
1156 | case u'v': u = u'\v'; scanChar(); break; |
1157 | |
1158 | case u'0': |
1159 | if (!_codePtr->isDigit()) { |
1160 | scanChar(); |
1161 | u = u'\0'; |
1162 | break; |
1163 | } |
1164 | Q_FALLTHROUGH(); |
1165 | case u'1': |
1166 | case u'2': |
1167 | case u'3': |
1168 | case u'4': |
1169 | case u'5': |
1170 | case u'6': |
1171 | case u'7': |
1172 | case u'8': |
1173 | case u'9': |
1174 | _state.errorCode = IllegalEscapeSequence; |
1175 | _errorMessage = QCoreApplication::translate( |
1176 | context: "QQmlParser" , key: "Octal escape sequences are not allowed" ); |
1177 | return T_ERROR; |
1178 | |
1179 | case u'\r': |
1180 | case u'\n': |
1181 | case 0x2028u: |
1182 | case 0x2029u: |
1183 | // uncomment the following to use T_MULTILINE_STRING_LITERAL |
1184 | // multilineStringLiteral = true; |
1185 | scanChar(); |
1186 | continue; |
1187 | |
1188 | default: |
1189 | // non escape character |
1190 | u = _state.currentChar; |
1191 | scanChar(); |
1192 | } |
1193 | |
1194 | _tokenText += u; |
1195 | } else { |
1196 | _tokenText += _state.currentChar; |
1197 | scanChar(); |
1198 | } |
1199 | } |
1200 | if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) { |
1201 | if (mode == TemplateContinuation) |
1202 | return T_PARTIAL_TEMPLATE_MIDDLE; |
1203 | else if (mode == TemplateHead) |
1204 | return T_PARTIAL_TEMPLATE_HEAD; |
1205 | else if (mode == SingleQuote) |
1206 | return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL; |
1207 | return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL; |
1208 | } |
1209 | _state.errorCode = UnclosedStringLiteral; |
1210 | _errorMessage = QCoreApplication::translate(context: "QQmlParser" , key: "Unclosed string at end of line" ); |
1211 | return T_ERROR; |
1212 | } |
1213 | |
1214 | int Lexer::scanNumber(QChar ch) |
1215 | { |
1216 | if (ch == u'0') { |
1217 | if (_state.currentChar == u'x' || _state.currentChar == u'X') { |
1218 | ch = _state.currentChar; // remember the x or X to use it in the error message below. |
1219 | |
1220 | // parse hex integer literal |
1221 | scanChar(); // consume 'x' |
1222 | |
1223 | if (!isHexDigit(c: _state.currentChar)) { |
1224 | _state.errorCode = IllegalNumber; |
1225 | _errorMessage = QCoreApplication::translate( |
1226 | context: "QQmlParser" , |
1227 | key: "At least one hexadecimal digit is required after '0%1'" ) |
1228 | .arg(a: ch); |
1229 | return T_ERROR; |
1230 | } |
1231 | |
1232 | double d = 0.; |
1233 | while (1) { |
1234 | int digit = ::hexDigit(c: _state.currentChar); |
1235 | if (digit < 0) |
1236 | break; |
1237 | d *= 16; |
1238 | d += digit; |
1239 | scanChar(); |
1240 | } |
1241 | |
1242 | _state.tokenValue = d; |
1243 | return T_NUMERIC_LITERAL; |
1244 | } else if (_state.currentChar == u'o' || _state.currentChar == u'O') { |
1245 | ch = _state.currentChar; // remember the o or O to use it in the error message below. |
1246 | |
1247 | // parse octal integer literal |
1248 | scanChar(); // consume 'o' |
1249 | |
1250 | if (!isOctalDigit(c: _state.currentChar.unicode())) { |
1251 | _state.errorCode = IllegalNumber; |
1252 | _errorMessage = |
1253 | QCoreApplication::translate( |
1254 | context: "QQmlParser" , key: "At least one octal digit is required after '0%1'" ) |
1255 | .arg(a: ch); |
1256 | return T_ERROR; |
1257 | } |
1258 | |
1259 | double d = 0.; |
1260 | while (1) { |
1261 | int digit = ::octalDigit(c: _state.currentChar); |
1262 | if (digit < 0) |
1263 | break; |
1264 | d *= 8; |
1265 | d += digit; |
1266 | scanChar(); |
1267 | } |
1268 | |
1269 | _state.tokenValue = d; |
1270 | return T_NUMERIC_LITERAL; |
1271 | } else if (_state.currentChar == u'b' || _state.currentChar == u'B') { |
1272 | ch = _state.currentChar; // remember the b or B to use it in the error message below. |
1273 | |
1274 | // parse binary integer literal |
1275 | scanChar(); // consume 'b' |
1276 | |
1277 | if (_state.currentChar.unicode() != u'0' && _state.currentChar.unicode() != u'1') { |
1278 | _state.errorCode = IllegalNumber; |
1279 | _errorMessage = |
1280 | QCoreApplication::translate( |
1281 | context: "QQmlParser" , key: "At least one binary digit is required after '0%1'" ) |
1282 | .arg(a: ch); |
1283 | return T_ERROR; |
1284 | } |
1285 | |
1286 | double d = 0.; |
1287 | while (1) { |
1288 | int digit = 0; |
1289 | if (_state.currentChar.unicode() == u'1') |
1290 | digit = 1; |
1291 | else if (_state.currentChar.unicode() != u'0') |
1292 | break; |
1293 | d *= 2; |
1294 | d += digit; |
1295 | scanChar(); |
1296 | } |
1297 | |
1298 | _state.tokenValue = d; |
1299 | return T_NUMERIC_LITERAL; |
1300 | } else if (_state.currentChar.isDigit() && !qmlMode()) { |
1301 | _state.errorCode = IllegalCharacter; |
1302 | _errorMessage = QCoreApplication::translate(context: "QQmlParser" , |
1303 | key: "Decimal numbers can't start with '0'" ); |
1304 | return T_ERROR; |
1305 | } |
1306 | } |
1307 | |
1308 | // decimal integer literal |
1309 | QVarLengthArray<char,32> chars; |
1310 | chars.append(t: ch.unicode()); |
1311 | |
1312 | if (ch != u'.') { |
1313 | while (_state.currentChar.isDigit()) { |
1314 | chars.append(t: _state.currentChar.unicode()); |
1315 | scanChar(); // consume the digit |
1316 | } |
1317 | |
1318 | if (_state.currentChar == u'.') { |
1319 | chars.append(t: _state.currentChar.unicode()); |
1320 | scanChar(); // consume `.' |
1321 | } |
1322 | } |
1323 | |
1324 | while (_state.currentChar.isDigit()) { |
1325 | chars.append(t: _state.currentChar.unicode()); |
1326 | scanChar(); |
1327 | } |
1328 | |
1329 | if (_state.currentChar == u'e' || _state.currentChar == u'E') { |
1330 | if (_codePtr[0].isDigit() |
1331 | || ((_codePtr[0] == u'+' || _codePtr[0] == u'-') && _codePtr[1].isDigit())) { |
1332 | |
1333 | chars.append(t: _state.currentChar.unicode()); |
1334 | scanChar(); // consume `e' |
1335 | |
1336 | if (_state.currentChar == u'+' || _state.currentChar == u'-') { |
1337 | chars.append(t: _state.currentChar.unicode()); |
1338 | scanChar(); // consume the sign |
1339 | } |
1340 | |
1341 | while (_state.currentChar.isDigit()) { |
1342 | chars.append(t: _state.currentChar.unicode()); |
1343 | scanChar(); |
1344 | } |
1345 | } |
1346 | } |
1347 | |
1348 | const char *begin = chars.constData(); |
1349 | const char *end = nullptr; |
1350 | bool ok = false; |
1351 | |
1352 | _state.tokenValue = qstrntod(s00: begin, len: chars.size(), se: &end, ok: &ok); |
1353 | |
1354 | if (end - begin != chars.size()) { |
1355 | _state.errorCode = IllegalExponentIndicator; |
1356 | _errorMessage = |
1357 | QCoreApplication::translate(context: "QQmlParser" , key: "Illegal syntax for exponential number" ); |
1358 | return T_ERROR; |
1359 | } |
1360 | |
1361 | return T_NUMERIC_LITERAL; |
1362 | } |
1363 | |
1364 | int Lexer::scanVersionNumber(QChar ch) |
1365 | { |
1366 | if (ch == u'0') { |
1367 | _state.tokenValue = 0; |
1368 | return T_VERSION_NUMBER; |
1369 | } |
1370 | |
1371 | int acc = 0; |
1372 | acc += ch.digitValue(); |
1373 | |
1374 | while (_state.currentChar.isDigit()) { |
1375 | acc *= 10; |
1376 | acc += _state.currentChar.digitValue(); |
1377 | scanChar(); // consume the digit |
1378 | } |
1379 | |
1380 | _state.tokenValue = acc; |
1381 | return T_VERSION_NUMBER; |
1382 | } |
1383 | |
1384 | bool Lexer::scanRegExp(RegExpBodyPrefix prefix) |
1385 | { |
1386 | _tokenText.resize(size: 0); |
1387 | _state.validTokenText = true; |
1388 | _state.patternFlags = 0; |
1389 | |
1390 | if (prefix == EqualPrefix) |
1391 | _tokenText += u'='; |
1392 | |
1393 | while (true) { |
1394 | switch (_state.currentChar.unicode()) { |
1395 | case u'/': |
1396 | scanChar(); |
1397 | |
1398 | // scan the flags |
1399 | _state.patternFlags = 0; |
1400 | while (isIdentLetter(c: _state.currentChar)) { |
1401 | int flag = regExpFlagFromChar(ch: _state.currentChar); |
1402 | if (flag == 0 || _state.patternFlags & flag) { |
1403 | _errorMessage = QCoreApplication::translate( |
1404 | context: "QQmlParser" , key: "Invalid regular expression flag '%0'" ) |
1405 | .arg(a: QChar(_state.currentChar)); |
1406 | return false; |
1407 | } |
1408 | _state.patternFlags |= flag; |
1409 | scanChar(); |
1410 | } |
1411 | |
1412 | _tokenLength = _codePtr - _tokenStartPtr - 1; |
1413 | return true; |
1414 | |
1415 | case u'\\': |
1416 | // regular expression backslash sequence |
1417 | _tokenText += _state.currentChar; |
1418 | scanChar(); |
1419 | |
1420 | if (_codePtr > _endPtr || isLineTerminator()) { |
1421 | _errorMessage = QCoreApplication::translate( |
1422 | context: "QQmlParser" , key: "Unterminated regular expression backslash sequence" ); |
1423 | return false; |
1424 | } |
1425 | |
1426 | _tokenText += _state.currentChar; |
1427 | scanChar(); |
1428 | break; |
1429 | |
1430 | case u'[': |
1431 | // regular expression class |
1432 | _tokenText += _state.currentChar; |
1433 | scanChar(); |
1434 | |
1435 | while (_codePtr <= _endPtr && !isLineTerminator()) { |
1436 | if (_state.currentChar == u']') |
1437 | break; |
1438 | else if (_state.currentChar == u'\\') { |
1439 | // regular expression backslash sequence |
1440 | _tokenText += _state.currentChar; |
1441 | scanChar(); |
1442 | |
1443 | if (_codePtr > _endPtr || isLineTerminator()) { |
1444 | _errorMessage = QCoreApplication::translate( |
1445 | context: "QQmlParser" , key: "Unterminated regular expression backslash sequence" ); |
1446 | return false; |
1447 | } |
1448 | |
1449 | _tokenText += _state.currentChar; |
1450 | scanChar(); |
1451 | } else { |
1452 | _tokenText += _state.currentChar; |
1453 | scanChar(); |
1454 | } |
1455 | } |
1456 | |
1457 | if (_state.currentChar != u']') { |
1458 | _errorMessage = QCoreApplication::translate( |
1459 | context: "QQmlParser" , key: "Unterminated regular expression class" ); |
1460 | return false; |
1461 | } |
1462 | |
1463 | _tokenText += _state.currentChar; |
1464 | scanChar(); // skip ] |
1465 | break; |
1466 | |
1467 | default: |
1468 | if (_codePtr > _endPtr || isLineTerminator()) { |
1469 | _errorMessage = QCoreApplication::translate( |
1470 | context: "QQmlParser" , key: "Unterminated regular expression literal" ); |
1471 | return false; |
1472 | } else { |
1473 | _tokenText += _state.currentChar; |
1474 | scanChar(); |
1475 | } |
1476 | } // switch |
1477 | } // while |
1478 | |
1479 | return false; |
1480 | } |
1481 | |
1482 | bool Lexer::isLineTerminator() const |
1483 | { |
1484 | const ushort unicode = _state.currentChar.unicode(); |
1485 | return unicode == 0x000Au |
1486 | || unicode == 0x000Du |
1487 | || unicode == 0x2028u |
1488 | || unicode == 0x2029u; |
1489 | } |
1490 | |
1491 | unsigned Lexer::isLineTerminatorSequence() const |
1492 | { |
1493 | switch (_state.currentChar.unicode()) { |
1494 | case 0x000Au: |
1495 | case 0x2028u: |
1496 | case 0x2029u: |
1497 | return 1; |
1498 | case 0x000Du: |
1499 | if (_codePtr->unicode() == 0x000Au) |
1500 | return 2; |
1501 | else |
1502 | return 1; |
1503 | default: |
1504 | return 0; |
1505 | } |
1506 | } |
1507 | |
1508 | bool Lexer::isIdentLetter(QChar ch) |
1509 | { |
1510 | // ASCII-biased, since all reserved words are ASCII, aand hence the |
1511 | // bulk of content to be parsed. |
1512 | if ((ch >= u'a' && ch <= u'z') |
1513 | || (ch >= u'A' && ch <= u'Z') |
1514 | || ch == u'$' || ch == u'_') |
1515 | return true; |
1516 | if (ch.unicode() < 128) |
1517 | return false; |
1518 | return ch.isLetterOrNumber(); |
1519 | } |
1520 | |
1521 | bool Lexer::isDecimalDigit(ushort c) |
1522 | { |
1523 | return (c >= u'0' && c <= u'9'); |
1524 | } |
1525 | |
1526 | bool Lexer::isHexDigit(QChar c) |
1527 | { |
1528 | return ((c >= u'0' && c <= u'9') |
1529 | || (c >= u'a' && c <= u'f') |
1530 | || (c >= u'A' && c <= u'F')); |
1531 | } |
1532 | |
1533 | bool Lexer::isOctalDigit(ushort c) |
1534 | { |
1535 | return (c >= u'0' && c <= u'7'); |
1536 | } |
1537 | |
1538 | QString Lexer::tokenText() const |
1539 | { |
1540 | if (_state.validTokenText) |
1541 | return _tokenText; |
1542 | |
1543 | if (_state.tokenKind == T_STRING_LITERAL) |
1544 | return QString(_tokenStartPtr + 1, _tokenLength - 2); |
1545 | |
1546 | return QString(_tokenStartPtr, _tokenLength); |
1547 | } |
1548 | |
1549 | Lexer::Error Lexer::errorCode() const |
1550 | { |
1551 | return _state.errorCode; |
1552 | } |
1553 | |
1554 | QString Lexer::errorMessage() const |
1555 | { |
1556 | return _errorMessage; |
1557 | } |
1558 | |
1559 | void Lexer::syncProhibitAutomaticSemicolon() |
1560 | { |
1561 | if (_state.parenthesesState == BalancedParentheses) { |
1562 | // we have seen something like "if (foo)", which means we should |
1563 | // never insert an automatic semicolon at this point, since it would |
1564 | // then be expanded into an empty statement (ECMA-262 7.9.1) |
1565 | _state.prohibitAutomaticSemicolon = true; |
1566 | _state.parenthesesState = IgnoreParentheses; |
1567 | } else { |
1568 | _state.prohibitAutomaticSemicolon = false; |
1569 | } |
1570 | } |
1571 | |
1572 | bool Lexer::prevTerminator() const |
1573 | { |
1574 | return _state.terminator; |
1575 | } |
1576 | |
1577 | bool Lexer::followsClosingBrace() const |
1578 | { |
1579 | return _state.followsClosingBrace; |
1580 | } |
1581 | |
1582 | bool Lexer::canInsertAutomaticSemicolon(int token) const |
1583 | { |
1584 | return token == T_RBRACE || token == EOF_SYMBOL || _state.terminator |
1585 | || _state.followsClosingBrace; |
1586 | } |
1587 | |
1588 | static const int uriTokens[] = { |
1589 | QQmlJSGrammar::T_IDENTIFIER, |
1590 | QQmlJSGrammar::T_PROPERTY, |
1591 | QQmlJSGrammar::T_SIGNAL, |
1592 | QQmlJSGrammar::T_READONLY, |
1593 | QQmlJSGrammar::T_ON, |
1594 | QQmlJSGrammar::T_BREAK, |
1595 | QQmlJSGrammar::T_CASE, |
1596 | QQmlJSGrammar::T_CATCH, |
1597 | QQmlJSGrammar::T_CONTINUE, |
1598 | QQmlJSGrammar::T_DEFAULT, |
1599 | QQmlJSGrammar::T_DELETE, |
1600 | QQmlJSGrammar::T_DO, |
1601 | QQmlJSGrammar::T_ELSE, |
1602 | QQmlJSGrammar::T_FALSE, |
1603 | QQmlJSGrammar::T_FINALLY, |
1604 | QQmlJSGrammar::T_FOR, |
1605 | QQmlJSGrammar::T_FUNCTION, |
1606 | QQmlJSGrammar::T_FUNCTION_STAR, |
1607 | QQmlJSGrammar::T_IF, |
1608 | QQmlJSGrammar::T_IN, |
1609 | QQmlJSGrammar::T_OF, |
1610 | QQmlJSGrammar::T_INSTANCEOF, |
1611 | QQmlJSGrammar::T_NEW, |
1612 | QQmlJSGrammar::T_NULL, |
1613 | QQmlJSGrammar::T_RETURN, |
1614 | QQmlJSGrammar::T_SWITCH, |
1615 | QQmlJSGrammar::T_THIS, |
1616 | QQmlJSGrammar::T_THROW, |
1617 | QQmlJSGrammar::T_TRUE, |
1618 | QQmlJSGrammar::T_TRY, |
1619 | QQmlJSGrammar::T_TYPEOF, |
1620 | QQmlJSGrammar::T_VAR, |
1621 | QQmlJSGrammar::T_VOID, |
1622 | QQmlJSGrammar::T_WHILE, |
1623 | QQmlJSGrammar::T_CONST, |
1624 | QQmlJSGrammar::T_DEBUGGER, |
1625 | QQmlJSGrammar::T_RESERVED_WORD, |
1626 | QQmlJSGrammar::T_WITH, |
1627 | |
1628 | QQmlJSGrammar::EOF_SYMBOL |
1629 | }; |
1630 | static inline bool isUriToken(int token) |
1631 | { |
1632 | const int *current = uriTokens; |
1633 | while (*current != QQmlJSGrammar::EOF_SYMBOL) { |
1634 | if (*current == token) |
1635 | return true; |
1636 | ++current; |
1637 | } |
1638 | return false; |
1639 | } |
1640 | |
1641 | bool Lexer::scanDirectives(Directives *directives, DiagnosticMessage *error) |
1642 | { |
1643 | auto setError = [error, this](QString message) { |
1644 | error->message = std::move(message); |
1645 | error->loc.startLine = tokenStartLine(); |
1646 | error->loc.startColumn = tokenStartColumn(); |
1647 | }; |
1648 | |
1649 | QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true); |
1650 | Q_ASSERT(!_qmlMode); |
1651 | |
1652 | lex(); // fetch the first token |
1653 | |
1654 | if (_state.tokenKind != T_DOT) |
1655 | return true; |
1656 | |
1657 | do { |
1658 | const int lineNumber = tokenStartLine(); |
1659 | const int column = tokenStartColumn(); |
1660 | |
1661 | lex(); // skip T_DOT |
1662 | |
1663 | if (!(_state.tokenKind == T_IDENTIFIER || _state.tokenKind == T_IMPORT)) |
1664 | return true; // expected a valid QML/JS directive |
1665 | |
1666 | const QString directiveName = tokenText(); |
1667 | |
1668 | if (! (directiveName == QLatin1String("pragma" ) || |
1669 | directiveName == QLatin1String("import" ))) { |
1670 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "Syntax error" )); |
1671 | return false; // not a valid directive name |
1672 | } |
1673 | |
1674 | // it must be a pragma or an import directive. |
1675 | if (directiveName == QLatin1String("pragma" )) { |
1676 | // .pragma library |
1677 | if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library" ))) { |
1678 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "Syntax error" )); |
1679 | return false; // expected `library |
1680 | } |
1681 | |
1682 | // we found a .pragma library directive |
1683 | directives->pragmaLibrary(); |
1684 | |
1685 | } else { |
1686 | Q_ASSERT(directiveName == QLatin1String("import" )); |
1687 | lex(); // skip .import |
1688 | |
1689 | QString pathOrUri; |
1690 | QString version; |
1691 | bool fileImport = false; // file or uri import |
1692 | |
1693 | if (_state.tokenKind == T_STRING_LITERAL) { |
1694 | // .import T_STRING_LITERAL as T_IDENTIFIER |
1695 | |
1696 | fileImport = true; |
1697 | pathOrUri = tokenText(); |
1698 | |
1699 | if (!pathOrUri.endsWith(s: QLatin1String("js" ))) { |
1700 | setError(QCoreApplication::translate(context: "QQmlParser" ,key: "Imported file must be a script" )); |
1701 | return false; |
1702 | } |
1703 | lex(); |
1704 | |
1705 | } else if (_state.tokenKind == T_IDENTIFIER) { |
1706 | // .import T_IDENTIFIER (. T_IDENTIFIER)* (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER |
1707 | while (true) { |
1708 | if (!isUriToken(token: _state.tokenKind)) { |
1709 | setError(QCoreApplication::translate(context: "QQmlParser" ,key: "Invalid module URI" )); |
1710 | return false; |
1711 | } |
1712 | |
1713 | pathOrUri.append(s: tokenText()); |
1714 | |
1715 | lex(); |
1716 | if (tokenStartLine() != lineNumber) { |
1717 | setError(QCoreApplication::translate(context: "QQmlParser" ,key: "Invalid module URI" )); |
1718 | return false; |
1719 | } |
1720 | if (_state.tokenKind != QQmlJSGrammar::T_DOT) |
1721 | break; |
1722 | |
1723 | pathOrUri.append(c: u'.'); |
1724 | |
1725 | lex(); |
1726 | if (tokenStartLine() != lineNumber) { |
1727 | setError(QCoreApplication::translate(context: "QQmlParser" ,key: "Invalid module URI" )); |
1728 | return false; |
1729 | } |
1730 | } |
1731 | |
1732 | if (_state.tokenKind == T_VERSION_NUMBER) { |
1733 | version = tokenText(); |
1734 | lex(); |
1735 | if (_state.tokenKind == T_DOT) { |
1736 | version += u'.'; |
1737 | lex(); |
1738 | if (_state.tokenKind != T_VERSION_NUMBER) { |
1739 | setError(QCoreApplication::translate( |
1740 | context: "QQmlParser" , key: "Incomplete version number (dot but no minor)" )); |
1741 | return false; // expected the module version number |
1742 | } |
1743 | version += tokenText(); |
1744 | lex(); |
1745 | } |
1746 | } |
1747 | } |
1748 | |
1749 | // |
1750 | // recognize the mandatory `as' followed by the module name |
1751 | // |
1752 | if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) { |
1753 | if (fileImport) |
1754 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "File import requires a qualifier" )); |
1755 | else |
1756 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "Module import requires a qualifier" )); |
1757 | if (tokenStartLine() != lineNumber) { |
1758 | error->loc.startLine = lineNumber; |
1759 | error->loc.startColumn = column; |
1760 | } |
1761 | return false; // expected `as' |
1762 | } |
1763 | |
1764 | if (lex() != T_IDENTIFIER || tokenStartLine() != lineNumber) { |
1765 | if (fileImport) |
1766 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "File import requires a qualifier" )); |
1767 | else |
1768 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "Module import requires a qualifier" )); |
1769 | return false; // expected module name |
1770 | } |
1771 | |
1772 | const QString module = tokenText(); |
1773 | if (!module.at(i: 0).isUpper()) { |
1774 | setError(QCoreApplication::translate(context: "QQmlParser" ,key: "Invalid import qualifier" )); |
1775 | return false; |
1776 | } |
1777 | |
1778 | if (fileImport) |
1779 | directives->importFile(jsfile: pathOrUri, module, line: lineNumber, column); |
1780 | else |
1781 | directives->importModule(uri: pathOrUri, version, module, line: lineNumber, column); |
1782 | } |
1783 | |
1784 | if (tokenStartLine() != lineNumber) { |
1785 | setError(QCoreApplication::translate(context: "QQmlParser" , key: "Syntax error" )); |
1786 | return false; // the directives cannot span over multiple lines |
1787 | } |
1788 | |
1789 | // fetch the first token after the .pragma/.import directive |
1790 | lex(); |
1791 | } while (_state.tokenKind == T_DOT); |
1792 | |
1793 | return true; |
1794 | } |
1795 | |
1796 | const Lexer::State &Lexer::state() const |
1797 | { |
1798 | return _state; |
1799 | } |
1800 | void Lexer::setState(const Lexer::State &state) |
1801 | { |
1802 | _state = state; |
1803 | } |
1804 | |
1805 | namespace QQmlJS { |
1806 | QDebug operator<<(QDebug dbg, const Lexer::State &s) |
1807 | { |
1808 | dbg << "{\n" |
1809 | << " errorCode:" << int(s.errorCode) << ",\n" |
1810 | << " currentChar:" << s.currentChar << ",\n" |
1811 | << " tokenValue:" << s.tokenValue << ",\n" |
1812 | << " parenthesesState:" << s.parenthesesState << ",\n" |
1813 | << " parenthesesCount:" << s.parenthesesCount << ",\n" |
1814 | << " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n" |
1815 | << " bracesCount:" << s.bracesCount << ",\n" |
1816 | << " stackToken:" << s.stackToken << ",\n" |
1817 | << " patternFlags:" << s.patternFlags << ",\n" |
1818 | << " tokenKind:" << s.tokenKind << ",\n" |
1819 | << " importState:" << int(s.importState) << ",\n" |
1820 | << " validTokenText:" << s.validTokenText << ",\n" |
1821 | << " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n" |
1822 | << " restrictedKeyword:" << s.restrictedKeyword << ",\n" |
1823 | << " terminator:" << s.terminator << ",\n" |
1824 | << " followsClosingBrace:" << s.followsClosingBrace << ",\n" |
1825 | << " delimited:" << s.delimited << ",\n" |
1826 | << " handlingDirectives:" << s.handlingDirectives << ",\n" |
1827 | << " generatorLevel:" << s.generatorLevel << "\n}" ; |
1828 | return dbg; |
1829 | } |
1830 | } |
1831 | |
1832 | QT_END_NAMESPACE |
1833 | |