// qqmljslexer.cpp source code [qtdeclarative/src/qml/parser/qqmljslexer.cpp]

1	// Copyright (C) 2016 The Qt Company Ltd.
2	// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
4	#include "qqmljslexer_p.h"
5	#include "qqmljsengine_p.h"
6	#include "qqmljskeywords_p.h"
7
8	#include <private/qqmljsdiagnosticmessage_p.h>
9	#include <private/qqmljsmemorypool_p.h>
10	#include <private/qlocale_tools_p.h>
11
12
13	#include <QtCore/qcoreapplication.h>
14	#include <QtCore/qvarlengtharray.h>
15	#include <QtCore/qdebug.h>
16	#include <QtCore/QScopedValueRollback>
17
18	#include <optional>
19
20	QT_BEGIN_NAMESPACE
21	using namespace QQmlJS;
22
23	static inline int regExpFlagFromChar(const QChar &ch)
24	{
25	switch (ch.unicode()) {
26	case `'g'`: return Lexer::RegExp_Global;
27	case `'i'`: return Lexer::RegExp_IgnoreCase;
28	case `'m'`: return Lexer::RegExp_Multiline;
29	case `'u'`: return Lexer::RegExp_Unicode;
30	case `'y'`: return Lexer::RegExp_Sticky;
31	}
32	return `0`;
33	}
34
35	static inline unsigned char convertHex(ushort c)
36	{
37	if (c >= `'0'` && c <= `'9'`)
38	return (c - `'0'`);
39	else if (c >= `'a'` && c <= `'f'`)
40	return (c - `'a'` + `10`);
41	else
42	return (c - `'A'` + `10`);
43	}
44
45	static inline QChar convertHex(QChar c1, QChar c2)
46	{
47	return QChar ((convertHex(c: c1.unicode()) << `4`) + convertHex(c: c2.unicode()));
48	}
49
50	Lexer::Lexer(Engine *engine, LexMode lexMode)
51	: _engine(engine), _lexMode(lexMode), _endPtr(nullptr), _qmlMode(true)
52	{
53	if (engine)
54	engine->setLexer(this);
55	}
56
57	bool Lexer::qmlMode() const
58	{
59	return _qmlMode;
60	}
61
62	QString Lexer::code() const
63	{
64	return _code;
65	}
66
67	void Lexer::setCode(const QString &code, int lineno, bool qmlMode,
68	Lexer::CodeContinuation codeContinuation)
69	{
70	if (codeContinuation == Lexer::CodeContinuation::Continue)
71	_currentOffset += _code.size();
72	else
73	_currentOffset = `0`;
74	if (_engine)
75	_engine->setCode(code);
76
77	_qmlMode = qmlMode;
78	_code = code;
79	_skipLinefeed = false;
80
81	_tokenText.clear();
82	_tokenText.reserve(asize: `1024`);
83	_errorMessage.clear();
84	_tokenSpell = QStringView ();
85	_rawString = QStringView ();
86
87	_codePtr = code.unicode();
88	_endPtr = _codePtr + code.size();
89	_tokenStartPtr = _codePtr;
90
91	if (lineno >= `0`)
92	_currentLineNumber = lineno;
93	_currentColumnNumber = `0`;
94	_tokenLine = _currentLineNumber;
95	_tokenColumn = `0`;
96	_tokenLength = `0`;
97
98	if (codeContinuation == Lexer::CodeContinuation::Reset)
99	_state = State {};
100	}
101
102	void Lexer::scanChar()
103	{
104	if (_skipLinefeed) {
105	Q_ASSERT(*_codePtr == u`'\n'`);
106	++_codePtr;
107	_skipLinefeed = false;
108	}
109	_state.currentChar = *_codePtr++;
110	++_currentColumnNumber;
111
112	if (isLineTerminator()) {
113	if (_state.currentChar == u`'\r'`) {
114	if (_codePtr < _endPtr && *_codePtr == u`'\n'`)
115	_skipLinefeed = true;
116	_state.currentChar = u`'\n'`;
117	}
118	++_currentLineNumber;
119	_currentColumnNumber = `0`;
120	}
121	}
122
123	QChar Lexer::peekChar()
124	{
125	auto peekPtr = _codePtr;
126	if (peekPtr < _endPtr)
127	return *peekPtr;
128	return QChar ();
129	}
130
131	namespace {
132	inline bool isBinop(int tok)
133	{
134	switch (tok) {
135	case Lexer::T_AND:
136	case Lexer::T_AND_AND:
137	case Lexer::T_AND_EQ:
138	case Lexer::T_DIVIDE_:
139	case Lexer::T_DIVIDE_EQ:
140	case Lexer::T_EQ:
141	case Lexer::T_EQ_EQ:
142	case Lexer::T_EQ_EQ_EQ:
143	case Lexer::T_GE:
144	case Lexer::T_GT:
145	case Lexer::T_GT_GT:
146	case Lexer::T_GT_GT_EQ:
147	case Lexer::T_GT_GT_GT:
148	case Lexer::T_GT_GT_GT_EQ:
149	case Lexer::T_LE:
150	case Lexer::T_LT:
151	case Lexer::T_LT_LT:
152	case Lexer::T_LT_LT_EQ:
153	case Lexer::T_MINUS:
154	case Lexer::T_MINUS_EQ:
155	case Lexer::T_NOT_EQ:
156	case Lexer::T_NOT_EQ_EQ:
157	case Lexer::T_OR:
158	case Lexer::T_OR_EQ:
159	case Lexer::T_OR_OR:
160	case Lexer::T_PLUS:
161	case Lexer::T_PLUS_EQ:
162	case Lexer::T_REMAINDER:
163	case Lexer::T_REMAINDER_EQ:
164	case Lexer::T_RETURN:
165	case Lexer::T_STAR:
166	case Lexer::T_STAR_EQ:
167	case Lexer::T_XOR:
168	case Lexer::T_XOR_EQ:
169	return true;
170
171	default:
172	return false;
173	}
174	}
175
176	int hexDigit(QChar c)
177	{
178	if (c >= u`'0'` && c <= u`'9'`)
179	return c.unicode() - u`'0'`;
180	if (c >= u`'a'` && c <= u`'f'`)
181	return c.unicode() - u`'a'` + `10`;
182	if (c >= u`'A'` && c <= u`'F'`)
183	return c.unicode() - u`'A'` + `10`;
184	return -`1`;
185	}
186
187	int octalDigit(QChar c)
188	{
189	if (c >= u`'0'` && c <= u`'7'`)
190	return c.unicode() - u`'0'`;
191	return -`1`;
192	}
193
194	} // anonymous namespace
195
196	int Lexer::lex()
197	{
198	const int previousTokenKind = _state.tokenKind;
199	int tokenKind;
200	bool firstPass = true;
201
202	again:
203	tokenKind = T_ERROR;
204	_tokenSpell = QStringView ();
205	_rawString = QStringView ();
206	if (firstPass && _state.stackToken == -`1`) {
207	firstPass = false;
208	if (_codePtr > _endPtr && _lexMode == LexMode::LineByLine && !_code.isEmpty())
209	return T_EOL;
210
211	if (_state.comments == CommentState::InMultilineComment) {
212	scanChar();
213	_tokenStartPtr = _codePtr - `1`;
214	_tokenLine = _currentLineNumber;
215	_tokenColumn = _currentColumnNumber;
216	while (_codePtr <= _endPtr) {
217	if (_state.currentChar == u`'*'`) {
218	scanChar();
219	if (_state.currentChar == u`'/'`) {
220	scanChar();
221	if (_engine) {
222	_engine->addComment(pos: tokenOffset() + `2`,
223	len: _codePtr - _tokenStartPtr - `1` - `4`,
224	line: tokenStartLine(), col: tokenStartColumn() + `2`);
225	}
226	tokenKind = T_COMMENT;
227	break;
228	}
229	} else {
230	scanChar();
231	}
232	}
233	if (tokenKind == T_ERROR)
234	tokenKind = T_PARTIAL_COMMENT;
235	} else {
236	// handle multiline continuation
237	std::optional<ScanStringMode> scanMode;
238	switch (previousTokenKind) {
239	case T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL:
240	scanMode = ScanStringMode::SingleQuote;
241	break;
242	case T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL:
243	scanMode = ScanStringMode::DoubleQuote;
244	break;
245	case T_PARTIAL_TEMPLATE_HEAD:
246	scanMode = ScanStringMode::TemplateHead;
247	break;
248	case T_PARTIAL_TEMPLATE_MIDDLE:
249	scanMode = ScanStringMode::TemplateContinuation;
250	break;
251	default:
252	break;
253	}
254	if (scanMode) {
255	scanChar();
256	_tokenStartPtr = _codePtr - `1`;
257	_tokenLine = _currentLineNumber;
258	_tokenColumn = _currentColumnNumber;
259	tokenKind = scanString(mode: *scanMode);
260	}
261	}
262	}
263	if (tokenKind == T_ERROR)
264	tokenKind = scanToken();
265	_tokenLength = _codePtr - _tokenStartPtr - `1`;
266	switch (tokenKind) {
267	// end of line and comments should not "overwrite" the old token type...
268	case T_EOL:
269	return tokenKind;
270	case T_COMMENT:
271	_state.comments = CommentState::HadComment;
272	return tokenKind;
273	case T_PARTIAL_COMMENT:
274	_state.comments = CommentState::InMultilineComment;
275	return tokenKind;
276	default:
277	_state.comments = CommentState::NoComment;
278	break;
279	}
280	_state.tokenKind = tokenKind;
281
282	_state.delimited = false;
283	_state.restrictedKeyword = false;
284	_state.followsClosingBrace = (previousTokenKind == T_RBRACE);
285
286	// update the flags
287	switch (_state.tokenKind) {
288	case T_LBRACE:
289	if (_state.bracesCount > `0`)
290	++_state.bracesCount;
291	Q_FALLTHROUGH();
292	case T_SEMICOLON:
293	_state.importState = ImportState::NoQmlImport;
294	Q_FALLTHROUGH();
295	case T_QUESTION:
296	case T_COLON:
297	case T_TILDE:
298	_state.delimited = true;
299	break;
300	case T_AUTOMATIC_SEMICOLON:
301	case T_AS:
302	_state.importState = ImportState::NoQmlImport;
303	Q_FALLTHROUGH();
304	default:
305	if (isBinop(tok: _state.tokenKind))
306	_state.delimited = true;
307	break;
308
309	case T_IMPORT:
310	if (qmlMode() \|\| (_state.handlingDirectives && previousTokenKind == T_DOT))
311	_state.importState = ImportState::SawImport;
312	if (isBinop(tok: _state.tokenKind))
313	_state.delimited = true;
314	break;
315
316	case T_IF:
317	case T_FOR:
318	case T_WHILE:
319	case T_WITH:
320	_state.parenthesesState = CountParentheses;
321	_state.parenthesesCount = `0`;
322	break;
323
324	case T_ELSE:
325	case T_DO:
326	_state.parenthesesState = BalancedParentheses;
327	break;
328
329	case T_CONTINUE:
330	case T_BREAK:
331	case T_RETURN:
332	case T_YIELD:
333	case T_THROW:
334	_state.restrictedKeyword = true;
335	break;
336	case T_RBRACE:
337	if (_state.bracesCount > `0`)
338	--_state.bracesCount;
339	if (_state.bracesCount == `0`)
340	goto again;
341	} // switch
342
343	// update the parentheses state
344	switch (_state.parenthesesState) {
345	case IgnoreParentheses:
346	break;
347
348	case CountParentheses:
349	if (_state.tokenKind == T_RPAREN) {
350	--_state.parenthesesCount;
351	if (_state.parenthesesCount == `0`)
352	_state.parenthesesState = BalancedParentheses;
353	} else if (_state.tokenKind == T_LPAREN) {
354	++_state.parenthesesCount;
355	}
356	break;
357
358	case BalancedParentheses:
359	if (_state.tokenKind != T_DO && _state.tokenKind != T_ELSE)
360	_state.parenthesesState = IgnoreParentheses;
361	break;
362	} // switch
363
364	return _state.tokenKind;
365	}
366
367	uint Lexer::decodeUnicodeEscapeCharacter(bool *ok)
368	{
369	Q_ASSERT(_state.currentChar == u`'u'`);
370	scanChar(); // skip u
371	constexpr int distanceFromFirstHexToLastHex = `3`;
372	if (_codePtr + distanceFromFirstHexToLastHex <= _endPtr && isHexDigit(c: _state.currentChar)) {
373	uint codePoint = `0`;
374	for (int i = `0`; i < `4`; ++i) {
375	int digit = hexDigit(c: _state.currentChar);
376	if (digit < `0`)
377	goto error;
378	codePoint *= `16`;
379	codePoint += digit;
380	scanChar();
381	}
382
383	ok = true*;
384	return codePoint;
385	} else if (_codePtr < _endPtr && _state.currentChar == u`'{'`) {
386	scanChar(); // skip '{'
387	uint codePoint = `0`;
388	if (!isHexDigit(c: _state.currentChar))
389	// need at least one hex digit
390	goto error;
391
392	while (_codePtr <= _endPtr) {
393	int digit = hexDigit(c: _state.currentChar);
394	if (digit < `0`)
395	break;
396	codePoint *= `16`;
397	codePoint += digit;
398	if (codePoint > `0x10ffff`)
399	goto error;
400	scanChar();
401	}
402
403	if (_state.currentChar != u`'}'`)
404	goto error;
405
406	scanChar(); // skip '}'
407
408
409	ok = true*;
410	return codePoint;
411	}
412
413	error:
414	_state.errorCode = IllegalUnicodeEscapeSequence;
415	_errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Illegal unicode escape sequence");
416
417	ok = false*;
418	return `0`;
419	}
420
421	QChar Lexer::decodeHexEscapeCharacter(bool *ok)
422	{
423	if (isHexDigit(c: _codePtr[`0`]) && isHexDigit(c: _codePtr[`1`])) {
424	scanChar();
425
426	const QChar c1 = _state.currentChar;
427	scanChar();
428
429	const QChar c2 = _state.currentChar;
430	scanChar();
431
432	if (ok)
433	ok = true*;
434
435	return convertHex(c1, c2);
436	}
437
438	ok = false*;
439	return QChar ();
440	}
441
442	namespace QQmlJS {
443	QDebug operator<<(QDebug dbg, const Lexer &l)
444	{
445	dbg << "{\n"
446	<< " engine:" << qsizetype(l._engine) << ",\n"
447	<< " lexMode:" << int(l._lexMode) << ",\n"
448	<< " code.size:" << qsizetype(l._code.unicode()) << "+" << l._code.size() << ",\n"
449	<< " endPtr: codePtr + " << (l._endPtr - l._codePtr) << ",\n"
450	<< " qmlMode:" << l._qmlMode << ",\n"
451	<< " staticIsKeyword:" << l._staticIsKeyword << ",\n"
452	<< " currentLineNumber:" << l._currentLineNumber << ",\n"
453	<< " currentColumnNumber:" << l._currentColumnNumber << ",\n"
454	<< " currentOffset:" << l._currentOffset << ",\n"
455	<< " tokenLength:" << l._tokenLength << ",\n"
456	<< " tokenLine:" << l._tokenLine << ",\n"
457	<< " tokenColumn:" << l._tokenColumn << ",\n"
458	<< " tokenText:" << l._tokenText << ",\n"
459	<< " skipLinefeed:" << l._skipLinefeed << ",\n"
460	<< " errorMessage:" << l._errorMessage << ",\n"
461	<< " tokenSpell:" << l._tokenSpell << ",\n"
462	<< " rawString:" << l._rawString << ",\n";
463	if (l._codePtr)
464	dbg << " codePtr: code.unicode()+" << (l._codePtr - l._code.unicode()) << ",\n";
465	else
466	dbg << " codePtr: null,\n";
467	if (l._tokenStartPtr)
468	dbg << " tokenStartPtr: codePtr " << (l._tokenStartPtr - l._codePtr) << ",\n";
469	else
470	dbg << " tokenStartPtr: null,\n";
471	dbg << " state:" << l._state << "\n}";
472	return dbg;
473	}
474	}
475
476	static inline bool isIdentifierStart(uint ch)
477	{
478	// fast path for ascii
479	if ((ch >= u`'a'` && ch <= u`'z'`) \|\|
480	(ch >= u`'A'` && ch <= u`'Z'`) \|\|
481	ch == u`'$'` \|\| ch == u`'_'`)
482	return true;
483
484	switch (QChar::category(ucs4: ch)) {
485	case QChar::Number_Letter:
486	case QChar::Letter_Uppercase:
487	case QChar::Letter_Lowercase:
488	case QChar::Letter_Titlecase:
489	case QChar::Letter_Modifier:
490	case QChar::Letter_Other:
491	return true;
492	default:
493	break;
494	}
495	return false;
496	}
497
498	static bool isIdentifierPart(uint ch)
499	{
500	// fast path for ascii
501	if ((ch >= u`'a'` && ch <= u`'z'`) \|\|
502	(ch >= u`'A'` && ch <= u`'Z'`) \|\|
503	(ch >= u`'0'` && ch <= u`'9'`) \|\|
504	ch == u`'$'` \|\| ch == u`'_'` \|\|
505	ch == `0x200c` / ZWNJ / \|\| ch == `0x200d` / ZWJ /)
506	return true;
507
508	switch (QChar::category(ucs4: ch)) {
509	case QChar::Mark_NonSpacing:
510	case QChar::Mark_SpacingCombining:
511
512	case QChar::Number_DecimalDigit:
513	case QChar::Number_Letter:
514
515	case QChar::Letter_Uppercase:
516	case QChar::Letter_Lowercase:
517	case QChar::Letter_Titlecase:
518	case QChar::Letter_Modifier:
519	case QChar::Letter_Other:
520
521	case QChar::Punctuation_Connector:
522	return true;
523	default:
524	break;
525	}
526	return false;
527	}
528
529	int Lexer::scanToken()
530	{
531	if (_state.stackToken != -`1`) {
532	int tk = _state.stackToken;
533	_state.stackToken = -`1`;
534	return tk;
535	}
536
537	if (_state.bracesCount == `0`) {
538	// we're inside a Template string
539	return scanString(mode: TemplateContinuation);
540	}
541
542	if (_state.comments == CommentState::NoComment)
543	_state.terminator = false;
544
545	again:
546	_state.validTokenText = false;
547
548	while (_state.currentChar.isSpace()) {
549	if (isLineTerminator()) {
550	bool isAtEnd = (_codePtr + (_skipLinefeed ? `1` : `0`)) == _endPtr;
551	if (_state.restrictedKeyword) {
552	// automatic semicolon insertion
553	_tokenLine = _currentLineNumber;
554	_tokenColumn = _currentColumnNumber;
555	_tokenStartPtr = _codePtr - `1`;
556	return T_SEMICOLON;
557	} else if (_lexMode == LexMode::WholeCode \|\| !isAtEnd) {
558	_state.terminator = true;
559	syncProhibitAutomaticSemicolon();
560	} // else we will do the previous things at the start of next line...
561	}
562
563	scanChar();
564	}
565
566	_tokenStartPtr = _codePtr - `1`;
567	_tokenLine = _currentLineNumber;
568	_tokenColumn = _currentColumnNumber;
569
570	if (_codePtr >= _endPtr) {
571	if (_lexMode == LexMode::LineByLine) {
572	if (!_code.isEmpty()) {
573	_state.currentChar = *(_codePtr - `2`);
574	return T_EOL;
575	} else {
576	return EOF_SYMBOL;
577	}
578	} else if (_codePtr > _endPtr) {
579	return EOF_SYMBOL;
580	}
581	}
582
583	const QChar ch = _state.currentChar;
584	scanChar();
585
586	switch (ch.unicode()) {
587	case u`'~'`: return T_TILDE;
588	case u`'}'`: return T_RBRACE;
589
590	case u`'\|'`:
591	if (_state.currentChar == u`'\|'`) {
592	scanChar();
593	return T_OR_OR;
594	} else if (_state.currentChar == u`'='`) {
595	scanChar();
596	return T_OR_EQ;
597	}
598	return T_OR;
599
600	case u`'{'`: return T_LBRACE;
601
602	case u`'^'`:
603	if (_state.currentChar == u`'='`) {
604	scanChar();
605	return T_XOR_EQ;
606	}
607	return T_XOR;
608
609	case u`']'`: return T_RBRACKET;
610	case u`'['`: return T_LBRACKET;
611	case u`'?'`: {
612	if (_state.currentChar == u`'?'`) {
613	scanChar();
614	return T_QUESTION_QUESTION;
615	}
616	if (_state.currentChar == u`'.'` && !peekChar().isDigit()) {
617	scanChar();
618	return T_QUESTION_DOT;
619	}
620
621	return T_QUESTION;
622	}
623
624	case u`'>'`:
625	if (_state.currentChar == u`'>'`) {
626	scanChar();
627	if (_state.currentChar == u`'>'`) {
628	scanChar();
629	if (_state.currentChar == u`'='`) {
630	scanChar();
631	return T_GT_GT_GT_EQ;
632	}
633	return T_GT_GT_GT;
634	} else if (_state.currentChar == u`'='`) {
635	scanChar();
636	return T_GT_GT_EQ;
637	}
638	return T_GT_GT;
639	} else if (_state.currentChar == u`'='`) {
640	scanChar();
641	return T_GE;
642	}
643	return T_GT;
644
645	case u`'='`:
646	if (_state.currentChar == u`'='`) {
647	scanChar();
648	if (_state.currentChar == u`'='`) {
649	scanChar();
650	return T_EQ_EQ_EQ;
651	}
652	return T_EQ_EQ;
653	} else if (_state.currentChar == u`'>'`) {
654	scanChar();
655	return T_ARROW;
656	}
657	return T_EQ;
658
659	case u`'<'`:
660	if (_state.currentChar == u`'='`) {
661	scanChar();
662	return T_LE;
663	} else if (_state.currentChar == u`'<'`) {
664	scanChar();
665	if (_state.currentChar == u`'='`) {
666	scanChar();
667	return T_LT_LT_EQ;
668	}
669	return T_LT_LT;
670	}
671	return T_LT;
672
673	case u`';'`: return T_SEMICOLON;
674	case u`':'`: return T_COLON;
675
676	case u`'/'`:
677	switch (_state.currentChar.unicode()) {
678	case u`'*'`:
679	scanChar();
680	while (_codePtr <= _endPtr) {
681	if (_state.currentChar == u`'*'`) {
682	scanChar();
683	if (_state.currentChar == u`'/'`) {
684	scanChar();
685	if (_engine) {
686	_engine->addComment(pos: tokenOffset() + `2`,
687	len: _codePtr - _tokenStartPtr - `1` - `4`, line: tokenStartLine(),
688	col: tokenStartColumn() + `2`);
689	}
690	if (_lexMode == LexMode::LineByLine)
691	return T_COMMENT;
692	else
693	goto again;
694	}
695	} else {
696	scanChar();
697	}
698	}
699	if (_lexMode == LexMode::LineByLine)
700	return T_PARTIAL_COMMENT;
701	else
702	goto again;
703	case u`'/'`:
704	while (_codePtr <= _endPtr && !isLineTerminator()) {
705	scanChar();
706	}
707	if (_engine) {
708	_engine->addComment(pos: tokenOffset() + `2`, len: _codePtr - _tokenStartPtr - `1` - `2`,
709	line: tokenStartLine(), col: tokenStartColumn() + `2`);
710	}
711	if (_lexMode == LexMode::LineByLine)
712	return T_COMMENT;
713	else
714	goto again;
715	case u`'='`:
716	scanChar();
717	return T_DIVIDE_EQ;
718	default:
719	return T_DIVIDE_;
720	}
721	case u`'.'`:
722	if (_state.importState == ImportState::SawImport)
723	return T_DOT;
724	if (isDecimalDigit(c: _state.currentChar.unicode()))
725	return scanNumber(ch);
726	if (_state.currentChar == u`'.'`) {
727	scanChar();
728	if (_state.currentChar == u`'.'`) {
729	scanChar();
730	return T_ELLIPSIS;
731	} else {
732	_state.errorCode = IllegalCharacter;
733	_errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unexpected token '.'");
734	return T_ERROR;
735	}
736	}
737	return T_DOT;
738
739	case u`'-'`:
740	if (_state.currentChar == u`'='`) {
741	scanChar();
742	return T_MINUS_EQ;
743	} else if (_state.currentChar == u`'-'`) {
744	scanChar();
745
746	if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
747	&& _state.tokenKind != T_LPAREN) {
748	_state.stackToken = T_MINUS_MINUS;
749	return T_SEMICOLON;
750	}
751
752	return T_MINUS_MINUS;
753	}
754	return T_MINUS;
755
756	case u`','`: return T_COMMA;
757
758	case u`'+'`:
759	if (_state.currentChar == u`'='`) {
760	scanChar();
761	return T_PLUS_EQ;
762	} else if (_state.currentChar == u`'+'`) {
763	scanChar();
764
765	if (_state.terminator && !_state.delimited && !_state.prohibitAutomaticSemicolon
766	&& _state.tokenKind != T_LPAREN) {
767	_state.stackToken = T_PLUS_PLUS;
768	return T_SEMICOLON;
769	}
770
771	return T_PLUS_PLUS;
772	}
773	return T_PLUS;
774
775	case u`'*'`:
776	if (_state.currentChar == u`'='`) {
777	scanChar();
778	return T_STAR_EQ;
779	} else if (_state.currentChar == u`'*'`) {
780	scanChar();
781	if (_state.currentChar == u`'='`) {
782	scanChar();
783	return T_STAR_STAR_EQ;
784	}
785	return T_STAR_STAR;
786	}
787	return T_STAR;
788
789	case u`')'`: return T_RPAREN;
790	case u`'('`: return T_LPAREN;
791
792	case u`'@'`: return T_AT;
793
794	case u`'&'`:
795	if (_state.currentChar == u`'='`) {
796	scanChar();
797	return T_AND_EQ;
798	} else if (_state.currentChar == u`'&'`) {
799	scanChar();
800	return T_AND_AND;
801	}
802	return T_AND;
803
804	case u`'%'`:
805	if (_state.currentChar == u`'='`) {
806	scanChar();
807	return T_REMAINDER_EQ;
808	}
809	return T_REMAINDER;
810
811	case u`'!'`:
812	if (_state.currentChar == u`'='`) {
813	scanChar();
814	if (_state.currentChar == u`'='`) {
815	scanChar();
816	return T_NOT_EQ_EQ;
817	}
818	return T_NOT_EQ;
819	}
820	return T_NOT;
821
822	case u'`':
823	_state.outerTemplateBraceCount.push(t: _state.bracesCount);
824	Q_FALLTHROUGH();
825	case u`'\''`:
826	case u`'"'`:
827	return scanString(mode: ScanStringMode(ch.unicode()));
828	case u`'0'`:
829	case u`'1'`:
830	case u`'2'`:
831	case u`'3'`:
832	case u`'4'`:
833	case u`'5'`:
834	case u`'6'`:
835	case u`'7'`:
836	case u`'8'`:
837	case u`'9'`:
838	if (_state.importState == ImportState::SawImport)
839	return scanVersionNumber(ch);
840	else
841	return scanNumber(ch);
842
843	case `'#'`:
844	if (_currentLineNumber == `1` && _currentColumnNumber == `2`) {
845	// shebang support
846	while (_codePtr <= _endPtr && !isLineTerminator()) {
847	scanChar();
848	}
849	if (_engine) {
850	_engine->addComment(pos: tokenOffset(), len: _codePtr - _tokenStartPtr - `1`, line: tokenStartLine(),
851	col: tokenStartColumn());
852	}
853	if (_lexMode == LexMode::LineByLine)
854	return T_COMMENT;
855	else
856	goto again;
857	}
858	Q_FALLTHROUGH();
859
860	default: {
861	uint c = ch.unicode();
862	bool identifierWithEscapeChars = false;
863	if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _state.currentChar.unicode())) {
864	c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
865	scanChar();
866	} else if (c == `'\\'` && _state.currentChar == u`'u'`) {
867	identifierWithEscapeChars = true;
868	bool ok = false;
869	c = decodeUnicodeEscapeCharacter(ok: &ok);
870	if (!ok)
871	return T_ERROR;
872	}
873	if (isIdentifierStart(ch: c)) {
874	if (identifierWithEscapeChars) {
875	_tokenText.resize(size: `0`);
876	if (QChar::requiresSurrogates(ucs4: c)) {
877	_tokenText += QChar (QChar::highSurrogate(ucs4: c));
878	_tokenText += QChar (QChar::lowSurrogate(ucs4: c));
879	} else {
880	_tokenText += QChar (c);
881	}
882	_state.validTokenText = true;
883	}
884	while (_codePtr <= _endPtr) {
885	c = _state.currentChar.unicode();
886	if (QChar::isHighSurrogate(ucs4: c) && QChar::isLowSurrogate(ucs4: _codePtr->unicode())) {
887	scanChar();
888	c = QChar::surrogateToUcs4(high: ushort(c), low: _state.currentChar.unicode());
889	} else if (_state.currentChar == u`'\\'` && _codePtr[`0`] == u`'u'`) {
890	if (!identifierWithEscapeChars) {
891	identifierWithEscapeChars = true;
892	_tokenText.resize(size: `0`);
893	_tokenText.insert(i: `0`, uc: _tokenStartPtr, len: _codePtr - _tokenStartPtr - `1`);
894	_state.validTokenText = true;
895	}
896
897	scanChar(); // skip '\\'
898	bool ok = false;
899	c = decodeUnicodeEscapeCharacter(ok: &ok);
900	if (!ok)
901	return T_ERROR;
902
903	if (!isIdentifierPart(ch: c))
904	break;
905
906	if (QChar::requiresSurrogates(ucs4: c)) {
907	_tokenText += QChar (QChar::highSurrogate(ucs4: c));
908	_tokenText += QChar (QChar::lowSurrogate(ucs4: c));
909	} else {
910	_tokenText += QChar (c);
911	}
912	continue;
913	}
914
915	if (!isIdentifierPart(ch: c))
916	break;
917
918	if (identifierWithEscapeChars) {
919	if (QChar::requiresSurrogates(ucs4: c)) {
920	_tokenText += QChar (QChar::highSurrogate(ucs4: c));
921	_tokenText += QChar (QChar::lowSurrogate(ucs4: c));
922	} else {
923	_tokenText += QChar (c);
924	}
925	}
926	scanChar();
927	}
928
929	_tokenLength = _codePtr - _tokenStartPtr - `1`;
930
931	int kind = T_IDENTIFIER;
932
933	if (!identifierWithEscapeChars)
934	kind = classify(s: _tokenStartPtr, n: _tokenLength, parseModeFlags: parseModeFlags());
935
936	if (_engine) {
937	if (kind == T_IDENTIFIER && identifierWithEscapeChars)
938	_tokenSpell = _engine->newStringRef(text: _tokenText);
939	else
940	_tokenSpell = _engine->midRef(position: _tokenStartPtr - _code.unicode(), size: _tokenLength);
941	}
942
943	return kind;
944	}
945	}
946
947	break;
948	}
949
950	return T_ERROR;
951	}
952
953	int Lexer::scanString(ScanStringMode mode)
954	{
955	QChar quote = (mode == TemplateContinuation) ? QChar (TemplateHead) : QChar (mode);
956	// we actually use T_STRING_LITERAL also for multiline strings, should we want to
957	// change that we should set it to:
958	// _state.tokenKind == T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL \|\|
959	// _state.tokenKind == T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL
960	// here and uncomment the multilineStringLiteral = true below.
961	bool multilineStringLiteral = false;
962
963	const QChar *startCode = _codePtr - `1`;
964	// in case we just parsed a \r, we need to reset this flag to get things working
965	// correctly in the loop below and afterwards
966	_skipLinefeed = false;
967	bool first = true;
968
969	if (_engine) {
970	while (_codePtr <= _endPtr) {
971	if (isLineTerminator()) {
972	if ((quote == u'`' \|\| qmlMode())) {
973	if (first)
974	--_currentLineNumber; // will be read again in scanChar()
975	break;
976	}
977	_state.errorCode = IllegalCharacter;
978	_errorMessage = QCoreApplication::translate(context: "QQmlParser",
979	key: "Stray newline in string literal");
980	return T_ERROR;
981	} else if (_state.currentChar == u`'\\'`) {
982	break;
983	} else if (_state.currentChar == u`'$'` && quote == u'`') {
984	break;
985	} else if (_state.currentChar == quote) {
986	_tokenSpell =
987	_engine->midRef(position: startCode - _code.unicode(), size: _codePtr - startCode - `1`);
988	_rawString = _tokenSpell;
989	scanChar();
990
991	if (quote == u'`')
992	_state.bracesCount = _state.outerTemplateBraceCount.pop();
993	if (mode == TemplateHead)
994	return T_NO_SUBSTITUTION_TEMPLATE;
995	else if (mode == TemplateContinuation)
996	return T_TEMPLATE_TAIL;
997	else if (multilineStringLiteral)
998	return T_MULTILINE_STRING_LITERAL;
999	else
1000	return T_STRING_LITERAL;
1001	}
1002	// don't use scanChar() here, that would transform \r sequences and the midRef() call would create the wrong result
1003	_state.currentChar = *_codePtr++;
1004	++_currentColumnNumber;
1005	first = false;
1006	}
1007	}
1008
1009	// rewind by one char, so things gets scanned correctly
1010	--_codePtr;
1011	--_currentColumnNumber;
1012
1013	_state.validTokenText = true;
1014	_tokenText = QString (startCode, _codePtr - startCode);
1015
1016	auto setRawString = [&](const QChar *end) {
1017	QString raw(startCode, end - startCode - `1`);
1018	raw.replace(before: QLatin1String ("\r\n"), after: QLatin1String ("\n"));
1019	raw.replace(before: u`'\r'`, after: u`'\n'`);
1020	_rawString = _engine->newStringRef(text: raw);
1021	};
1022
1023	scanChar();
1024
1025	while (_codePtr <= _endPtr) {
1026	if (_state.currentChar == quote) {
1027	scanChar();
1028
1029	if (_engine) {
1030	_tokenSpell = _engine->newStringRef(text: _tokenText);
1031	if (quote == u'`')
1032	setRawString (_codePtr - `1`);
1033	}
1034
1035	if (quote == u'`')
1036	_state.bracesCount = _state.outerTemplateBraceCount.pop();
1037
1038	if (mode == TemplateContinuation)
1039	return T_TEMPLATE_TAIL;
1040	else if (mode == TemplateHead)
1041	return T_NO_SUBSTITUTION_TEMPLATE;
1042
1043	return multilineStringLiteral ? T_MULTILINE_STRING_LITERAL : T_STRING_LITERAL;
1044	} else if (quote == u'`' && _state.currentChar == u`'$'` && *_codePtr == u`'{'`) {
1045	scanChar();
1046	scanChar();
1047	_state.bracesCount = `1`;
1048	if (_engine) {
1049	_tokenSpell = _engine->newStringRef(text: _tokenText);
1050	setRawString (_codePtr - `2`);
1051	}
1052
1053	return (mode == TemplateHead ? T_TEMPLATE_HEAD : T_TEMPLATE_MIDDLE);
1054	} else if (_state.currentChar == u`'\\'`) {
1055	scanChar();
1056	if (_codePtr > _endPtr) {
1057	_state.errorCode = IllegalEscapeSequence;
1058	_errorMessage = QCoreApplication::translate(
1059	context: "QQmlParser", key: "End of file reached at escape sequence");
1060	return T_ERROR;
1061	}
1062
1063	QChar u;
1064
1065	switch (_state.currentChar.unicode()) {
1066	// unicode escape sequence
1067	case u`'u'`: {
1068	bool ok = false;
1069	uint codePoint = decodeUnicodeEscapeCharacter(ok: &ok);
1070	if (!ok)
1071	return T_ERROR;
1072	if (QChar::requiresSurrogates(ucs4: codePoint)) {
1073	// need to use a surrogate pair
1074	_tokenText += QChar (QChar::highSurrogate(ucs4: codePoint));
1075	u = QChar::lowSurrogate(ucs4: codePoint);
1076	} else {
1077	u = QChar (codePoint);
1078	}
1079	} break;
1080
1081	// hex escape sequence
1082	case u`'x'`: {
1083	bool ok = false;
1084	u = decodeHexEscapeCharacter(ok: &ok);
1085	if (!ok) {
1086	_state.errorCode = IllegalHexadecimalEscapeSequence;
1087	_errorMessage = QCoreApplication::translate(
1088	context: "QQmlParser", key: "Illegal hexadecimal escape sequence");
1089	return T_ERROR;
1090	}
1091	} break;
1092
1093	// single character escape sequence
1094	case u`'\\'`: u = u`'\\'`; scanChar(); break;
1095	case u`'\''`: u = u`'\''`; scanChar(); break;
1096	case u`'\"'`: u = u`'\"'`; scanChar(); break;
1097	case u`'b'`: u = u`'\b'`; scanChar(); break;
1098	case u`'f'`: u = u`'\f'`; scanChar(); break;
1099	case u`'n'`: u = u`'\n'`; scanChar(); break;
1100	case u`'r'`: u = u`'\r'`; scanChar(); break;
1101	case u`'t'`: u = u`'\t'`; scanChar(); break;
1102	case u`'v'`: u = u`'\v'`; scanChar(); break;
1103
1104	case u`'0'`:
1105	if (!_codePtr->isDigit()) {
1106	scanChar();
1107	u = u`'\0'`;
1108	break;
1109	}
1110	Q_FALLTHROUGH();
1111	case u`'1'`:
1112	case u`'2'`:
1113	case u`'3'`:
1114	case u`'4'`:
1115	case u`'5'`:
1116	case u`'6'`:
1117	case u`'7'`:
1118	case u`'8'`:
1119	case u`'9'`:
1120	_state.errorCode = IllegalEscapeSequence;
1121	_errorMessage = QCoreApplication::translate(
1122	context: "QQmlParser", key: "Octal escape sequences are not allowed");
1123	return T_ERROR;
1124
1125	case u`'\r'`:
1126	case u`'\n'`:
1127	case `0x2028u`:
1128	case `0x2029u`:
1129	// uncomment the following to use T_MULTILINE_STRING_LITERAL
1130	// multilineStringLiteral = true;
1131	scanChar();
1132	continue;
1133
1134	default:
1135	// non escape character
1136	u = _state.currentChar;
1137	scanChar();
1138	}
1139
1140	_tokenText += u;
1141	} else {
1142	_tokenText += _state.currentChar;
1143	scanChar();
1144	}
1145	}
1146	if (_lexMode == LexMode::LineByLine && !_code.isEmpty()) {
1147	if (mode == TemplateContinuation)
1148	return T_PARTIAL_TEMPLATE_MIDDLE;
1149	else if (mode == TemplateHead)
1150	return T_PARTIAL_TEMPLATE_HEAD;
1151	else if (mode == SingleQuote)
1152	return T_PARTIAL_SINGLE_QUOTE_STRING_LITERAL;
1153	return T_PARTIAL_DOUBLE_QUOTE_STRING_LITERAL;
1154	}
1155	_state.errorCode = UnclosedStringLiteral;
1156	_errorMessage = QCoreApplication::translate(context: "QQmlParser", key: "Unclosed string at end of line");
1157	return T_ERROR;
1158	}
1159
1160	int Lexer::scanNumber(QChar ch)
1161	{
1162	auto scanOptionalNumericSeparator = [this](auto isNextCharacterValid){
1163	if (_state.currentChar == u`'_'`) {
1164	if (peekChar() == u`'_'`) {
1165	_state.errorCode = IllegalNumber;
1166	_errorMessage = QCoreApplication::translate(
1167	context: "QQmlParser",
1168	key: "There can be at most one numeric separator between digits"
1169	);
1170	return false;
1171	}
1172
1173	if (!isNextCharacterValid()) {
1174	_state.errorCode = IllegalNumber;
1175	_errorMessage = QCoreApplication::translate(
1176	context: "QQmlParser",
1177	key: "A trailing numeric separator is not allowed in numeric literals"
1178	);
1179	return false;
1180	}
1181
1182	scanChar();
1183	}
1184
1185	return true;
1186	};
1187
1188	if (ch == u`'0'`) {
1189	if (_state.currentChar == u`'x'` \|\| _state.currentChar == u`'X'`) {
1190	ch = _state.currentChar; // remember the x or X to use it in the error message below.
1191
1192	// parse hex integer literal
1193	scanChar(); // consume 'x'
1194
1195	if (!isHexDigit(c: _state.currentChar)) {
1196	_state.errorCode = IllegalNumber;
1197	_errorMessage = QCoreApplication::translate(
1198	context: "QQmlParser",
1199	key: "At least one hexadecimal digit is required after '0%1'")
1200	.arg(a: ch);
1201	return T_ERROR;
1202	}
1203
1204	double d = `0.`;
1205	while (`1`) {
1206	int digit = ::hexDigit(c: _state.currentChar);
1207	if (digit < `0`)
1208	break;
1209	d *= `16`;
1210	d += digit;
1211	scanChar();
1212
1213	if (!scanOptionalNumericSeparator([this](){ return isHexDigit(peekChar()); }))
1214	return T_ERROR;
1215	}
1216
1217	_state.tokenValue = d;
1218	return T_NUMERIC_LITERAL;
1219	} else if (_state.currentChar == u`'o'` \|\| _state.currentChar == u`'O'`) {
1220	ch = _state.currentChar; // remember the o or O to use it in the error message below.
1221
1222	// parse octal integer literal
1223	scanChar(); // consume 'o'
1224
1225	if (!isOctalDigit(c: _state.currentChar.unicode())) {
1226	_state.errorCode = IllegalNumber;
1227	_errorMessage =
1228	QCoreApplication::translate(
1229	context: "QQmlParser", key: "At least one octal digit is required after '0%1'")
1230	.arg(a: ch);
1231	return T_ERROR;
1232	}
1233
1234	double d = `0.`;
1235	while (`1`) {
1236	int digit = ::octalDigit(c: _state.currentChar);
1237	if (digit < `0`)
1238	break;
1239	d *= `8`;
1240	d += digit;
1241	scanChar();
1242
1243	if (!scanOptionalNumericSeparator ([this](){
1244	return isOctalDigit(c: peekChar().unicode());
1245	})) {
1246	return T_ERROR;
1247	}
1248	}
1249
1250	_state.tokenValue = d;
1251	return T_NUMERIC_LITERAL;
1252	} else if (_state.currentChar == u`'b'` \|\| _state.currentChar == u`'B'`) {
1253	ch = _state.currentChar; // remember the b or B to use it in the error message below.
1254
1255	// parse binary integer literal
1256	scanChar(); // consume 'b'
1257
1258	if (_state.currentChar.unicode() != u`'0'` && _state.currentChar.unicode() != u`'1'`) {
1259	_state.errorCode = IllegalNumber;
1260	_errorMessage =
1261	QCoreApplication::translate(
1262	context: "QQmlParser", key: "At least one binary digit is required after '0%1'")
1263	.arg(a: ch);
1264	return T_ERROR;
1265	}
1266
1267	double d = `0.`;
1268	while (`1`) {
1269	int digit = `0`;
1270	if (_state.currentChar.unicode() == u`'1'`)
1271	digit = `1`;
1272	else if (_state.currentChar.unicode() != u`'0'`)
1273	break;
1274	d *= `2`;
1275	d += digit;
1276	scanChar();
1277
1278	if (!scanOptionalNumericSeparator ([this](){
1279	return peekChar().unicode() == u`'0'` \|\| peekChar().unicode() == u`'1'`;
1280	})) {
1281	return T_ERROR;
1282	}
1283	}
1284
1285	_state.tokenValue = d;
1286	return T_NUMERIC_LITERAL;
1287	} else if (_state.currentChar.isDigit() && !qmlMode()) {
1288	_state.errorCode = IllegalCharacter;
1289	_errorMessage = QCoreApplication::translate(context: "QQmlParser",
1290	key: "Decimal numbers can't start with '0'");
1291	return T_ERROR;
1292	}
1293	}
1294
1295	// decimal integer literal
1296	QVarLengthArray<char,`32`> chars;
1297	chars.append(t: ch.unicode());
1298
1299	if (ch != u`'.'`) {
1300	if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1301	return T_ERROR;
1302
1303	while (_state.currentChar.isDigit()) {
1304	chars.append(t: _state.currentChar.unicode());
1305	scanChar(); // consume the digit
1306
1307	if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1308	return T_ERROR;
1309	}
1310
1311	if (_state.currentChar == u`'.'`) {
1312	chars.append(t: _state.currentChar.unicode());
1313	scanChar(); // consume `.'
1314	}
1315	}
1316
1317	while (_state.currentChar.isDigit()) {
1318	chars.append(t: _state.currentChar.unicode());
1319	scanChar();
1320
1321	if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1322	return T_ERROR;
1323	}
1324
1325	if (_state.currentChar == u`'e'` \|\| _state.currentChar == u`'E'`) {
1326	if (_codePtr[`0`].isDigit()
1327	\|\| ((_codePtr[`0`] == u`'+'` \|\| _codePtr[`0`] == u`'-'`) && _codePtr[`1`].isDigit())) {
1328
1329	chars.append(t: _state.currentChar.unicode());
1330	scanChar(); // consume `e'
1331
1332	if (_state.currentChar == u`'+'` \|\| _state.currentChar == u`'-'`) {
1333	chars.append(t: _state.currentChar.unicode());
1334	scanChar(); // consume the sign
1335	}
1336
1337	while (_state.currentChar.isDigit()) {
1338	chars.append(t: _state.currentChar.unicode());
1339	scanChar();
1340
1341	if (!scanOptionalNumericSeparator([this](){ return peekChar().isDigit(); }))
1342	return T_ERROR;
1343	}
1344	}
1345	}
1346
1347	const char *begin = chars.constData();
1348	const char end = nullptr*;
1349	bool ok = false;
1350
1351	_state.tokenValue = qstrntod(s00: begin, len: chars.size(), se: &end, ok: &ok);
1352
1353	if (end - begin != chars.size()) {
1354	_state.errorCode = IllegalExponentIndicator;
1355	_errorMessage =
1356	QCoreApplication::translate(context: "QQmlParser", key: "Illegal syntax for exponential number");
1357	return T_ERROR;
1358	}
1359
1360	return T_NUMERIC_LITERAL;
1361	}
1362
1363	int Lexer::scanVersionNumber(QChar ch)
1364	{
1365	if (ch == u`'0'`) {
1366	_state.tokenValue = `0`;
1367	return T_VERSION_NUMBER;
1368	}
1369
1370	int acc = `0`;
1371	acc += ch.digitValue();
1372
1373	while (_state.currentChar.isDigit()) {
1374	acc *= `10`;
1375	acc += _state.currentChar.digitValue();
1376	scanChar(); // consume the digit
1377	}
1378
1379	_state.tokenValue = acc;
1380	return T_VERSION_NUMBER;
1381	}
1382
1383	bool Lexer::scanRegExp(RegExpBodyPrefix prefix)
1384	{
1385	_tokenText.resize(size: `0`);
1386	_state.validTokenText = true;
1387	_state.patternFlags = `0`;
1388
1389	if (prefix == EqualPrefix)
1390	_tokenText += u`'='`;
1391
1392	while (true) {
1393	switch (_state.currentChar.unicode()) {
1394	case u`'/'`:
1395	scanChar();
1396
1397	// scan the flags
1398	_state.patternFlags = `0`;
1399	while (isIdentLetter(c: _state.currentChar)) {
1400	int flag = regExpFlagFromChar(ch: _state.currentChar);
1401	if (flag == `0` \|\| _state.patternFlags & flag) {
1402	_errorMessage = QCoreApplication::translate(
1403	context: "QQmlParser", key: "Invalid regular expression flag '%0'")
1404	.arg(a: QChar (_state.currentChar));
1405	return false;
1406	}
1407	_state.patternFlags \|= flag;
1408	scanChar();
1409	}
1410
1411	_tokenLength = _codePtr - _tokenStartPtr - `1`;
1412	return true;
1413
1414	case u`'\\'`:
1415	// regular expression backslash sequence
1416	_tokenText += _state.currentChar;
1417	scanChar();
1418
1419	if (_codePtr > _endPtr \|\| isLineTerminator()) {
1420	_errorMessage = QCoreApplication::translate(
1421	context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1422	return false;
1423	}
1424
1425	_tokenText += _state.currentChar;
1426	scanChar();
1427	break;
1428
1429	case u`'['`:
1430	// regular expression class
1431	_tokenText += _state.currentChar;
1432	scanChar();
1433
1434	while (_codePtr <= _endPtr && !isLineTerminator()) {
1435	if (_state.currentChar == u`']'`)
1436	break;
1437	else if (_state.currentChar == u`'\\'`) {
1438	// regular expression backslash sequence
1439	_tokenText += _state.currentChar;
1440	scanChar();
1441
1442	if (_codePtr > _endPtr \|\| isLineTerminator()) {
1443	_errorMessage = QCoreApplication::translate(
1444	context: "QQmlParser", key: "Unterminated regular expression backslash sequence");
1445	return false;
1446	}
1447
1448	_tokenText += _state.currentChar;
1449	scanChar();
1450	} else {
1451	_tokenText += _state.currentChar;
1452	scanChar();
1453	}
1454	}
1455
1456	if (_state.currentChar != u`']'`) {
1457	_errorMessage = QCoreApplication::translate(
1458	context: "QQmlParser", key: "Unterminated regular expression class");
1459	return false;
1460	}
1461
1462	_tokenText += _state.currentChar;
1463	scanChar(); // skip ]
1464	break;
1465
1466	default:
1467	if (_codePtr > _endPtr \|\| isLineTerminator()) {
1468	_errorMessage = QCoreApplication::translate(
1469	context: "QQmlParser", key: "Unterminated regular expression literal");
1470	return false;
1471	} else {
1472	_tokenText += _state.currentChar;
1473	scanChar();
1474	}
1475	} // switch
1476	} // while
1477
1478	return false;
1479	}
1480
1481	bool Lexer::isLineTerminator() const
1482	{
1483	const ushort unicode = _state.currentChar.unicode();
1484	return unicode == `0x000Au`
1485	\|\| unicode == `0x000Du`
1486	\|\| unicode == `0x2028u`
1487	\|\| unicode == `0x2029u`;
1488	}
1489
1490	unsigned Lexer::isLineTerminatorSequence() const
1491	{
1492	switch (_state.currentChar.unicode()) {
1493	case `0x000Au`:
1494	case `0x2028u`:
1495	case `0x2029u`:
1496	return `1`;
1497	case `0x000Du`:
1498	if (_codePtr->unicode() == `0x000Au`)
1499	return `2`;
1500	else
1501	return `1`;
1502	default:
1503	return `0`;
1504	}
1505	}
1506
1507	bool Lexer::isIdentLetter(QChar ch)
1508	{
1509	// ASCII-biased, since all reserved words are ASCII, aand hence the
1510	// bulk of content to be parsed.
1511	if ((ch >= u`'a'` && ch <= u`'z'`)
1512	\|\| (ch >= u`'A'` && ch <= u`'Z'`)
1513	\|\| ch == u`'$'` \|\| ch == u`'_'`)
1514	return true;
1515	if (ch.unicode() < `128`)
1516	return false;
1517	return ch.isLetterOrNumber();
1518	}
1519
1520	bool Lexer::isDecimalDigit(ushort c)
1521	{
1522	return (c >= u`'0'` && c <= u`'9'`);
1523	}
1524
1525	bool Lexer::isHexDigit(QChar c)
1526	{
1527	return ((c >= u`'0'` && c <= u`'9'`)
1528	\|\| (c >= u`'a'` && c <= u`'f'`)
1529	\|\| (c >= u`'A'` && c <= u`'F'`));
1530	}
1531
1532	bool Lexer::isOctalDigit(ushort c)
1533	{
1534	return (c >= u`'0'` && c <= u`'7'`);
1535	}
1536
1537	QString Lexer::tokenText() const
1538	{
1539	if (_state.validTokenText)
1540	return _tokenText;
1541
1542	if (_state.tokenKind == T_STRING_LITERAL)
1543	return QString (_tokenStartPtr + `1`, _tokenLength - `2`);
1544
1545	return QString (_tokenStartPtr, _tokenLength);
1546	}
1547
// Returns the error code stored in the lexer's current state.
Lexer::Error Lexer::errorCode() const
{
    return _state.errorCode;
}
1552
// Returns the message stored for the most recently reported lexing error.
QString Lexer::errorMessage() const
{
    return _errorMessage;
}
1557
1558	void Lexer::syncProhibitAutomaticSemicolon()
1559	{
1560	if (_state.parenthesesState == BalancedParentheses) {
1561	// we have seen something like "if (foo)", which means we should
1562	// never insert an automatic semicolon at this point, since it would
1563	// then be expanded into an empty statement (ECMA-262 7.9.1)
1564	_state.prohibitAutomaticSemicolon = true;
1565	_state.parenthesesState = IgnoreParentheses;
1566	} else {
1567	_state.prohibitAutomaticSemicolon = false;
1568	}
1569	}
1570
// Returns the terminator flag of the lexer's current state.
bool Lexer::prevTerminator() const
{
    return _state.terminator;
}
1575
// Returns the followsClosingBrace flag of the lexer's current state.
bool Lexer::followsClosingBrace() const
{
    return _state.followsClosingBrace;
}
1580
1581	bool Lexer::canInsertAutomaticSemicolon(int token) const
1582	{
1583	return token == T_RBRACE \|\| token == EOF_SYMBOL \|\| _state.terminator
1584	\|\| _state.followsClosingBrace;
1585	}
1586
// Token kinds that isUriToken() accepts. The list is terminated by EOF_SYMBOL,
// which serves as the end marker for the lookup loop and must stay last.
static const int uriTokens[] = {
    QQmlJSGrammar::T_IDENTIFIER,
    QQmlJSGrammar::T_PROPERTY,
    QQmlJSGrammar::T_SIGNAL,
    QQmlJSGrammar::T_READONLY,
    QQmlJSGrammar::T_ON,
    QQmlJSGrammar::T_BREAK,
    QQmlJSGrammar::T_CASE,
    QQmlJSGrammar::T_CATCH,
    QQmlJSGrammar::T_CONTINUE,
    QQmlJSGrammar::T_DEFAULT,
    QQmlJSGrammar::T_DELETE,
    QQmlJSGrammar::T_DO,
    QQmlJSGrammar::T_ELSE,
    QQmlJSGrammar::T_FALSE,
    QQmlJSGrammar::T_FINALLY,
    QQmlJSGrammar::T_FOR,
    QQmlJSGrammar::T_FUNCTION,
    QQmlJSGrammar::T_IF,
    QQmlJSGrammar::T_IN,
    QQmlJSGrammar::T_OF,
    QQmlJSGrammar::T_INSTANCEOF,
    QQmlJSGrammar::T_NEW,
    QQmlJSGrammar::T_NULL,
    QQmlJSGrammar::T_RETURN,
    QQmlJSGrammar::T_SWITCH,
    QQmlJSGrammar::T_THIS,
    QQmlJSGrammar::T_THROW,
    QQmlJSGrammar::T_TRUE,
    QQmlJSGrammar::T_TRY,
    QQmlJSGrammar::T_TYPEOF,
    QQmlJSGrammar::T_VAR,
    QQmlJSGrammar::T_VOID,
    QQmlJSGrammar::T_WHILE,
    QQmlJSGrammar::T_CONST,
    QQmlJSGrammar::T_DEBUGGER,
    QQmlJSGrammar::T_RESERVED_WORD,
    QQmlJSGrammar::T_WITH,

    // end marker, not a URI token itself
    QQmlJSGrammar::EOF_SYMBOL
};
1628	static inline bool isUriToken(int token)
1629	{
1630	const int *current = uriTokens;
1631	while (*current != QQmlJSGrammar::EOF_SYMBOL) {
1632	if (*current == token)
1633	return true;
1634	++current;
1635	}
1636	return false;
1637	}
1638
1639	bool Lexer::scanDirectives(Directives directives, DiagnosticMessage error)
1640	{
1641	auto setError = [error, this](QString message) {
1642	error->message = std::move(message);
1643	error->loc.startLine = tokenStartLine();
1644	error->loc.startColumn = tokenStartColumn();
1645	};
1646
1647	QScopedValueRollback<bool> directivesGuard(_state.handlingDirectives, true);
1648	Q_ASSERT(!_qmlMode);
1649
1650	lex(); // fetch the first token
1651
1652	if (_state.tokenKind != T_DOT)
1653	return true;
1654
1655	do {
1656	const int lineNumber = tokenStartLine();
1657	const int column = tokenStartColumn();
1658
1659	lex(); // skip T_DOT
1660
1661	if (!(_state.tokenKind == T_IDENTIFIER \|\| _state.tokenKind == T_IMPORT))
1662	return true; // expected a valid QML/JS directive
1663
1664	const QString directiveName = tokenText();
1665
1666	if (! (directiveName == QLatin1String ("pragma") \|\|
1667	directiveName == QLatin1String ("import"))) {
1668	setError (QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1669	return false; // not a valid directive name
1670	}
1671
1672	// it must be a pragma or an import directive.
1673	if (directiveName == QLatin1String ("pragma")) {
1674	// .pragma library
1675	if (! (lex() == T_IDENTIFIER && tokenText() == QLatin1String("library"))) {
1676	setError (QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1677	return false; // expected `library
1678	}
1679
1680	// we found a .pragma library directive
1681	directives->pragmaLibrary();
1682
1683	} else {
1684	Q_ASSERT(directiveName == QLatin1String ("import"));
1685	lex(); // skip .import
1686
1687	QString pathOrUri;
1688	QString version;
1689	bool fileImport = false; // file or uri import
1690
1691	if (_state.tokenKind == T_STRING_LITERAL) {
1692	// .import T_STRING_LITERAL as T_IDENTIFIER
1693
1694	fileImport = true;
1695	pathOrUri = tokenText();
1696
1697	if (!pathOrUri.endsWith(s: QLatin1String ("js"))) {
1698	setError (QCoreApplication::translate(context: "QQmlParser",key: "Imported file must be a script"));
1699	return false;
1700	}
1701	lex();
1702
1703	} else if (_state.tokenKind == T_IDENTIFIER) {
1704	// .import T_IDENTIFIER (. T_IDENTIFIER) (T_VERSION_NUMBER (. T_VERSION_NUMBER)?)? as T_IDENTIFIER*
1705	while (true) {
1706	if (!isUriToken(token: _state.tokenKind)) {
1707	setError (QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1708	return false;
1709	}
1710
1711	pathOrUri.append(s: tokenText());
1712
1713	lex();
1714	if (tokenStartLine() != lineNumber) {
1715	setError (QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1716	return false;
1717	}
1718	if (_state.tokenKind != QQmlJSGrammar::T_DOT)
1719	break;
1720
1721	pathOrUri.append(c: u`'.'`);
1722
1723	lex();
1724	if (tokenStartLine() != lineNumber) {
1725	setError (QCoreApplication::translate(context: "QQmlParser",key: "Invalid module URI"));
1726	return false;
1727	}
1728	}
1729
1730	if (_state.tokenKind == T_VERSION_NUMBER) {
1731	version = tokenText();
1732	lex();
1733	if (_state.tokenKind == T_DOT) {
1734	version += u`'.'`;
1735	lex();
1736	if (_state.tokenKind != T_VERSION_NUMBER) {
1737	setError (QCoreApplication::translate(
1738	context: "QQmlParser", key: "Incomplete version number (dot but no minor)"));
1739	return false; // expected the module version number
1740	}
1741	version += tokenText();
1742	lex();
1743	}
1744	}
1745	}
1746
1747	//
1748	// recognize the mandatory `as' followed by the module name
1749	//
1750	if (!(_state.tokenKind == T_AS && tokenStartLine() == lineNumber)) {
1751	if (fileImport)
1752	setError (QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1753	else
1754	setError (QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1755	if (tokenStartLine() != lineNumber) {
1756	error->loc.startLine = lineNumber;
1757	error->loc.startColumn = column;
1758	}
1759	return false; // expected `as'
1760	}
1761
1762	if (lex() != T_IDENTIFIER \|\| tokenStartLine() != lineNumber) {
1763	if (fileImport)
1764	setError (QCoreApplication::translate(context: "QQmlParser", key: "File import requires a qualifier"));
1765	else
1766	setError (QCoreApplication::translate(context: "QQmlParser", key: "Module import requires a qualifier"));
1767	return false; // expected module name
1768	}
1769
1770	const QString module = tokenText();
1771	if (!module.at(i: `0`).isUpper()) {
1772	setError (QCoreApplication::translate(context: "QQmlParser",key: "Invalid import qualifier"));
1773	return false;
1774	}
1775
1776	if (fileImport)
1777	directives->importFile(jsfile: pathOrUri, module, line: lineNumber, column);
1778	else
1779	directives->importModule(uri: pathOrUri, version, module, line: lineNumber, column);
1780	}
1781
1782	if (tokenStartLine() != lineNumber) {
1783	setError (QCoreApplication::translate(context: "QQmlParser", key: "Syntax error"));
1784	return false; // the directives cannot span over multiple lines
1785	}
1786
1787	// fetch the first token after the .pragma/.import directive
1788	lex();
1789	} while (_state.tokenKind == T_DOT);
1790
1791	return true;
1792	}
1793
// Returns a reference to the lexer's current state.
const Lexer::State &Lexer::state() const
{
    return _state;
}
// Replaces the lexer's current state with a copy of state.
void Lexer::setState(const Lexer::State &state)
{
    _state = state;
}
1802
1803	int Lexer::parseModeFlags() const {
1804	int flags = `0`;
1805	if (qmlMode())
1806	flags \|= QmlMode\|StaticIsKeyword;
1807	if (yieldIsKeyWord())
1808	flags \|= YieldIsKeyword;
1809	if (_staticIsKeyword)
1810	flags \|= StaticIsKeyword;
1811	return flags;
1812	}
1813
1814	namespace QQmlJS {
1815	QDebug operator<<(QDebug dbg, const Lexer::State &s)
1816	{
1817	dbg << "{\n"
1818	<< " errorCode:" << int(s.errorCode) << ",\n"
1819	<< " currentChar:" << s.currentChar << ",\n"
1820	<< " tokenValue:" << s.tokenValue << ",\n"
1821	<< " parenthesesState:" << s.parenthesesState << ",\n"
1822	<< " parenthesesCount:" << s.parenthesesCount << ",\n"
1823	<< " outerTemplateBraceCount:" << s.outerTemplateBraceCount << ",\n"
1824	<< " bracesCount:" << s.bracesCount << ",\n"
1825	<< " stackToken:" << s.stackToken << ",\n"
1826	<< " patternFlags:" << s.patternFlags << ",\n"
1827	<< " tokenKind:" << s.tokenKind << ",\n"
1828	<< " importState:" << int(s.importState) << ",\n"
1829	<< " validTokenText:" << s.validTokenText << ",\n"
1830	<< " prohibitAutomaticSemicolon:" << s.prohibitAutomaticSemicolon << ",\n"
1831	<< " restrictedKeyword:" << s.restrictedKeyword << ",\n"
1832	<< " terminator:" << s.terminator << ",\n"
1833	<< " followsClosingBrace:" << s.followsClosingBrace << ",\n"
1834	<< " delimited:" << s.delimited << ",\n"
1835	<< " handlingDirectives:" << s.handlingDirectives << ",\n"
1836	<< " generatorLevel:" << s.generatorLevel << "\n}";
1837	return dbg;
1838	}
1839	}
1840
1841	QT_END_NAMESPACE
1842

Provided by KDAB

Definitions

source code of qtdeclarative/src/qml/parser/qqmljslexer.cpp