| 1 | // Copyright (C) 2016 The Qt Company Ltd. |
| 2 | // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only |
| 3 | |
| 4 | #ifndef QQMLJSLEXER_P_H |
| 5 | #define QQMLJSLEXER_P_H |
| 6 | |
| 7 | // |
| 8 | // W A R N I N G |
| 9 | // ------------- |
| 10 | // |
| 11 | // This file is not part of the Qt API. It exists purely as an |
| 12 | // implementation detail. This header file may change from version to |
| 13 | // version without notice, or even be removed. |
| 14 | // |
| 15 | // We mean it. |
| 16 | // |
| 17 | |
| 18 | #include <private/qqmljsglobal_p.h> |
| 19 | #include <private/qqmljsgrammar_p.h> |
| 20 | |
| 21 | #include <QtCore/qstring.h> |
| 22 | #include <QtCore/qstack.h> |
| 23 | |
| 24 | QT_BEGIN_NAMESPACE |
| 25 | |
| 26 | class QDebug; |
| 27 | |
| 28 | namespace QQmlJS { |
| 29 | |
| 30 | class Engine; |
| 31 | struct DiagnosticMessage; |
| 32 | class Directives; |
| 33 | |
| 34 | class QML_PARSER_EXPORT Lexer: public QQmlJSGrammar |
| 35 | { |
| 36 | public: |
| 37 | enum { |
| 38 | T_ABSTRACT = T_RESERVED_WORD, |
| 39 | T_BOOLEAN = T_RESERVED_WORD, |
| 40 | T_BYTE = T_RESERVED_WORD, |
| 41 | T_CHAR = T_RESERVED_WORD, |
| 42 | T_DOUBLE = T_RESERVED_WORD, |
| 43 | T_FLOAT = T_RESERVED_WORD, |
| 44 | T_GOTO = T_RESERVED_WORD, |
| 45 | T_IMPLEMENTS = T_RESERVED_WORD, |
| 46 | T_INT = T_RESERVED_WORD, |
| 47 | T_INTERFACE = T_RESERVED_WORD, |
| 48 | T_LONG = T_RESERVED_WORD, |
| 49 | T_NATIVE = T_RESERVED_WORD, |
| 50 | T_PACKAGE = T_RESERVED_WORD, |
| 51 | T_PRIVATE = T_RESERVED_WORD, |
| 52 | T_PROTECTED = T_RESERVED_WORD, |
| 53 | T_SHORT = T_RESERVED_WORD, |
| 54 | T_SYNCHRONIZED = T_RESERVED_WORD, |
| 55 | T_THROWS = T_RESERVED_WORD, |
| 56 | T_TRANSIENT = T_RESERVED_WORD, |
| 57 | T_VOLATILE = T_RESERVED_WORD |
| 58 | }; |
| 59 | |
| 60 | enum Error { |
| 61 | NoError, |
| 62 | IllegalCharacter, |
| 63 | IllegalNumber, |
| 64 | UnclosedStringLiteral, |
| 65 | IllegalEscapeSequence, |
| 66 | IllegalUnicodeEscapeSequence, |
| 67 | , |
| 68 | IllegalExponentIndicator, |
| 69 | IllegalIdentifier, |
| 70 | IllegalHexadecimalEscapeSequence |
| 71 | }; |
| 72 | |
| 73 | enum RegExpBodyPrefix { |
| 74 | NoPrefix, |
| 75 | EqualPrefix |
| 76 | }; |
| 77 | |
| 78 | enum RegExpFlag { |
| 79 | RegExp_Global = 0x01, |
| 80 | RegExp_IgnoreCase = 0x02, |
| 81 | RegExp_Multiline = 0x04, |
| 82 | RegExp_Unicode = 0x08, |
| 83 | RegExp_Sticky = 0x10 |
| 84 | }; |
| 85 | |
| 86 | enum ParseModeFlags { |
| 87 | QmlMode = 0x1, |
| 88 | YieldIsKeyword = 0x2, |
| 89 | StaticIsKeyword = 0x4 |
| 90 | }; |
| 91 | |
| 92 | enum class ImportState { |
| 93 | SawImport, |
| 94 | NoQmlImport |
| 95 | }; |
| 96 | |
| 97 | enum class LexMode { WholeCode, LineByLine }; |
| 98 | |
| 99 | enum class CodeContinuation { Reset, Continue }; |
| 100 | |
| 101 | public: |
| 102 | Lexer(Engine *engine, LexMode lexMode = LexMode::WholeCode); |
| 103 | |
| 104 | bool qmlMode() const; |
| 105 | bool yieldIsKeyWord() const { return _state.generatorLevel != 0; } |
| 106 | void setStaticIsKeyword(bool b) { _staticIsKeyword = b; } |
| 107 | |
| 108 | QString code() const; |
| 109 | void setCode(const QString &code, int lineno, bool qmlMode = true, |
| 110 | CodeContinuation codeContinuation = CodeContinuation::Reset); |
| 111 | |
| 112 | int lex(); |
| 113 | |
| 114 | bool scanRegExp(RegExpBodyPrefix prefix = NoPrefix); |
| 115 | bool scanDirectives(Directives *directives, DiagnosticMessage *error); |
| 116 | |
| 117 | int regExpFlags() const { return _state.patternFlags; } |
| 118 | QString regExpPattern() const { return _tokenText; } |
| 119 | |
| 120 | int tokenKind() const { return _state.tokenKind; } |
| 121 | int tokenOffset() const { return _currentOffset + _tokenStartPtr - _code.unicode(); } |
| 122 | int tokenLength() const { return _tokenLength; } |
| 123 | |
| 124 | int tokenStartLine() const { return _tokenLine; } |
| 125 | int tokenStartColumn() const { return _tokenColumn; } |
| 126 | |
| 127 | inline QStringView tokenSpell() const { return _tokenSpell; } |
| 128 | inline QStringView rawString() const { return _rawString; } |
| 129 | double tokenValue() const { return _state.tokenValue; } |
| 130 | QString tokenText() const; |
| 131 | |
| 132 | Error errorCode() const; |
| 133 | QString errorMessage() const; |
| 134 | |
| 135 | std::optional<DiagnosticMessage> illegalFileLengthError() const; |
| 136 | |
| 137 | bool canInsertAutomaticSemicolon(int token) const; |
| 138 | |
| 139 | enum ParenthesesState { |
| 140 | IgnoreParentheses, |
| 141 | CountParentheses, |
| 142 | BalancedParentheses |
| 143 | }; |
| 144 | |
| 145 | enum class { , , }; |
| 146 | |
| 147 | void enterGeneratorBody() { ++_state.generatorLevel; } |
| 148 | void leaveGeneratorBody() { --_state.generatorLevel; } |
| 149 | |
| 150 | struct State |
| 151 | { |
| 152 | Error errorCode = NoError; |
| 153 | |
| 154 | QChar currentChar = u'\n'; |
| 155 | double tokenValue = 0; |
| 156 | |
| 157 | // parentheses state |
| 158 | ParenthesesState parenthesesState = IgnoreParentheses; |
| 159 | int parenthesesCount = 0; |
| 160 | |
| 161 | // template string stack |
| 162 | QStack<int> outerTemplateBraceCount; |
| 163 | int bracesCount = -1; |
| 164 | |
| 165 | int stackToken = -1; |
| 166 | |
| 167 | int patternFlags = 0; |
| 168 | int tokenKind = 0; |
| 169 | ImportState importState = ImportState::NoQmlImport; |
| 170 | |
| 171 | bool validTokenText = false; |
| 172 | bool prohibitAutomaticSemicolon = false; |
| 173 | bool restrictedKeyword = false; |
| 174 | bool terminator = false; |
| 175 | bool followsClosingBrace = false; |
| 176 | bool delimited = true; |
| 177 | bool handlingDirectives = false; |
| 178 | CommentState = CommentState::NoComment; |
| 179 | int generatorLevel = 0; |
| 180 | |
| 181 | friend bool operator==(State const &s1, State const &s2) |
| 182 | { |
| 183 | if (s1.errorCode != s2.errorCode) |
| 184 | return false; |
| 185 | if (s1.currentChar != s2.currentChar) |
| 186 | return false; |
| 187 | if (s1.tokenValue != s2.tokenValue) |
| 188 | return false; |
| 189 | if (s1.parenthesesState != s2.parenthesesState) |
| 190 | return false; |
| 191 | if (s1.parenthesesCount != s2.parenthesesCount) |
| 192 | return false; |
| 193 | if (s1.outerTemplateBraceCount != s2.outerTemplateBraceCount) |
| 194 | return false; |
| 195 | if (s1.bracesCount != s2.bracesCount) |
| 196 | return false; |
| 197 | if (s1.stackToken != s2.stackToken) |
| 198 | return false; |
| 199 | if (s1.patternFlags != s2.patternFlags) |
| 200 | return false; |
| 201 | if (s1.tokenKind != s2.tokenKind) |
| 202 | return false; |
| 203 | if (s1.importState != s2.importState) |
| 204 | return false; |
| 205 | if (s1.validTokenText != s2.validTokenText) |
| 206 | return false; |
| 207 | if (s1.prohibitAutomaticSemicolon != s2.prohibitAutomaticSemicolon) |
| 208 | return false; |
| 209 | if (s1.restrictedKeyword != s2.restrictedKeyword) |
| 210 | return false; |
| 211 | if (s1.terminator != s2.terminator) |
| 212 | return false; |
| 213 | if (s1.followsClosingBrace != s2.followsClosingBrace) |
| 214 | return false; |
| 215 | if (s1.delimited != s2.delimited) |
| 216 | return false; |
| 217 | if (s1.handlingDirectives != s2.handlingDirectives) |
| 218 | return false; |
| 219 | if (s1.generatorLevel != s2.generatorLevel) |
| 220 | return false; |
| 221 | return true; |
| 222 | } |
| 223 | |
| 224 | friend bool operator!=(State const &s1, State const &s2) { return !(s1 == s2); } |
| 225 | |
| 226 | friend QML_PARSER_EXPORT QDebug operator<<(QDebug dbg, State const &s); |
| 227 | }; |
| 228 | |
| 229 | const State &state() const; |
| 230 | void setState(const State &state); |
| 231 | |
| 232 | protected: |
| 233 | static int classify(QStringView s, int parseModeFlags); |
| 234 | |
| 235 | private: |
| 236 | int parseModeFlags() const; |
| 237 | bool prevTerminator() const; |
| 238 | bool followsClosingBrace() const; |
| 239 | inline void scanChar(); |
| 240 | inline QChar peekChar(); |
| 241 | int scanToken(); |
| 242 | int scanNumber(QChar ch); |
| 243 | int scanVersionNumber(QChar ch); |
| 244 | enum ScanStringMode : char16_t { |
| 245 | SingleQuote = '\'', |
| 246 | DoubleQuote = '"', |
| 247 | TemplateHead = '`', |
| 248 | TemplateContinuation = 0 |
| 249 | }; |
| 250 | int scanString(ScanStringMode mode); |
| 251 | |
| 252 | bool isLineTerminator() const; |
| 253 | unsigned isLineTerminatorSequence() const; |
| 254 | static bool isIdentLetter(QChar c); |
| 255 | static bool isDecimalDigit(ushort c); |
| 256 | static bool isHexDigit(QChar c); |
| 257 | static bool isOctalDigit(ushort c); |
| 258 | |
| 259 | void syncProhibitAutomaticSemicolon(); |
| 260 | uint decodeUnicodeEscapeCharacter(bool *ok); |
| 261 | QChar decodeHexEscapeCharacter(bool *ok); |
| 262 | |
| 263 | friend QML_PARSER_EXPORT QDebug operator<<(QDebug dbg, const Lexer &l); |
| 264 | |
| 265 | private: |
| 266 | Engine *_engine; |
| 267 | |
| 268 | LexMode _lexMode = LexMode::WholeCode; |
| 269 | QString _code; |
| 270 | const QChar *_endPtr; |
| 271 | bool _qmlMode; |
| 272 | bool _staticIsKeyword = false; |
| 273 | |
| 274 | bool _skipLinefeed = false; |
| 275 | |
| 276 | int _currentLineNumber = 0; |
| 277 | int _currentColumnNumber = 0; |
| 278 | int _currentOffset = 0; |
| 279 | |
| 280 | int _tokenLength = 0; |
| 281 | int _tokenLine = 0; |
| 282 | int _tokenColumn = 0; |
| 283 | |
| 284 | QString _tokenText; |
| 285 | QString _errorMessage; |
| 286 | QStringView _tokenSpell; |
| 287 | QStringView _rawString; |
| 288 | |
| 289 | const QChar *_codePtr = nullptr; |
| 290 | const QChar *_tokenStartPtr = nullptr; |
| 291 | |
| 292 | State _state; |
| 293 | }; |
| 294 | |
| 295 | } // end of namespace QQmlJS |
| 296 | |
| 297 | QT_END_NAMESPACE |
| 298 | |
| 299 | #endif // QQMLJSLEXER_P_H |
| 300 | |