| 1 | /* | 
| 2 |  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org) | 
| 3 |  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. | 
| 4 |  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) | 
| 5 |  * | 
| 6 |  *  This library is free software; you can redistribute it and/or | 
| 7 |  *  modify it under the terms of the GNU Library General Public | 
| 8 |  *  License as published by the Free Software Foundation; either | 
| 9 |  *  version 2 of the License, or (at your option) any later version. | 
| 10 |  * | 
| 11 |  *  This library is distributed in the hope that it will be useful, | 
| 12 |  *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 13 |  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
| 14 |  *  Library General Public License for more details. | 
| 15 |  * | 
| 16 |  *  You should have received a copy of the GNU Library General Public License | 
| 17 |  *  along with this library; see the file COPYING.LIB.  If not, write to | 
| 18 |  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, | 
| 19 |  *  Boston, MA 02110-1301, USA. | 
| 20 |  * | 
| 21 |  */ | 
| 22 |  | 
| 23 | #include "config.h" | 
| 24 | #include "Lexer.h" | 
| 25 |  | 
| 26 | #include "JSFunction.h" | 
| 27 | #include "JSGlobalObjectFunctions.h" | 
| 28 | #include "NodeInfo.h" | 
| 29 | #include "Nodes.h" | 
| 30 | #include "dtoa.h" | 
| 31 | #include <ctype.h> | 
| 32 | #include <limits.h> | 
| 33 | #include <string.h> | 
| 34 | #include <wtf/Assertions.h> | 
| 35 |  | 
| 36 | using namespace WTF; | 
| 37 | using namespace Unicode; | 
| 38 |  | 
| 39 | // We can't specify the namespace in yacc's C output, so do it here instead. | 
| 40 | using namespace JSC; | 
| 41 |  | 
| 42 | #include "Grammar.h" | 
| 43 | #include "Lookup.h" | 
| 44 | #include "Lexer.lut.h" | 
| 45 |  | 
| 46 | namespace JSC { | 
| 47 |  | 
| 48 | static const UChar byteOrderMark = 0xFEFF; | 
| 49 |  | 
| 50 | Lexer::Lexer(JSGlobalData* globalData) | 
| 51 |     : m_isReparsing(false) | 
| 52 |     , m_globalData(globalData) | 
| 53 |     , m_keywordTable(JSC::mainTable) | 
| 54 | { | 
| 55 |     m_buffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); | 
| 56 |     m_buffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); | 
| 57 | } | 
| 58 |  | 
| 59 | Lexer::~Lexer() | 
| 60 | { | 
| 61 |     m_keywordTable.deleteTable(); | 
| 62 | } | 
| 63 |  | 
| 64 | inline const UChar* Lexer::currentCharacter() const | 
| 65 | { | 
| 66 |     return m_code - 4; | 
| 67 | } | 
| 68 |  | 
| 69 | inline int Lexer::currentOffset() const | 
| 70 | { | 
| 71 |     return currentCharacter() - m_codeStart; | 
| 72 | } | 
| 73 |  | 
| 74 | ALWAYS_INLINE void Lexer::shift1() | 
| 75 | { | 
| 76 |     m_current = m_next1; | 
| 77 |     m_next1 = m_next2; | 
| 78 |     m_next2 = m_next3; | 
| 79 |     if (LIKELY(m_code < m_codeEnd)) | 
| 80 |         m_next3 = m_code[0]; | 
| 81 |     else | 
| 82 |         m_next3 = -1; | 
| 83 |  | 
| 84 |     ++m_code; | 
| 85 | } | 
| 86 |  | 
| 87 | ALWAYS_INLINE void Lexer::shift2() | 
| 88 | { | 
| 89 |     m_current = m_next2; | 
| 90 |     m_next1 = m_next3; | 
| 91 |     if (LIKELY(m_code + 1 < m_codeEnd)) { | 
| 92 |         m_next2 = m_code[0]; | 
| 93 |         m_next3 = m_code[1]; | 
| 94 |     } else { | 
| 95 |         m_next2 = m_code < m_codeEnd ? m_code[0] : -1; | 
| 96 |         m_next3 = -1; | 
| 97 |     } | 
| 98 |  | 
| 99 |     m_code += 2; | 
| 100 | } | 
| 101 |  | 
| 102 | ALWAYS_INLINE void Lexer::shift3() | 
| 103 | { | 
| 104 |     m_current = m_next3; | 
| 105 |     if (LIKELY(m_code + 2 < m_codeEnd)) { | 
| 106 |         m_next1 = m_code[0]; | 
| 107 |         m_next2 = m_code[1]; | 
| 108 |         m_next3 = m_code[2]; | 
| 109 |     } else { | 
| 110 |         m_next1 = m_code < m_codeEnd ? m_code[0] : -1; | 
| 111 |         m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1; | 
| 112 |         m_next3 = -1; | 
| 113 |     } | 
| 114 |  | 
| 115 |     m_code += 3; | 
| 116 | } | 
| 117 |  | 
| 118 | ALWAYS_INLINE void Lexer::shift4() | 
| 119 | { | 
| 120 |     if (LIKELY(m_code + 3 < m_codeEnd)) { | 
| 121 |         m_current = m_code[0]; | 
| 122 |         m_next1 = m_code[1]; | 
| 123 |         m_next2 = m_code[2]; | 
| 124 |         m_next3 = m_code[3]; | 
| 125 |     } else { | 
| 126 |         m_current = m_code < m_codeEnd ? m_code[0] : -1; | 
| 127 |         m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1; | 
| 128 |         m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1; | 
| 129 |         m_next3 = -1; | 
| 130 |     } | 
| 131 |  | 
| 132 |     m_code += 4; | 
| 133 | } | 
| 134 |  | 
| 135 | void Lexer::setCode(const SourceCode& source, ParserArena& arena) | 
| 136 | { | 
| 137 |     m_arena = &arena.identifierArena(); | 
| 138 |  | 
| 139 |     m_lineNumber = source.firstLine(); | 
| 140 |     m_delimited = false; | 
| 141 |     m_lastToken = -1; | 
| 142 |  | 
| 143 |     const UChar* data = source.provider()->data(); | 
| 144 |  | 
| 145 |     m_source = &source; | 
| 146 |     m_codeStart = data; | 
| 147 |     m_code = data + source.startOffset(); | 
| 148 |     m_codeEnd = data + source.endOffset(); | 
| 149 |     m_error = false; | 
| 150 |     m_atLineStart = true; | 
| 151 |  | 
| 152 |     // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. | 
| 153 |     // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. | 
| 154 |     if (source.provider()->hasBOMs()) { | 
| 155 |         for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { | 
| 156 |             if (UNLIKELY(*p == byteOrderMark)) { | 
| 157 |                 copyCodeWithoutBOMs(); | 
| 158 |                 break; | 
| 159 |             } | 
| 160 |         } | 
| 161 |     } | 
| 162 |  | 
| 163 |     // Read the first characters into the 4-character buffer. | 
| 164 |     shift4(); | 
| 165 |     ASSERT(currentOffset() == source.startOffset()); | 
| 166 | } | 
| 167 |  | 
| 168 | void Lexer::copyCodeWithoutBOMs() | 
| 169 | { | 
| 170 |     // Note: In this case, the character offset data for debugging will be incorrect. | 
| 171 |     // If it's important to correctly debug code with extraneous BOMs, then the caller | 
| 172 |     // should strip the BOMs when creating the SourceProvider object and do its own | 
| 173 |     // mapping of offsets within the stripped text to original text offset. | 
| 174 |  | 
| 175 |     m_codeWithoutBOMs.reserveCapacity(newCapacity: m_codeEnd - m_code); | 
| 176 |     for (const UChar* p = m_code; p < m_codeEnd; ++p) { | 
| 177 |         UChar c = *p; | 
| 178 |         if (c != byteOrderMark) | 
| 179 |             m_codeWithoutBOMs.append(val: c); | 
| 180 |     } | 
| 181 |     ptrdiff_t startDelta = m_codeStart - m_code; | 
| 182 |     m_code = m_codeWithoutBOMs.data(); | 
| 183 |     m_codeStart = m_code + startDelta; | 
| 184 |     m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); | 
| 185 | } | 
| 186 |  | 
| 187 | void Lexer::shiftLineTerminator() | 
| 188 | { | 
| 189 |     ASSERT(isLineTerminator(m_current)); | 
| 190 |  | 
| 191 |     // Allow both CRLF and LFCR. | 
| 192 |     if (m_current + m_next1 == '\n' + '\r') | 
| 193 |         shift2(); | 
| 194 |     else | 
| 195 |         shift1(); | 
| 196 |  | 
| 197 |     ++m_lineNumber; | 
| 198 | } | 
| 199 |  | 
| 200 | ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length) | 
| 201 | { | 
| 202 |     return &m_arena->makeIdentifier(globalData: m_globalData, characters, length); | 
| 203 | } | 
| 204 |  | 
| 205 | inline bool Lexer::lastTokenWasRestrKeyword() const | 
| 206 | { | 
| 207 |     return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; | 
| 208 | } | 
| 209 |  | 
| 210 | static NEVER_INLINE bool isNonASCIIIdentStart(int c) | 
| 211 | { | 
| 212 |     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); | 
| 213 | } | 
| 214 |  | 
| 215 | static inline bool isIdentStart(int c) | 
| 216 | { | 
| 217 |     return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); | 
| 218 | } | 
| 219 |  | 
| 220 | static NEVER_INLINE bool isNonASCIIIdentPart(int c) | 
| 221 | { | 
| 222 |     return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other | 
| 223 |         | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); | 
| 224 | } | 
| 225 |  | 
| 226 | static inline bool isIdentPart(int c) | 
| 227 | { | 
| 228 |     return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); | 
| 229 | } | 
| 230 |  | 
// Maps the character following a backslash in a string literal to the
// character it denotes: b, t, n, v, f and r become the control characters
// 0x08, 0x09, 0x0A, 0x0B, 0x0C and 0x0D; any other value is returned unchanged.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;

    // Not a recognized escape letter: the character stands for itself.
    return c;
}
| 250 |  | 
| 251 | inline void Lexer::record8(int c) | 
| 252 | { | 
| 253 |     ASSERT(c >= 0); | 
| 254 |     ASSERT(c <= 0xFF); | 
| 255 |     m_buffer8.append(val: static_cast<char>(c)); | 
| 256 | } | 
| 257 |  | 
| 258 | inline void Lexer::record16(UChar c) | 
| 259 | { | 
| 260 |     m_buffer16.append(val: c); | 
| 261 | } | 
| 262 |  | 
| 263 | inline void Lexer::record16(int c) | 
| 264 | { | 
| 265 |     ASSERT(c >= 0); | 
| 266 |     ASSERT(c <= USHRT_MAX); | 
| 267 |     record16(c: UChar(static_cast<unsigned short>(c))); | 
| 268 | } | 
| 269 |  | 
| 270 | int Lexer::lex(void* p1, void* p2) | 
| 271 | { | 
| 272 |     ASSERT(!m_error); | 
| 273 |     ASSERT(m_buffer8.isEmpty()); | 
| 274 |     ASSERT(m_buffer16.isEmpty()); | 
| 275 |  | 
| 276 |     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); | 
| 277 |     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); | 
| 278 |     int token = 0; | 
| 279 |     m_terminator = false; | 
| 280 |  | 
| 281 | start: | 
| 282 |     while (isWhiteSpace(ch: m_current)) | 
| 283 |         shift1(); | 
| 284 |  | 
| 285 |     int startOffset = currentOffset(); | 
| 286 |  | 
| 287 |     if (m_current == -1) { | 
| 288 | #ifndef QT_BUILD_SCRIPT_LIB /* the parser takes cate about automatic semicolon. | 
| 289 |                               this might add incorrect semicolons */ | 
| 290 |         //m_delimited and m_isReparsing are now useless | 
| 291 |         if (!m_terminator && !m_delimited && !m_isReparsing) { | 
| 292 |             // automatic semicolon insertion if program incomplete | 
| 293 |             token = ';'; | 
| 294 |             goto doneSemicolon; | 
| 295 |         } | 
| 296 | #endif | 
| 297 |         return 0; | 
| 298 |     } | 
| 299 |  | 
| 300 |     m_delimited = false; | 
| 301 |     switch (m_current) { | 
| 302 |         case '>': | 
| 303 |             if (m_next1 == '>' && m_next2 == '>') { | 
| 304 |                 if (m_next3 == '=') { | 
| 305 |                     shift4(); | 
| 306 |                     token = URSHIFTEQUAL; | 
| 307 |                     break; | 
| 308 |                 } | 
| 309 |                 shift3(); | 
| 310 |                 token = URSHIFT; | 
| 311 |                 break; | 
| 312 |             } | 
| 313 |             if (m_next1 == '>') { | 
| 314 |                 if (m_next2 == '=') { | 
| 315 |                     shift3(); | 
| 316 |                     token = RSHIFTEQUAL; | 
| 317 |                     break; | 
| 318 |                 } | 
| 319 |                 shift2(); | 
| 320 |                 token = RSHIFT; | 
| 321 |                 break; | 
| 322 |             } | 
| 323 |             if (m_next1 == '=') { | 
| 324 |                 shift2(); | 
| 325 |                 token = GE; | 
| 326 |                 break; | 
| 327 |             } | 
| 328 |             shift1(); | 
| 329 |             token = '>'; | 
| 330 |             break; | 
| 331 |         case '=': | 
| 332 |             if (m_next1 == '=') { | 
| 333 |                 if (m_next2 == '=') { | 
| 334 |                     shift3(); | 
| 335 |                     token = STREQ; | 
| 336 |                     break; | 
| 337 |                 } | 
| 338 |                 shift2(); | 
| 339 |                 token = EQEQ; | 
| 340 |                 break; | 
| 341 |             } | 
| 342 |             shift1(); | 
| 343 |             token = '='; | 
| 344 |             break; | 
| 345 |         case '!': | 
| 346 |             if (m_next1 == '=') { | 
| 347 |                 if (m_next2 == '=') { | 
| 348 |                     shift3(); | 
| 349 |                     token = STRNEQ; | 
| 350 |                     break; | 
| 351 |                 } | 
| 352 |                 shift2(); | 
| 353 |                 token = NE; | 
| 354 |                 break; | 
| 355 |             } | 
| 356 |             shift1(); | 
| 357 |             token = '!'; | 
| 358 |             break; | 
| 359 |         case '<': | 
| 360 |             if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { | 
| 361 |                 // <!-- marks the beginning of a line comment (for www usage) | 
| 362 |                 shift4(); | 
| 363 |                 goto inSingleLineComment; | 
| 364 |             } | 
| 365 |             if (m_next1 == '<') { | 
| 366 |                 if (m_next2 == '=') { | 
| 367 |                     shift3(); | 
| 368 |                     token = LSHIFTEQUAL; | 
| 369 |                     break; | 
| 370 |                 } | 
| 371 |                 shift2(); | 
| 372 |                 token = LSHIFT; | 
| 373 |                 break; | 
| 374 |             } | 
| 375 |             if (m_next1 == '=') { | 
| 376 |                 shift2(); | 
| 377 |                 token = LE; | 
| 378 |                 break; | 
| 379 |             } | 
| 380 |             shift1(); | 
| 381 |             token = '<'; | 
| 382 |             break; | 
| 383 |         case '+': | 
| 384 |             if (m_next1 == '+') { | 
| 385 |                 shift2(); | 
| 386 |                 if (m_terminator) { | 
| 387 |                     token = AUTOPLUSPLUS; | 
| 388 |                     break; | 
| 389 |                 } | 
| 390 |                 token = PLUSPLUS; | 
| 391 |                 break; | 
| 392 |             } | 
| 393 |             if (m_next1 == '=') { | 
| 394 |                 shift2(); | 
| 395 |                 token = PLUSEQUAL; | 
| 396 |                 break; | 
| 397 |             } | 
| 398 |             shift1(); | 
| 399 |             token = '+'; | 
| 400 |             break; | 
| 401 |         case '-': | 
| 402 |             if (m_next1 == '-') { | 
| 403 |                 if (m_atLineStart && m_next2 == '>') { | 
| 404 |                     shift3(); | 
| 405 |                     goto inSingleLineComment; | 
| 406 |                 } | 
| 407 |                 shift2(); | 
| 408 |                 if (m_terminator) { | 
| 409 |                     token = AUTOMINUSMINUS; | 
| 410 |                     break; | 
| 411 |                 } | 
| 412 |                 token = MINUSMINUS; | 
| 413 |                 break; | 
| 414 |             } | 
| 415 |             if (m_next1 == '=') { | 
| 416 |                 shift2(); | 
| 417 |                 token = MINUSEQUAL; | 
| 418 |                 break; | 
| 419 |             } | 
| 420 |             shift1(); | 
| 421 |             token = '-'; | 
| 422 |             break; | 
| 423 |         case '*': | 
| 424 |             if (m_next1 == '=') { | 
| 425 |                 shift2(); | 
| 426 |                 token = MULTEQUAL; | 
| 427 |                 break; | 
| 428 |             } | 
| 429 |             shift1(); | 
| 430 |             token = '*'; | 
| 431 |             break; | 
| 432 |         case '/': | 
| 433 |             if (m_next1 == '/') { | 
| 434 |                 shift2(); | 
| 435 |                 goto inSingleLineComment; | 
| 436 |             } | 
| 437 |             if (m_next1 == '*') | 
| 438 |                 goto inMultiLineComment; | 
| 439 |             if (m_next1 == '=') { | 
| 440 |                 shift2(); | 
| 441 |                 token = DIVEQUAL; | 
| 442 |                 break; | 
| 443 |             } | 
| 444 |             shift1(); | 
| 445 |             token = '/'; | 
| 446 |             break; | 
| 447 |         case '&': | 
| 448 |             if (m_next1 == '&') { | 
| 449 |                 shift2(); | 
| 450 |                 token = AND; | 
| 451 |                 break; | 
| 452 |             } | 
| 453 |             if (m_next1 == '=') { | 
| 454 |                 shift2(); | 
| 455 |                 token = ANDEQUAL; | 
| 456 |                 break; | 
| 457 |             } | 
| 458 |             shift1(); | 
| 459 |             token = '&'; | 
| 460 |             break; | 
| 461 |         case '^': | 
| 462 |             if (m_next1 == '=') { | 
| 463 |                 shift2(); | 
| 464 |                 token = XOREQUAL; | 
| 465 |                 break; | 
| 466 |             } | 
| 467 |             shift1(); | 
| 468 |             token = '^'; | 
| 469 |             break; | 
| 470 |         case '%': | 
| 471 |             if (m_next1 == '=') { | 
| 472 |                 shift2(); | 
| 473 |                 token = MODEQUAL; | 
| 474 |                 break; | 
| 475 |             } | 
| 476 |             shift1(); | 
| 477 |             token = '%'; | 
| 478 |             break; | 
| 479 |         case '|': | 
| 480 |             if (m_next1 == '=') { | 
| 481 |                 shift2(); | 
| 482 |                 token = OREQUAL; | 
| 483 |                 break; | 
| 484 |             } | 
| 485 |             if (m_next1 == '|') { | 
| 486 |                 shift2(); | 
| 487 |                 token = OR; | 
| 488 |                 break; | 
| 489 |             } | 
| 490 |             shift1(); | 
| 491 |             token = '|'; | 
| 492 |             break; | 
| 493 |         case '.': | 
| 494 |             if (isASCIIDigit(c: m_next1)) { | 
| 495 |                 record8(c: '.'); | 
| 496 |                 shift1(); | 
| 497 |                 goto inNumberAfterDecimalPoint; | 
| 498 |             } | 
| 499 |             token = '.'; | 
| 500 |             shift1(); | 
| 501 |             break; | 
| 502 |         case ',': | 
| 503 |         case '~': | 
| 504 |         case '?': | 
| 505 |         case ':': | 
| 506 |         case '(': | 
| 507 |         case ')': | 
| 508 |         case '[': | 
| 509 |         case ']': | 
| 510 |             token = m_current; | 
| 511 |             shift1(); | 
| 512 |             break; | 
| 513 |         case ';': | 
| 514 |             shift1(); | 
| 515 |             m_delimited = true; | 
| 516 |             token = ';'; | 
| 517 |             break; | 
| 518 |         case '{': | 
| 519 |             lvalp->intValue = currentOffset(); | 
| 520 |             shift1(); | 
| 521 |             token = OPENBRACE; | 
| 522 |             break; | 
| 523 |         case '}': | 
| 524 |             lvalp->intValue = currentOffset(); | 
| 525 |             shift1(); | 
| 526 |             m_delimited = true; | 
| 527 |             token = CLOSEBRACE; | 
| 528 |             break; | 
| 529 |         case '\\': | 
| 530 |             goto startIdentifierWithBackslash; | 
| 531 |         case '0': | 
| 532 |             goto startNumberWithZeroDigit; | 
| 533 |         case '1': | 
| 534 |         case '2': | 
| 535 |         case '3': | 
| 536 |         case '4': | 
| 537 |         case '5': | 
| 538 |         case '6': | 
| 539 |         case '7': | 
| 540 |         case '8': | 
| 541 |         case '9': | 
| 542 |             goto startNumber; | 
| 543 |         case '"': | 
| 544 |         case '\'': | 
| 545 |             goto startString; | 
| 546 |         default: | 
| 547 |             if (isIdentStart(c: m_current)) | 
| 548 |                 goto startIdentifierOrKeyword; | 
| 549 |             if (isLineTerminator(ch: m_current)) { | 
| 550 |                 shiftLineTerminator(); | 
| 551 |                 m_atLineStart = true; | 
| 552 |                 m_terminator = true; | 
| 553 |                 if (lastTokenWasRestrKeyword()) { | 
| 554 |                     token = ';'; | 
| 555 |                     goto doneSemicolon; | 
| 556 |                 } | 
| 557 |                 goto start; | 
| 558 |             } | 
| 559 |             goto returnError; | 
| 560 |     } | 
| 561 |  | 
| 562 |     m_atLineStart = false; | 
| 563 |     goto returnToken; | 
| 564 |  | 
| 565 | startString: { | 
| 566 |     int stringQuoteCharacter = m_current; | 
| 567 |     shift1(); | 
| 568 |  | 
| 569 |     const UChar* stringStart = currentCharacter(); | 
| 570 |     while (m_current != stringQuoteCharacter) { | 
| 571 |         // Fast check for characters that require special handling. | 
| 572 |         // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently | 
| 573 |         // as possible, and lets through all common ASCII characters. | 
| 574 |         if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { | 
| 575 |             m_buffer16.append(data: stringStart, dataSize: currentCharacter() - stringStart); | 
| 576 |             goto inString; | 
| 577 |         } | 
| 578 |         shift1(); | 
| 579 |     } | 
| 580 |     lvalp->ident = makeIdentifier(characters: stringStart, length: currentCharacter() - stringStart); | 
| 581 |     shift1(); | 
| 582 |     m_atLineStart = false; | 
| 583 |     m_delimited = false; | 
| 584 |     token = STRING; | 
| 585 |     goto returnToken; | 
| 586 |  | 
| 587 | inString: | 
| 588 |     while (m_current != stringQuoteCharacter) { | 
| 589 |         if (m_current == '\\') | 
| 590 |             goto inStringEscapeSequence; | 
| 591 |         if (UNLIKELY(isLineTerminator(m_current))) | 
| 592 |             goto returnError; | 
| 593 |         if (UNLIKELY(m_current == -1)) | 
| 594 |             goto returnError; | 
| 595 |         record16(c: m_current); | 
| 596 |         shift1(); | 
| 597 |     } | 
| 598 |     goto doneString; | 
| 599 |  | 
| 600 | inStringEscapeSequence: | 
| 601 |     shift1(); | 
| 602 |     if (m_current == 'x') { | 
| 603 |         shift1(); | 
| 604 |         if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1)) { | 
| 605 |             record16(c: convertHex(c1: m_current, c2: m_next1)); | 
| 606 |             shift2(); | 
| 607 |             goto inString; | 
| 608 |         } | 
| 609 |         record16(c: 'x'); | 
| 610 |         if (m_current == stringQuoteCharacter) | 
| 611 |             goto doneString; | 
| 612 |         goto inString; | 
| 613 |     } | 
| 614 |     if (m_current == 'u') { | 
| 615 |         shift1(); | 
| 616 |         if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1) && isASCIIHexDigit(c: m_next2) && isASCIIHexDigit(c: m_next3)) { | 
| 617 |             record16(c: convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3)); | 
| 618 |             shift4(); | 
| 619 |             goto inString; | 
| 620 |         } | 
| 621 |         if (m_current == stringQuoteCharacter) { | 
| 622 |             record16(c: 'u'); | 
| 623 |             goto doneString; | 
| 624 |         } | 
| 625 |         goto returnError; | 
| 626 |     } | 
| 627 |     if (isASCIIOctalDigit(c: m_current)) { | 
| 628 |         if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(c: m_next1) && isASCIIOctalDigit(c: m_next2)) { | 
| 629 |             record16(c: (m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); | 
| 630 |             shift3(); | 
| 631 |             goto inString; | 
| 632 |         } | 
| 633 |         if (isASCIIOctalDigit(c: m_next1)) { | 
| 634 |             record16(c: (m_current - '0') * 8 + m_next1 - '0'); | 
| 635 |             shift2(); | 
| 636 |             goto inString; | 
| 637 |         } | 
| 638 |         record16(c: m_current - '0'); | 
| 639 |         shift1(); | 
| 640 |         goto inString; | 
| 641 |     } | 
| 642 |     if (isLineTerminator(ch: m_current)) { | 
| 643 |         shiftLineTerminator(); | 
| 644 |         goto inString; | 
| 645 |     } | 
| 646 |     if (m_current == -1) | 
| 647 |         goto returnError; | 
| 648 |     record16(c: singleEscape(c: m_current)); | 
| 649 |     shift1(); | 
| 650 |     goto inString; | 
| 651 | } | 
| 652 |  | 
| 653 | startIdentifierWithBackslash: | 
| 654 |     shift1(); | 
| 655 |     if (UNLIKELY(m_current != 'u')) | 
| 656 |         goto returnError; | 
| 657 |     shift1(); | 
| 658 |     if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | 
| 659 |         goto returnError; | 
| 660 |     token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3); | 
| 661 |     if (UNLIKELY(!isIdentStart(token))) | 
| 662 |         goto returnError; | 
| 663 |     goto inIdentifierAfterCharacterCheck; | 
| 664 |  | 
| 665 | startIdentifierOrKeyword: { | 
| 666 |     const UChar* identifierStart = currentCharacter(); | 
| 667 |     shift1(); | 
| 668 |     while (isIdentPart(c: m_current)) | 
| 669 |         shift1(); | 
| 670 |     if (LIKELY(m_current != '\\')) { | 
| 671 |         lvalp->ident = makeIdentifier(characters: identifierStart, length: currentCharacter() - identifierStart); | 
| 672 |         goto doneIdentifierOrKeyword; | 
| 673 |     } | 
| 674 |     m_buffer16.append(data: identifierStart, dataSize: currentCharacter() - identifierStart); | 
| 675 | } | 
| 676 |  | 
| 677 |     do { | 
| 678 |         shift1(); | 
| 679 |         if (UNLIKELY(m_current != 'u')) | 
| 680 |             goto returnError; | 
| 681 |         shift1(); | 
| 682 |         if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | 
| 683 |             goto returnError; | 
| 684 |         token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3); | 
| 685 |         if (UNLIKELY(!isIdentPart(token))) | 
| 686 |             goto returnError; | 
| 687 | inIdentifierAfterCharacterCheck: | 
| 688 |         record16(c: token); | 
| 689 |         shift4(); | 
| 690 |  | 
| 691 |         while (isIdentPart(c: m_current)) { | 
| 692 |             record16(c: m_current); | 
| 693 |             shift1(); | 
| 694 |         } | 
| 695 |     } while (UNLIKELY(m_current == '\\')); | 
| 696 |     goto doneIdentifier; | 
| 697 |  | 
| 698 | : | 
| 699 |     while (!isLineTerminator(ch: m_current)) { | 
| 700 |         if (UNLIKELY(m_current == -1)) | 
| 701 |             return 0; | 
| 702 |         shift1(); | 
| 703 |     } | 
| 704 |     shiftLineTerminator(); | 
| 705 |     m_atLineStart = true; | 
| 706 |     m_terminator = true; | 
| 707 |     if (lastTokenWasRestrKeyword()) | 
| 708 |         goto doneSemicolon; | 
| 709 |     goto start; | 
| 710 |  | 
| 711 | : | 
| 712 |     shift2(); | 
| 713 |     while (m_current != '*' || m_next1 != '/') { | 
| 714 |         if (isLineTerminator(ch: m_current)) | 
| 715 |             shiftLineTerminator(); | 
| 716 |         else { | 
| 717 |             shift1(); | 
| 718 |             if (UNLIKELY(m_current == -1)) | 
| 719 |                 goto returnError; | 
| 720 |         } | 
| 721 |     } | 
| 722 |     shift2(); | 
| 723 |     m_atLineStart = false; | 
| 724 |     goto start; | 
| 725 |  | 
| 726 | startNumberWithZeroDigit: | 
| 727 |     shift1(); | 
| 728 |     if ((m_current | 0x20) == 'x' && isASCIIHexDigit(c: m_next1)) { | 
| 729 |         shift1(); | 
| 730 |         goto inHex; | 
| 731 |     } | 
| 732 |     if (m_current == '.') { | 
| 733 |         record8(c: '0'); | 
| 734 |         record8(c: '.'); | 
| 735 |         shift1(); | 
| 736 |         goto inNumberAfterDecimalPoint; | 
| 737 |     } | 
| 738 |     if ((m_current | 0x20) == 'e') { | 
| 739 |         record8(c: '0'); | 
| 740 |         record8(c: 'e'); | 
| 741 |         shift1(); | 
| 742 |         goto inExponentIndicator; | 
| 743 |     } | 
| 744 |     if (isASCIIOctalDigit(c: m_current)) | 
| 745 |         goto inOctal; | 
| 746 |     if (isASCIIDigit(c: m_current)) | 
| 747 |         goto startNumber; | 
| 748 |     lvalp->doubleValue = 0; | 
| 749 |     goto doneNumeric; | 
| 750 |  | 
| 751 | inNumberAfterDecimalPoint: | 
| 752 |     while (isASCIIDigit(c: m_current)) { | 
| 753 |         record8(c: m_current); | 
| 754 |         shift1(); | 
| 755 |     } | 
| 756 |     if ((m_current | 0x20) == 'e') { | 
| 757 |         record8(c: 'e'); | 
| 758 |         shift1(); | 
| 759 |         goto inExponentIndicator; | 
| 760 |     } | 
| 761 |     goto doneNumber; | 
| 762 |  | 
| 763 | inExponentIndicator: | 
| 764 |     if (m_current == '+' || m_current == '-') { | 
| 765 |         record8(c: m_current); | 
| 766 |         shift1(); | 
| 767 |     } | 
| 768 |     if (!isASCIIDigit(c: m_current)) | 
| 769 |         goto returnError; | 
| 770 |     do { | 
| 771 |         record8(c: m_current); | 
| 772 |         shift1(); | 
| 773 |     } while (isASCIIDigit(c: m_current)); | 
| 774 |     goto doneNumber; | 
| 775 |  | 
| 776 | inOctal: { | 
| 777 |     do { | 
| 778 |         record8(c: m_current); | 
| 779 |         shift1(); | 
| 780 |     } while (isASCIIOctalDigit(c: m_current)); | 
| 781 |     if (isASCIIDigit(c: m_current)) | 
| 782 |         goto startNumber; | 
| 783 |  | 
| 784 |     double dval = 0; | 
| 785 |  | 
| 786 |     const char* end = m_buffer8.end(); | 
| 787 |     for (const char* p = m_buffer8.data(); p < end; ++p) { | 
| 788 |         dval *= 8; | 
| 789 |         dval += *p - '0'; | 
| 790 |     } | 
| 791 |     if (dval >= mantissaOverflowLowerBound) | 
| 792 |         dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 8); | 
| 793 |  | 
| 794 |     m_buffer8.resize(size: 0); | 
| 795 |  | 
| 796 |     lvalp->doubleValue = dval; | 
| 797 |     goto doneNumeric; | 
| 798 | } | 
| 799 |  | 
| 800 | inHex: { | 
| 801 |     do { | 
| 802 |         record8(c: m_current); | 
| 803 |         shift1(); | 
| 804 |     } while (isASCIIHexDigit(c: m_current)); | 
| 805 |  | 
| 806 |     double dval = 0; | 
| 807 |  | 
| 808 |     const char* end = m_buffer8.end(); | 
| 809 |     for (const char* p = m_buffer8.data(); p < end; ++p) { | 
| 810 |         dval *= 16; | 
| 811 |         dval += toASCIIHexValue(c: *p); | 
| 812 |     } | 
| 813 |     if (dval >= mantissaOverflowLowerBound) | 
| 814 |         dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 16); | 
| 815 |  | 
| 816 |     m_buffer8.resize(size: 0); | 
| 817 |  | 
| 818 |     lvalp->doubleValue = dval; | 
| 819 |     goto doneNumeric; | 
| 820 | } | 
| 821 |  | 
| 822 | startNumber: | 
| 823 |     record8(c: m_current); | 
| 824 |     shift1(); | 
| 825 |     while (isASCIIDigit(c: m_current)) { | 
| 826 |         record8(c: m_current); | 
| 827 |         shift1(); | 
| 828 |     } | 
| 829 |     if (m_current == '.') { | 
| 830 |         record8(c: '.'); | 
| 831 |         shift1(); | 
| 832 |         goto inNumberAfterDecimalPoint; | 
| 833 |     } | 
| 834 |     if ((m_current | 0x20) == 'e') { | 
| 835 |         record8(c: 'e'); | 
| 836 |         shift1(); | 
| 837 |         goto inExponentIndicator; | 
| 838 |     } | 
| 839 |  | 
| 840 |     // Fall through into doneNumber. | 
| 841 |  | 
| 842 | doneNumber: | 
| 843 |     // Null-terminate string for strtod. | 
| 844 |     m_buffer8.append(val: '\0'); | 
| 845 |     lvalp->doubleValue = WTF::strtod(s00: m_buffer8.data(), se: 0); | 
| 846 |     m_buffer8.resize(size: 0); | 
| 847 |  | 
| 848 |     // Fall through into doneNumeric. | 
| 849 |  | 
| 850 | doneNumeric: | 
| 851 |     // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. | 
| 852 |     if (UNLIKELY(isIdentStart(m_current))) | 
| 853 |         goto returnError; | 
| 854 |  | 
| 855 |     m_atLineStart = false; | 
| 856 |     m_delimited = false; | 
| 857 |     token = NUMBER; | 
| 858 |     goto returnToken; | 
| 859 |  | 
| 860 | doneSemicolon: | 
| 861 |     token = ';'; | 
| 862 |     m_delimited = true; | 
| 863 |     goto returnToken; | 
| 864 |  | 
| 865 | doneIdentifier: | 
| 866 |     m_atLineStart = false; | 
| 867 |     m_delimited = false; | 
| 868 |     lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); | 
| 869 |     m_buffer16.resize(size: 0); | 
| 870 |     token = IDENT; | 
| 871 |     goto returnToken; | 
| 872 |  | 
| 873 | doneIdentifierOrKeyword: { | 
| 874 |     m_atLineStart = false; | 
| 875 |     m_delimited = false; | 
| 876 |     m_buffer16.resize(size: 0); | 
| 877 |     const HashEntry* entry = m_keywordTable.entry(globalData: m_globalData, identifier: *lvalp->ident); | 
| 878 |     token = entry ? entry->lexerValue() : IDENT; | 
| 879 |     goto returnToken; | 
| 880 | } | 
| 881 |  | 
| 882 | doneString: | 
| 883 |     // Atomize constant strings in case they're later used in property lookup. | 
| 884 |     shift1(); | 
| 885 |     m_atLineStart = false; | 
| 886 |     m_delimited = false; | 
| 887 |     lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); | 
| 888 |     m_buffer16.resize(size: 0); | 
| 889 |     token = STRING; | 
| 890 |  | 
| 891 |     // Fall through into returnToken. | 
| 892 |  | 
| 893 | returnToken: { | 
| 894 |     int lineNumber = m_lineNumber; | 
| 895 |     llocp->first_line = lineNumber; | 
| 896 |     llocp->last_line = lineNumber; | 
| 897 |     llocp->first_column = startOffset; | 
| 898 |     llocp->last_column = currentOffset(); | 
| 899 |  | 
| 900 |     m_lastToken = token; | 
| 901 |     return token; | 
| 902 | } | 
| 903 |  | 
| 904 | returnError: | 
| 905 |     m_error = true; | 
| 906 |     return -1; | 
| 907 | } | 
| 908 |  | 
| 909 | bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) | 
| 910 | { | 
| 911 |     ASSERT(m_buffer16.isEmpty()); | 
| 912 |  | 
| 913 |     bool lastWasEscape = false; | 
| 914 |     bool inBrackets = false; | 
| 915 |  | 
| 916 |     if (patternPrefix) { | 
| 917 |         ASSERT(!isLineTerminator(patternPrefix)); | 
| 918 |         ASSERT(patternPrefix != '/'); | 
| 919 |         ASSERT(patternPrefix != '['); | 
| 920 |         record16(c: patternPrefix); | 
| 921 |     } | 
| 922 |  | 
| 923 |     while (true) { | 
| 924 |         int current = m_current; | 
| 925 |  | 
| 926 |         if (isLineTerminator(ch: current) || current == -1) { | 
| 927 |             m_buffer16.resize(size: 0); | 
| 928 |             return false; | 
| 929 |         } | 
| 930 |  | 
| 931 |         shift1(); | 
| 932 |  | 
| 933 |         if (current == '/' && !lastWasEscape && !inBrackets) | 
| 934 |             break; | 
| 935 |  | 
| 936 |         record16(c: current); | 
| 937 |  | 
| 938 |         if (lastWasEscape) { | 
| 939 |             lastWasEscape = false; | 
| 940 |             continue; | 
| 941 |         } | 
| 942 |  | 
| 943 |         switch (current) { | 
| 944 |         case '[': | 
| 945 |             inBrackets = true; | 
| 946 |             break; | 
| 947 |         case ']': | 
| 948 |             inBrackets = false; | 
| 949 |             break; | 
| 950 |         case '\\': | 
| 951 |             lastWasEscape = true; | 
| 952 |             break; | 
| 953 |         } | 
| 954 |     } | 
| 955 |  | 
| 956 |     pattern = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); | 
| 957 |     m_buffer16.resize(size: 0); | 
| 958 |  | 
| 959 |     while (isIdentPart(c: m_current)) { | 
| 960 |         record16(c: m_current); | 
| 961 |         shift1(); | 
| 962 |     } | 
| 963 |  | 
| 964 |     flags = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); | 
| 965 |     m_buffer16.resize(size: 0); | 
| 966 |  | 
| 967 |     return true; | 
| 968 | } | 
| 969 |  | 
| 970 | bool Lexer::skipRegExp() | 
| 971 | { | 
| 972 |     bool lastWasEscape = false; | 
| 973 |     bool inBrackets = false; | 
| 974 |  | 
| 975 |     while (true) { | 
| 976 |         int current = m_current; | 
| 977 |  | 
| 978 |         if (isLineTerminator(ch: current) || current == -1) | 
| 979 |             return false; | 
| 980 |  | 
| 981 |         shift1(); | 
| 982 |  | 
| 983 |         if (current == '/' && !lastWasEscape && !inBrackets) | 
| 984 |             break; | 
| 985 |  | 
| 986 |         if (lastWasEscape) { | 
| 987 |             lastWasEscape = false; | 
| 988 |             continue; | 
| 989 |         } | 
| 990 |  | 
| 991 |         switch (current) { | 
| 992 |         case '[': | 
| 993 |             inBrackets = true; | 
| 994 |             break; | 
| 995 |         case ']': | 
| 996 |             inBrackets = false; | 
| 997 |             break; | 
| 998 |         case '\\': | 
| 999 |             lastWasEscape = true; | 
| 1000 |             break; | 
| 1001 |         } | 
| 1002 |     } | 
| 1003 |  | 
| 1004 |     while (isIdentPart(c: m_current)) | 
| 1005 |         shift1(); | 
| 1006 |  | 
| 1007 |     return true; | 
| 1008 | } | 
| 1009 |  | 
| 1010 | void Lexer::clear() | 
| 1011 | { | 
| 1012 |     m_arena = 0; | 
| 1013 |     m_codeWithoutBOMs.clear(); | 
| 1014 |  | 
| 1015 |     Vector<char> newBuffer8; | 
| 1016 |     newBuffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); | 
| 1017 |     m_buffer8.swap(other&: newBuffer8); | 
| 1018 |  | 
| 1019 |     Vector<UChar> newBuffer16; | 
| 1020 |     newBuffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); | 
| 1021 |     m_buffer16.swap(other&: newBuffer16); | 
| 1022 |  | 
| 1023 |     m_isReparsing = false; | 
| 1024 | } | 
| 1025 |  | 
| 1026 | SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) | 
| 1027 | { | 
| 1028 |     if (m_codeWithoutBOMs.isEmpty()) | 
| 1029 |         return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); | 
| 1030 |  | 
| 1031 |     const UChar* data = m_source->provider()->data(); | 
| 1032 |  | 
| 1033 |     ASSERT(openBrace < closeBrace); | 
| 1034 |  | 
| 1035 |     int numBOMsBeforeOpenBrace = 0; | 
| 1036 |     int numBOMsBetweenBraces = 0; | 
| 1037 |  | 
| 1038 |     int i; | 
| 1039 |     for (i = m_source->startOffset(); i < openBrace; ++i) | 
| 1040 |         numBOMsBeforeOpenBrace += data[i] == byteOrderMark; | 
| 1041 |     for (; i < closeBrace; ++i) | 
| 1042 |         numBOMsBetweenBraces += data[i] == byteOrderMark; | 
| 1043 |  | 
| 1044 |     return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, | 
| 1045 |         closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine); | 
| 1046 | } | 
| 1047 |  | 
| 1048 | } // namespace JSC | 
| 1049 |  |