1/*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include "config.h"
24#include "Lexer.h"
25
26#include "JSFunction.h"
27#include "JSGlobalObjectFunctions.h"
28#include "NodeInfo.h"
29#include "Nodes.h"
30#include "dtoa.h"
31#include <ctype.h>
32#include <limits.h>
33#include <string.h>
34#include <wtf/Assertions.h>
35
36using namespace WTF;
37using namespace Unicode;
38
39// We can't specify the namespace in yacc's C output, so do it here instead.
40using namespace JSC;
41
42#include "Grammar.h"
43#include "Lookup.h"
44#include "Lexer.lut.h"
45
46namespace JSC {
47
// U+FEFF (BYTE ORDER MARK). setCode() looks for this code unit and
// copyCodeWithoutBOMs() drops it from the text handed to the lexer.
static const UChar byteOrderMark = 0xFEFF;
49
50Lexer::Lexer(JSGlobalData* globalData)
51 : m_isReparsing(false)
52 , m_globalData(globalData)
53 , m_keywordTable(JSC::mainTable)
54{
55 m_buffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
56 m_buffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
57}
58
// Releases the keyword lookup table this lexer initialised from JSC::mainTable.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
63
64inline const UChar* Lexer::currentCharacter() const
65{
66 return m_code - 4;
67}
68
69inline int Lexer::currentOffset() const
70{
71 return currentCharacter() - m_codeStart;
72}
73
74ALWAYS_INLINE void Lexer::shift1()
75{
76 m_current = m_next1;
77 m_next1 = m_next2;
78 m_next2 = m_next3;
79 if (LIKELY(m_code < m_codeEnd))
80 m_next3 = m_code[0];
81 else
82 m_next3 = -1;
83
84 ++m_code;
85}
86
87ALWAYS_INLINE void Lexer::shift2()
88{
89 m_current = m_next2;
90 m_next1 = m_next3;
91 if (LIKELY(m_code + 1 < m_codeEnd)) {
92 m_next2 = m_code[0];
93 m_next3 = m_code[1];
94 } else {
95 m_next2 = m_code < m_codeEnd ? m_code[0] : -1;
96 m_next3 = -1;
97 }
98
99 m_code += 2;
100}
101
102ALWAYS_INLINE void Lexer::shift3()
103{
104 m_current = m_next3;
105 if (LIKELY(m_code + 2 < m_codeEnd)) {
106 m_next1 = m_code[0];
107 m_next2 = m_code[1];
108 m_next3 = m_code[2];
109 } else {
110 m_next1 = m_code < m_codeEnd ? m_code[0] : -1;
111 m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
112 m_next3 = -1;
113 }
114
115 m_code += 3;
116}
117
118ALWAYS_INLINE void Lexer::shift4()
119{
120 if (LIKELY(m_code + 3 < m_codeEnd)) {
121 m_current = m_code[0];
122 m_next1 = m_code[1];
123 m_next2 = m_code[2];
124 m_next3 = m_code[3];
125 } else {
126 m_current = m_code < m_codeEnd ? m_code[0] : -1;
127 m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
128 m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
129 m_next3 = -1;
130 }
131
132 m_code += 4;
133}
134
135void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136{
137 m_arena = &arena.identifierArena();
138
139 m_lineNumber = source.firstLine();
140 m_delimited = false;
141 m_lastToken = -1;
142
143 const UChar* data = source.provider()->data();
144
145 m_source = &source;
146 m_codeStart = data;
147 m_code = data + source.startOffset();
148 m_codeEnd = data + source.endOffset();
149 m_error = false;
150 m_atLineStart = true;
151
152 // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153 // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154 if (source.provider()->hasBOMs()) {
155 for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156 if (UNLIKELY(*p == byteOrderMark)) {
157 copyCodeWithoutBOMs();
158 break;
159 }
160 }
161 }
162
163 // Read the first characters into the 4-character buffer.
164 shift4();
165 ASSERT(currentOffset() == source.startOffset());
166}
167
168void Lexer::copyCodeWithoutBOMs()
169{
170 // Note: In this case, the character offset data for debugging will be incorrect.
171 // If it's important to correctly debug code with extraneous BOMs, then the caller
172 // should strip the BOMs when creating the SourceProvider object and do its own
173 // mapping of offsets within the stripped text to original text offset.
174
175 m_codeWithoutBOMs.reserveCapacity(newCapacity: m_codeEnd - m_code);
176 for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177 UChar c = *p;
178 if (c != byteOrderMark)
179 m_codeWithoutBOMs.append(val: c);
180 }
181 ptrdiff_t startDelta = m_codeStart - m_code;
182 m_code = m_codeWithoutBOMs.data();
183 m_codeStart = m_code + startDelta;
184 m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185}
186
187void Lexer::shiftLineTerminator()
188{
189 ASSERT(isLineTerminator(m_current));
190
191 // Allow both CRLF and LFCR.
192 if (m_current + m_next1 == '\n' + '\r')
193 shift2();
194 else
195 shift1();
196
197 ++m_lineNumber;
198}
199
200ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201{
202 return &m_arena->makeIdentifier(globalData: m_globalData, characters, length);
203}
204
205inline bool Lexer::lastTokenWasRestrKeyword() const
206{
207 return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
208}
209
210static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211{
212 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
213}
214
215static inline bool isIdentStart(int c)
216{
217 return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
218}
219
220static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221{
222 return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
223 | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector);
224}
225
226static inline bool isIdentPart(int c)
227{
228 return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
229}
230
// Maps the character following a backslash in a string literal to the code
// unit it denotes. The letters b, t, n, v, f and r yield control characters
// 0x08 through 0x0D; every other value is returned unchanged.
static inline int singleEscape(int c)
{
    static const struct {
        int letter;
        int codeUnit;
    } escapes[] = {
        { 'b', 0x08 },
        { 't', 0x09 },
        { 'n', 0x0A },
        { 'v', 0x0B },
        { 'f', 0x0C },
        { 'r', 0x0D },
    };

    for (unsigned i = 0; i < sizeof(escapes) / sizeof(escapes[0]); ++i) {
        if (escapes[i].letter == c)
            return escapes[i].codeUnit;
    }
    return c;
}
250
251inline void Lexer::record8(int c)
252{
253 ASSERT(c >= 0);
254 ASSERT(c <= 0xFF);
255 m_buffer8.append(val: static_cast<char>(c));
256}
257
258inline void Lexer::record16(UChar c)
259{
260 m_buffer16.append(val: c);
261}
262
263inline void Lexer::record16(int c)
264{
265 ASSERT(c >= 0);
266 ASSERT(c <= USHRT_MAX);
267 record16(c: UChar(static_cast<unsigned short>(c)));
268}
269
270int Lexer::lex(void* p1, void* p2)
271{
272 ASSERT(!m_error);
273 ASSERT(m_buffer8.isEmpty());
274 ASSERT(m_buffer16.isEmpty());
275
276 YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277 YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278 int token = 0;
279 m_terminator = false;
280
281start:
282 while (isWhiteSpace(ch: m_current))
283 shift1();
284
285 int startOffset = currentOffset();
286
287 if (m_current == -1) {
288#ifndef QT_BUILD_SCRIPT_LIB /* the parser takes cate about automatic semicolon.
289 this might add incorrect semicolons */
290 //m_delimited and m_isReparsing are now useless
291 if (!m_terminator && !m_delimited && !m_isReparsing) {
292 // automatic semicolon insertion if program incomplete
293 token = ';';
294 goto doneSemicolon;
295 }
296#endif
297 return 0;
298 }
299
300 m_delimited = false;
301 switch (m_current) {
302 case '>':
303 if (m_next1 == '>' && m_next2 == '>') {
304 if (m_next3 == '=') {
305 shift4();
306 token = URSHIFTEQUAL;
307 break;
308 }
309 shift3();
310 token = URSHIFT;
311 break;
312 }
313 if (m_next1 == '>') {
314 if (m_next2 == '=') {
315 shift3();
316 token = RSHIFTEQUAL;
317 break;
318 }
319 shift2();
320 token = RSHIFT;
321 break;
322 }
323 if (m_next1 == '=') {
324 shift2();
325 token = GE;
326 break;
327 }
328 shift1();
329 token = '>';
330 break;
331 case '=':
332 if (m_next1 == '=') {
333 if (m_next2 == '=') {
334 shift3();
335 token = STREQ;
336 break;
337 }
338 shift2();
339 token = EQEQ;
340 break;
341 }
342 shift1();
343 token = '=';
344 break;
345 case '!':
346 if (m_next1 == '=') {
347 if (m_next2 == '=') {
348 shift3();
349 token = STRNEQ;
350 break;
351 }
352 shift2();
353 token = NE;
354 break;
355 }
356 shift1();
357 token = '!';
358 break;
359 case '<':
360 if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
361 // <!-- marks the beginning of a line comment (for www usage)
362 shift4();
363 goto inSingleLineComment;
364 }
365 if (m_next1 == '<') {
366 if (m_next2 == '=') {
367 shift3();
368 token = LSHIFTEQUAL;
369 break;
370 }
371 shift2();
372 token = LSHIFT;
373 break;
374 }
375 if (m_next1 == '=') {
376 shift2();
377 token = LE;
378 break;
379 }
380 shift1();
381 token = '<';
382 break;
383 case '+':
384 if (m_next1 == '+') {
385 shift2();
386 if (m_terminator) {
387 token = AUTOPLUSPLUS;
388 break;
389 }
390 token = PLUSPLUS;
391 break;
392 }
393 if (m_next1 == '=') {
394 shift2();
395 token = PLUSEQUAL;
396 break;
397 }
398 shift1();
399 token = '+';
400 break;
401 case '-':
402 if (m_next1 == '-') {
403 if (m_atLineStart && m_next2 == '>') {
404 shift3();
405 goto inSingleLineComment;
406 }
407 shift2();
408 if (m_terminator) {
409 token = AUTOMINUSMINUS;
410 break;
411 }
412 token = MINUSMINUS;
413 break;
414 }
415 if (m_next1 == '=') {
416 shift2();
417 token = MINUSEQUAL;
418 break;
419 }
420 shift1();
421 token = '-';
422 break;
423 case '*':
424 if (m_next1 == '=') {
425 shift2();
426 token = MULTEQUAL;
427 break;
428 }
429 shift1();
430 token = '*';
431 break;
432 case '/':
433 if (m_next1 == '/') {
434 shift2();
435 goto inSingleLineComment;
436 }
437 if (m_next1 == '*')
438 goto inMultiLineComment;
439 if (m_next1 == '=') {
440 shift2();
441 token = DIVEQUAL;
442 break;
443 }
444 shift1();
445 token = '/';
446 break;
447 case '&':
448 if (m_next1 == '&') {
449 shift2();
450 token = AND;
451 break;
452 }
453 if (m_next1 == '=') {
454 shift2();
455 token = ANDEQUAL;
456 break;
457 }
458 shift1();
459 token = '&';
460 break;
461 case '^':
462 if (m_next1 == '=') {
463 shift2();
464 token = XOREQUAL;
465 break;
466 }
467 shift1();
468 token = '^';
469 break;
470 case '%':
471 if (m_next1 == '=') {
472 shift2();
473 token = MODEQUAL;
474 break;
475 }
476 shift1();
477 token = '%';
478 break;
479 case '|':
480 if (m_next1 == '=') {
481 shift2();
482 token = OREQUAL;
483 break;
484 }
485 if (m_next1 == '|') {
486 shift2();
487 token = OR;
488 break;
489 }
490 shift1();
491 token = '|';
492 break;
493 case '.':
494 if (isASCIIDigit(c: m_next1)) {
495 record8(c: '.');
496 shift1();
497 goto inNumberAfterDecimalPoint;
498 }
499 token = '.';
500 shift1();
501 break;
502 case ',':
503 case '~':
504 case '?':
505 case ':':
506 case '(':
507 case ')':
508 case '[':
509 case ']':
510 token = m_current;
511 shift1();
512 break;
513 case ';':
514 shift1();
515 m_delimited = true;
516 token = ';';
517 break;
518 case '{':
519 lvalp->intValue = currentOffset();
520 shift1();
521 token = OPENBRACE;
522 break;
523 case '}':
524 lvalp->intValue = currentOffset();
525 shift1();
526 m_delimited = true;
527 token = CLOSEBRACE;
528 break;
529 case '\\':
530 goto startIdentifierWithBackslash;
531 case '0':
532 goto startNumberWithZeroDigit;
533 case '1':
534 case '2':
535 case '3':
536 case '4':
537 case '5':
538 case '6':
539 case '7':
540 case '8':
541 case '9':
542 goto startNumber;
543 case '"':
544 case '\'':
545 goto startString;
546 default:
547 if (isIdentStart(c: m_current))
548 goto startIdentifierOrKeyword;
549 if (isLineTerminator(ch: m_current)) {
550 shiftLineTerminator();
551 m_atLineStart = true;
552 m_terminator = true;
553 if (lastTokenWasRestrKeyword()) {
554 token = ';';
555 goto doneSemicolon;
556 }
557 goto start;
558 }
559 goto returnError;
560 }
561
562 m_atLineStart = false;
563 goto returnToken;
564
565startString: {
566 int stringQuoteCharacter = m_current;
567 shift1();
568
569 const UChar* stringStart = currentCharacter();
570 while (m_current != stringQuoteCharacter) {
571 // Fast check for characters that require special handling.
572 // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
573 // as possible, and lets through all common ASCII characters.
574 if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
575 m_buffer16.append(data: stringStart, dataSize: currentCharacter() - stringStart);
576 goto inString;
577 }
578 shift1();
579 }
580 lvalp->ident = makeIdentifier(characters: stringStart, length: currentCharacter() - stringStart);
581 shift1();
582 m_atLineStart = false;
583 m_delimited = false;
584 token = STRING;
585 goto returnToken;
586
587inString:
588 while (m_current != stringQuoteCharacter) {
589 if (m_current == '\\')
590 goto inStringEscapeSequence;
591 if (UNLIKELY(isLineTerminator(m_current)))
592 goto returnError;
593 if (UNLIKELY(m_current == -1))
594 goto returnError;
595 record16(c: m_current);
596 shift1();
597 }
598 goto doneString;
599
600inStringEscapeSequence:
601 shift1();
602 if (m_current == 'x') {
603 shift1();
604 if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1)) {
605 record16(c: convertHex(c1: m_current, c2: m_next1));
606 shift2();
607 goto inString;
608 }
609 record16(c: 'x');
610 if (m_current == stringQuoteCharacter)
611 goto doneString;
612 goto inString;
613 }
614 if (m_current == 'u') {
615 shift1();
616 if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1) && isASCIIHexDigit(c: m_next2) && isASCIIHexDigit(c: m_next3)) {
617 record16(c: convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3));
618 shift4();
619 goto inString;
620 }
621 if (m_current == stringQuoteCharacter) {
622 record16(c: 'u');
623 goto doneString;
624 }
625 goto returnError;
626 }
627 if (isASCIIOctalDigit(c: m_current)) {
628 if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(c: m_next1) && isASCIIOctalDigit(c: m_next2)) {
629 record16(c: (m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
630 shift3();
631 goto inString;
632 }
633 if (isASCIIOctalDigit(c: m_next1)) {
634 record16(c: (m_current - '0') * 8 + m_next1 - '0');
635 shift2();
636 goto inString;
637 }
638 record16(c: m_current - '0');
639 shift1();
640 goto inString;
641 }
642 if (isLineTerminator(ch: m_current)) {
643 shiftLineTerminator();
644 goto inString;
645 }
646 if (m_current == -1)
647 goto returnError;
648 record16(c: singleEscape(c: m_current));
649 shift1();
650 goto inString;
651}
652
653startIdentifierWithBackslash:
654 shift1();
655 if (UNLIKELY(m_current != 'u'))
656 goto returnError;
657 shift1();
658 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
659 goto returnError;
660 token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3);
661 if (UNLIKELY(!isIdentStart(token)))
662 goto returnError;
663 goto inIdentifierAfterCharacterCheck;
664
665startIdentifierOrKeyword: {
666 const UChar* identifierStart = currentCharacter();
667 shift1();
668 while (isIdentPart(c: m_current))
669 shift1();
670 if (LIKELY(m_current != '\\')) {
671 lvalp->ident = makeIdentifier(characters: identifierStart, length: currentCharacter() - identifierStart);
672 goto doneIdentifierOrKeyword;
673 }
674 m_buffer16.append(data: identifierStart, dataSize: currentCharacter() - identifierStart);
675}
676
677 do {
678 shift1();
679 if (UNLIKELY(m_current != 'u'))
680 goto returnError;
681 shift1();
682 if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
683 goto returnError;
684 token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3);
685 if (UNLIKELY(!isIdentPart(token)))
686 goto returnError;
687inIdentifierAfterCharacterCheck:
688 record16(c: token);
689 shift4();
690
691 while (isIdentPart(c: m_current)) {
692 record16(c: m_current);
693 shift1();
694 }
695 } while (UNLIKELY(m_current == '\\'));
696 goto doneIdentifier;
697
698inSingleLineComment:
699 while (!isLineTerminator(ch: m_current)) {
700 if (UNLIKELY(m_current == -1))
701 return 0;
702 shift1();
703 }
704 shiftLineTerminator();
705 m_atLineStart = true;
706 m_terminator = true;
707 if (lastTokenWasRestrKeyword())
708 goto doneSemicolon;
709 goto start;
710
711inMultiLineComment:
712 shift2();
713 while (m_current != '*' || m_next1 != '/') {
714 if (isLineTerminator(ch: m_current))
715 shiftLineTerminator();
716 else {
717 shift1();
718 if (UNLIKELY(m_current == -1))
719 goto returnError;
720 }
721 }
722 shift2();
723 m_atLineStart = false;
724 goto start;
725
726startNumberWithZeroDigit:
727 shift1();
728 if ((m_current | 0x20) == 'x' && isASCIIHexDigit(c: m_next1)) {
729 shift1();
730 goto inHex;
731 }
732 if (m_current == '.') {
733 record8(c: '0');
734 record8(c: '.');
735 shift1();
736 goto inNumberAfterDecimalPoint;
737 }
738 if ((m_current | 0x20) == 'e') {
739 record8(c: '0');
740 record8(c: 'e');
741 shift1();
742 goto inExponentIndicator;
743 }
744 if (isASCIIOctalDigit(c: m_current))
745 goto inOctal;
746 if (isASCIIDigit(c: m_current))
747 goto startNumber;
748 lvalp->doubleValue = 0;
749 goto doneNumeric;
750
751inNumberAfterDecimalPoint:
752 while (isASCIIDigit(c: m_current)) {
753 record8(c: m_current);
754 shift1();
755 }
756 if ((m_current | 0x20) == 'e') {
757 record8(c: 'e');
758 shift1();
759 goto inExponentIndicator;
760 }
761 goto doneNumber;
762
763inExponentIndicator:
764 if (m_current == '+' || m_current == '-') {
765 record8(c: m_current);
766 shift1();
767 }
768 if (!isASCIIDigit(c: m_current))
769 goto returnError;
770 do {
771 record8(c: m_current);
772 shift1();
773 } while (isASCIIDigit(c: m_current));
774 goto doneNumber;
775
776inOctal: {
777 do {
778 record8(c: m_current);
779 shift1();
780 } while (isASCIIOctalDigit(c: m_current));
781 if (isASCIIDigit(c: m_current))
782 goto startNumber;
783
784 double dval = 0;
785
786 const char* end = m_buffer8.end();
787 for (const char* p = m_buffer8.data(); p < end; ++p) {
788 dval *= 8;
789 dval += *p - '0';
790 }
791 if (dval >= mantissaOverflowLowerBound)
792 dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 8);
793
794 m_buffer8.resize(size: 0);
795
796 lvalp->doubleValue = dval;
797 goto doneNumeric;
798}
799
800inHex: {
801 do {
802 record8(c: m_current);
803 shift1();
804 } while (isASCIIHexDigit(c: m_current));
805
806 double dval = 0;
807
808 const char* end = m_buffer8.end();
809 for (const char* p = m_buffer8.data(); p < end; ++p) {
810 dval *= 16;
811 dval += toASCIIHexValue(c: *p);
812 }
813 if (dval >= mantissaOverflowLowerBound)
814 dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 16);
815
816 m_buffer8.resize(size: 0);
817
818 lvalp->doubleValue = dval;
819 goto doneNumeric;
820}
821
822startNumber:
823 record8(c: m_current);
824 shift1();
825 while (isASCIIDigit(c: m_current)) {
826 record8(c: m_current);
827 shift1();
828 }
829 if (m_current == '.') {
830 record8(c: '.');
831 shift1();
832 goto inNumberAfterDecimalPoint;
833 }
834 if ((m_current | 0x20) == 'e') {
835 record8(c: 'e');
836 shift1();
837 goto inExponentIndicator;
838 }
839
840 // Fall through into doneNumber.
841
842doneNumber:
843 // Null-terminate string for strtod.
844 m_buffer8.append(val: '\0');
845 lvalp->doubleValue = WTF::strtod(s00: m_buffer8.data(), se: 0);
846 m_buffer8.resize(size: 0);
847
848 // Fall through into doneNumeric.
849
850doneNumeric:
851 // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
852 if (UNLIKELY(isIdentStart(m_current)))
853 goto returnError;
854
855 m_atLineStart = false;
856 m_delimited = false;
857 token = NUMBER;
858 goto returnToken;
859
860doneSemicolon:
861 token = ';';
862 m_delimited = true;
863 goto returnToken;
864
865doneIdentifier:
866 m_atLineStart = false;
867 m_delimited = false;
868 lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
869 m_buffer16.resize(size: 0);
870 token = IDENT;
871 goto returnToken;
872
873doneIdentifierOrKeyword: {
874 m_atLineStart = false;
875 m_delimited = false;
876 m_buffer16.resize(size: 0);
877 const HashEntry* entry = m_keywordTable.entry(globalData: m_globalData, identifier: *lvalp->ident);
878 token = entry ? entry->lexerValue() : IDENT;
879 goto returnToken;
880}
881
882doneString:
883 // Atomize constant strings in case they're later used in property lookup.
884 shift1();
885 m_atLineStart = false;
886 m_delimited = false;
887 lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
888 m_buffer16.resize(size: 0);
889 token = STRING;
890
891 // Fall through into returnToken.
892
893returnToken: {
894 int lineNumber = m_lineNumber;
895 llocp->first_line = lineNumber;
896 llocp->last_line = lineNumber;
897 llocp->first_column = startOffset;
898 llocp->last_column = currentOffset();
899
900 m_lastToken = token;
901 return token;
902}
903
904returnError:
905 m_error = true;
906 return -1;
907}
908
909bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix)
910{
911 ASSERT(m_buffer16.isEmpty());
912
913 bool lastWasEscape = false;
914 bool inBrackets = false;
915
916 if (patternPrefix) {
917 ASSERT(!isLineTerminator(patternPrefix));
918 ASSERT(patternPrefix != '/');
919 ASSERT(patternPrefix != '[');
920 record16(c: patternPrefix);
921 }
922
923 while (true) {
924 int current = m_current;
925
926 if (isLineTerminator(ch: current) || current == -1) {
927 m_buffer16.resize(size: 0);
928 return false;
929 }
930
931 shift1();
932
933 if (current == '/' && !lastWasEscape && !inBrackets)
934 break;
935
936 record16(c: current);
937
938 if (lastWasEscape) {
939 lastWasEscape = false;
940 continue;
941 }
942
943 switch (current) {
944 case '[':
945 inBrackets = true;
946 break;
947 case ']':
948 inBrackets = false;
949 break;
950 case '\\':
951 lastWasEscape = true;
952 break;
953 }
954 }
955
956 pattern = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
957 m_buffer16.resize(size: 0);
958
959 while (isIdentPart(c: m_current)) {
960 record16(c: m_current);
961 shift1();
962 }
963
964 flags = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
965 m_buffer16.resize(size: 0);
966
967 return true;
968}
969
970bool Lexer::skipRegExp()
971{
972 bool lastWasEscape = false;
973 bool inBrackets = false;
974
975 while (true) {
976 int current = m_current;
977
978 if (isLineTerminator(ch: current) || current == -1)
979 return false;
980
981 shift1();
982
983 if (current == '/' && !lastWasEscape && !inBrackets)
984 break;
985
986 if (lastWasEscape) {
987 lastWasEscape = false;
988 continue;
989 }
990
991 switch (current) {
992 case '[':
993 inBrackets = true;
994 break;
995 case ']':
996 inBrackets = false;
997 break;
998 case '\\':
999 lastWasEscape = true;
1000 break;
1001 }
1002 }
1003
1004 while (isIdentPart(c: m_current))
1005 shift1();
1006
1007 return true;
1008}
1009
1010void Lexer::clear()
1011{
1012 m_arena = 0;
1013 m_codeWithoutBOMs.clear();
1014
1015 Vector<char> newBuffer8;
1016 newBuffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
1017 m_buffer8.swap(other&: newBuffer8);
1018
1019 Vector<UChar> newBuffer16;
1020 newBuffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
1021 m_buffer16.swap(other&: newBuffer16);
1022
1023 m_isReparsing = false;
1024}
1025
1026SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1027{
1028 if (m_codeWithoutBOMs.isEmpty())
1029 return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
1030
1031 const UChar* data = m_source->provider()->data();
1032
1033 ASSERT(openBrace < closeBrace);
1034
1035 int numBOMsBeforeOpenBrace = 0;
1036 int numBOMsBetweenBraces = 0;
1037
1038 int i;
1039 for (i = m_source->startOffset(); i < openBrace; ++i)
1040 numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1041 for (; i < closeBrace; ++i)
1042 numBOMsBetweenBraces += data[i] == byteOrderMark;
1043
1044 return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1045 closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
1046}
1047
1048} // namespace JSC
1049

source code of qtscript/src/3rdparty/javascriptcore/JavaScriptCore/parser/Lexer.cpp