1 | /* |
2 | * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved. |
4 | * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Library General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Library General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Library General Public License |
17 | * along with this library; see the file COPYING.LIB. If not, write to |
18 | * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
19 | * Boston, MA 02110-1301, USA. |
20 | * |
21 | */ |
22 | |
23 | #include "config.h" |
24 | #include "Lexer.h" |
25 | |
26 | #include "JSFunction.h" |
27 | #include "JSGlobalObjectFunctions.h" |
28 | #include "NodeInfo.h" |
29 | #include "Nodes.h" |
30 | #include "dtoa.h" |
31 | #include <ctype.h> |
32 | #include <limits.h> |
33 | #include <string.h> |
34 | #include <wtf/Assertions.h> |
35 | |
36 | using namespace WTF; |
37 | using namespace Unicode; |
38 | |
39 | // We can't specify the namespace in yacc's C output, so do it here instead. |
40 | using namespace JSC; |
41 | |
42 | #include "Grammar.h" |
43 | #include "Lookup.h" |
44 | #include "Lexer.lut.h" |
45 | |
46 | namespace JSC { |
47 | |
// U+FEFF. Lexer::setCode() looks for this character in the source text and, if it
// finds one, lexes from a copy of the text with every occurrence removed.
static const UChar byteOrderMark = 0xFEFF;
49 | |
50 | Lexer::Lexer(JSGlobalData* globalData) |
51 | : m_isReparsing(false) |
52 | , m_globalData(globalData) |
53 | , m_keywordTable(JSC::mainTable) |
54 | { |
55 | m_buffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); |
56 | m_buffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); |
57 | } |
58 | |
// Destroys the lexer; the only explicit teardown is calling deleteTable() on the
// keyword table.
Lexer::~Lexer()
{
    m_keywordTable.deleteTable();
}
63 | |
64 | inline const UChar* Lexer::currentCharacter() const |
65 | { |
66 | return m_code - 4; |
67 | } |
68 | |
69 | inline int Lexer::currentOffset() const |
70 | { |
71 | return currentCharacter() - m_codeStart; |
72 | } |
73 | |
74 | ALWAYS_INLINE void Lexer::shift1() |
75 | { |
76 | m_current = m_next1; |
77 | m_next1 = m_next2; |
78 | m_next2 = m_next3; |
79 | if (LIKELY(m_code < m_codeEnd)) |
80 | m_next3 = m_code[0]; |
81 | else |
82 | m_next3 = -1; |
83 | |
84 | ++m_code; |
85 | } |
86 | |
87 | ALWAYS_INLINE void Lexer::shift2() |
88 | { |
89 | m_current = m_next2; |
90 | m_next1 = m_next3; |
91 | if (LIKELY(m_code + 1 < m_codeEnd)) { |
92 | m_next2 = m_code[0]; |
93 | m_next3 = m_code[1]; |
94 | } else { |
95 | m_next2 = m_code < m_codeEnd ? m_code[0] : -1; |
96 | m_next3 = -1; |
97 | } |
98 | |
99 | m_code += 2; |
100 | } |
101 | |
102 | ALWAYS_INLINE void Lexer::shift3() |
103 | { |
104 | m_current = m_next3; |
105 | if (LIKELY(m_code + 2 < m_codeEnd)) { |
106 | m_next1 = m_code[0]; |
107 | m_next2 = m_code[1]; |
108 | m_next3 = m_code[2]; |
109 | } else { |
110 | m_next1 = m_code < m_codeEnd ? m_code[0] : -1; |
111 | m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1; |
112 | m_next3 = -1; |
113 | } |
114 | |
115 | m_code += 3; |
116 | } |
117 | |
118 | ALWAYS_INLINE void Lexer::shift4() |
119 | { |
120 | if (LIKELY(m_code + 3 < m_codeEnd)) { |
121 | m_current = m_code[0]; |
122 | m_next1 = m_code[1]; |
123 | m_next2 = m_code[2]; |
124 | m_next3 = m_code[3]; |
125 | } else { |
126 | m_current = m_code < m_codeEnd ? m_code[0] : -1; |
127 | m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1; |
128 | m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1; |
129 | m_next3 = -1; |
130 | } |
131 | |
132 | m_code += 4; |
133 | } |
134 | |
135 | void Lexer::setCode(const SourceCode& source, ParserArena& arena) |
136 | { |
137 | m_arena = &arena.identifierArena(); |
138 | |
139 | m_lineNumber = source.firstLine(); |
140 | m_delimited = false; |
141 | m_lastToken = -1; |
142 | |
143 | const UChar* data = source.provider()->data(); |
144 | |
145 | m_source = &source; |
146 | m_codeStart = data; |
147 | m_code = data + source.startOffset(); |
148 | m_codeEnd = data + source.endOffset(); |
149 | m_error = false; |
150 | m_atLineStart = true; |
151 | |
152 | // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. |
153 | // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. |
154 | if (source.provider()->hasBOMs()) { |
155 | for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { |
156 | if (UNLIKELY(*p == byteOrderMark)) { |
157 | copyCodeWithoutBOMs(); |
158 | break; |
159 | } |
160 | } |
161 | } |
162 | |
163 | // Read the first characters into the 4-character buffer. |
164 | shift4(); |
165 | ASSERT(currentOffset() == source.startOffset()); |
166 | } |
167 | |
168 | void Lexer::copyCodeWithoutBOMs() |
169 | { |
170 | // Note: In this case, the character offset data for debugging will be incorrect. |
171 | // If it's important to correctly debug code with extraneous BOMs, then the caller |
172 | // should strip the BOMs when creating the SourceProvider object and do its own |
173 | // mapping of offsets within the stripped text to original text offset. |
174 | |
175 | m_codeWithoutBOMs.reserveCapacity(newCapacity: m_codeEnd - m_code); |
176 | for (const UChar* p = m_code; p < m_codeEnd; ++p) { |
177 | UChar c = *p; |
178 | if (c != byteOrderMark) |
179 | m_codeWithoutBOMs.append(val: c); |
180 | } |
181 | ptrdiff_t startDelta = m_codeStart - m_code; |
182 | m_code = m_codeWithoutBOMs.data(); |
183 | m_codeStart = m_code + startDelta; |
184 | m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); |
185 | } |
186 | |
187 | void Lexer::shiftLineTerminator() |
188 | { |
189 | ASSERT(isLineTerminator(m_current)); |
190 | |
191 | // Allow both CRLF and LFCR. |
192 | if (m_current + m_next1 == '\n' + '\r') |
193 | shift2(); |
194 | else |
195 | shift1(); |
196 | |
197 | ++m_lineNumber; |
198 | } |
199 | |
200 | ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length) |
201 | { |
202 | return &m_arena->makeIdentifier(globalData: m_globalData, characters, length); |
203 | } |
204 | |
205 | inline bool Lexer::lastTokenWasRestrKeyword() const |
206 | { |
207 | return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; |
208 | } |
209 | |
210 | static NEVER_INLINE bool isNonASCIIIdentStart(int c) |
211 | { |
212 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); |
213 | } |
214 | |
215 | static inline bool isIdentStart(int c) |
216 | { |
217 | return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); |
218 | } |
219 | |
220 | static NEVER_INLINE bool isNonASCIIIdentPart(int c) |
221 | { |
222 | return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other |
223 | | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); |
224 | } |
225 | |
226 | static inline bool isIdentPart(int c) |
227 | { |
228 | return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); |
229 | } |
230 | |
// Translates the character that follows a backslash in a string literal into the
// character the escape stands for. The six control-character escapes (\b \t \n
// \v \f \r) map to their code points; any other character stands for itself.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
250 | |
251 | inline void Lexer::record8(int c) |
252 | { |
253 | ASSERT(c >= 0); |
254 | ASSERT(c <= 0xFF); |
255 | m_buffer8.append(val: static_cast<char>(c)); |
256 | } |
257 | |
258 | inline void Lexer::record16(UChar c) |
259 | { |
260 | m_buffer16.append(val: c); |
261 | } |
262 | |
263 | inline void Lexer::record16(int c) |
264 | { |
265 | ASSERT(c >= 0); |
266 | ASSERT(c <= USHRT_MAX); |
267 | record16(c: UChar(static_cast<unsigned short>(c))); |
268 | } |
269 | |
270 | int Lexer::lex(void* p1, void* p2) |
271 | { |
272 | ASSERT(!m_error); |
273 | ASSERT(m_buffer8.isEmpty()); |
274 | ASSERT(m_buffer16.isEmpty()); |
275 | |
276 | YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); |
277 | YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); |
278 | int token = 0; |
279 | m_terminator = false; |
280 | |
281 | start: |
282 | while (isWhiteSpace(ch: m_current)) |
283 | shift1(); |
284 | |
285 | int startOffset = currentOffset(); |
286 | |
287 | if (m_current == -1) { |
288 | #ifndef QT_BUILD_SCRIPT_LIB /* the parser takes cate about automatic semicolon. |
289 | this might add incorrect semicolons */ |
290 | //m_delimited and m_isReparsing are now useless |
291 | if (!m_terminator && !m_delimited && !m_isReparsing) { |
292 | // automatic semicolon insertion if program incomplete |
293 | token = ';'; |
294 | goto doneSemicolon; |
295 | } |
296 | #endif |
297 | return 0; |
298 | } |
299 | |
300 | m_delimited = false; |
301 | switch (m_current) { |
302 | case '>': |
303 | if (m_next1 == '>' && m_next2 == '>') { |
304 | if (m_next3 == '=') { |
305 | shift4(); |
306 | token = URSHIFTEQUAL; |
307 | break; |
308 | } |
309 | shift3(); |
310 | token = URSHIFT; |
311 | break; |
312 | } |
313 | if (m_next1 == '>') { |
314 | if (m_next2 == '=') { |
315 | shift3(); |
316 | token = RSHIFTEQUAL; |
317 | break; |
318 | } |
319 | shift2(); |
320 | token = RSHIFT; |
321 | break; |
322 | } |
323 | if (m_next1 == '=') { |
324 | shift2(); |
325 | token = GE; |
326 | break; |
327 | } |
328 | shift1(); |
329 | token = '>'; |
330 | break; |
331 | case '=': |
332 | if (m_next1 == '=') { |
333 | if (m_next2 == '=') { |
334 | shift3(); |
335 | token = STREQ; |
336 | break; |
337 | } |
338 | shift2(); |
339 | token = EQEQ; |
340 | break; |
341 | } |
342 | shift1(); |
343 | token = '='; |
344 | break; |
345 | case '!': |
346 | if (m_next1 == '=') { |
347 | if (m_next2 == '=') { |
348 | shift3(); |
349 | token = STRNEQ; |
350 | break; |
351 | } |
352 | shift2(); |
353 | token = NE; |
354 | break; |
355 | } |
356 | shift1(); |
357 | token = '!'; |
358 | break; |
359 | case '<': |
360 | if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { |
361 | // <!-- marks the beginning of a line comment (for www usage) |
362 | shift4(); |
363 | goto inSingleLineComment; |
364 | } |
365 | if (m_next1 == '<') { |
366 | if (m_next2 == '=') { |
367 | shift3(); |
368 | token = LSHIFTEQUAL; |
369 | break; |
370 | } |
371 | shift2(); |
372 | token = LSHIFT; |
373 | break; |
374 | } |
375 | if (m_next1 == '=') { |
376 | shift2(); |
377 | token = LE; |
378 | break; |
379 | } |
380 | shift1(); |
381 | token = '<'; |
382 | break; |
383 | case '+': |
384 | if (m_next1 == '+') { |
385 | shift2(); |
386 | if (m_terminator) { |
387 | token = AUTOPLUSPLUS; |
388 | break; |
389 | } |
390 | token = PLUSPLUS; |
391 | break; |
392 | } |
393 | if (m_next1 == '=') { |
394 | shift2(); |
395 | token = PLUSEQUAL; |
396 | break; |
397 | } |
398 | shift1(); |
399 | token = '+'; |
400 | break; |
401 | case '-': |
402 | if (m_next1 == '-') { |
403 | if (m_atLineStart && m_next2 == '>') { |
404 | shift3(); |
405 | goto inSingleLineComment; |
406 | } |
407 | shift2(); |
408 | if (m_terminator) { |
409 | token = AUTOMINUSMINUS; |
410 | break; |
411 | } |
412 | token = MINUSMINUS; |
413 | break; |
414 | } |
415 | if (m_next1 == '=') { |
416 | shift2(); |
417 | token = MINUSEQUAL; |
418 | break; |
419 | } |
420 | shift1(); |
421 | token = '-'; |
422 | break; |
423 | case '*': |
424 | if (m_next1 == '=') { |
425 | shift2(); |
426 | token = MULTEQUAL; |
427 | break; |
428 | } |
429 | shift1(); |
430 | token = '*'; |
431 | break; |
432 | case '/': |
433 | if (m_next1 == '/') { |
434 | shift2(); |
435 | goto inSingleLineComment; |
436 | } |
437 | if (m_next1 == '*') |
438 | goto inMultiLineComment; |
439 | if (m_next1 == '=') { |
440 | shift2(); |
441 | token = DIVEQUAL; |
442 | break; |
443 | } |
444 | shift1(); |
445 | token = '/'; |
446 | break; |
447 | case '&': |
448 | if (m_next1 == '&') { |
449 | shift2(); |
450 | token = AND; |
451 | break; |
452 | } |
453 | if (m_next1 == '=') { |
454 | shift2(); |
455 | token = ANDEQUAL; |
456 | break; |
457 | } |
458 | shift1(); |
459 | token = '&'; |
460 | break; |
461 | case '^': |
462 | if (m_next1 == '=') { |
463 | shift2(); |
464 | token = XOREQUAL; |
465 | break; |
466 | } |
467 | shift1(); |
468 | token = '^'; |
469 | break; |
470 | case '%': |
471 | if (m_next1 == '=') { |
472 | shift2(); |
473 | token = MODEQUAL; |
474 | break; |
475 | } |
476 | shift1(); |
477 | token = '%'; |
478 | break; |
479 | case '|': |
480 | if (m_next1 == '=') { |
481 | shift2(); |
482 | token = OREQUAL; |
483 | break; |
484 | } |
485 | if (m_next1 == '|') { |
486 | shift2(); |
487 | token = OR; |
488 | break; |
489 | } |
490 | shift1(); |
491 | token = '|'; |
492 | break; |
493 | case '.': |
494 | if (isASCIIDigit(c: m_next1)) { |
495 | record8(c: '.'); |
496 | shift1(); |
497 | goto inNumberAfterDecimalPoint; |
498 | } |
499 | token = '.'; |
500 | shift1(); |
501 | break; |
502 | case ',': |
503 | case '~': |
504 | case '?': |
505 | case ':': |
506 | case '(': |
507 | case ')': |
508 | case '[': |
509 | case ']': |
510 | token = m_current; |
511 | shift1(); |
512 | break; |
513 | case ';': |
514 | shift1(); |
515 | m_delimited = true; |
516 | token = ';'; |
517 | break; |
518 | case '{': |
519 | lvalp->intValue = currentOffset(); |
520 | shift1(); |
521 | token = OPENBRACE; |
522 | break; |
523 | case '}': |
524 | lvalp->intValue = currentOffset(); |
525 | shift1(); |
526 | m_delimited = true; |
527 | token = CLOSEBRACE; |
528 | break; |
529 | case '\\': |
530 | goto startIdentifierWithBackslash; |
531 | case '0': |
532 | goto startNumberWithZeroDigit; |
533 | case '1': |
534 | case '2': |
535 | case '3': |
536 | case '4': |
537 | case '5': |
538 | case '6': |
539 | case '7': |
540 | case '8': |
541 | case '9': |
542 | goto startNumber; |
543 | case '"': |
544 | case '\'': |
545 | goto startString; |
546 | default: |
547 | if (isIdentStart(c: m_current)) |
548 | goto startIdentifierOrKeyword; |
549 | if (isLineTerminator(ch: m_current)) { |
550 | shiftLineTerminator(); |
551 | m_atLineStart = true; |
552 | m_terminator = true; |
553 | if (lastTokenWasRestrKeyword()) { |
554 | token = ';'; |
555 | goto doneSemicolon; |
556 | } |
557 | goto start; |
558 | } |
559 | goto returnError; |
560 | } |
561 | |
562 | m_atLineStart = false; |
563 | goto returnToken; |
564 | |
565 | startString: { |
566 | int stringQuoteCharacter = m_current; |
567 | shift1(); |
568 | |
569 | const UChar* stringStart = currentCharacter(); |
570 | while (m_current != stringQuoteCharacter) { |
571 | // Fast check for characters that require special handling. |
572 | // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently |
573 | // as possible, and lets through all common ASCII characters. |
574 | if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { |
575 | m_buffer16.append(data: stringStart, dataSize: currentCharacter() - stringStart); |
576 | goto inString; |
577 | } |
578 | shift1(); |
579 | } |
580 | lvalp->ident = makeIdentifier(characters: stringStart, length: currentCharacter() - stringStart); |
581 | shift1(); |
582 | m_atLineStart = false; |
583 | m_delimited = false; |
584 | token = STRING; |
585 | goto returnToken; |
586 | |
587 | inString: |
588 | while (m_current != stringQuoteCharacter) { |
589 | if (m_current == '\\') |
590 | goto inStringEscapeSequence; |
591 | if (UNLIKELY(isLineTerminator(m_current))) |
592 | goto returnError; |
593 | if (UNLIKELY(m_current == -1)) |
594 | goto returnError; |
595 | record16(c: m_current); |
596 | shift1(); |
597 | } |
598 | goto doneString; |
599 | |
600 | inStringEscapeSequence: |
601 | shift1(); |
602 | if (m_current == 'x') { |
603 | shift1(); |
604 | if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1)) { |
605 | record16(c: convertHex(c1: m_current, c2: m_next1)); |
606 | shift2(); |
607 | goto inString; |
608 | } |
609 | record16(c: 'x'); |
610 | if (m_current == stringQuoteCharacter) |
611 | goto doneString; |
612 | goto inString; |
613 | } |
614 | if (m_current == 'u') { |
615 | shift1(); |
616 | if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1) && isASCIIHexDigit(c: m_next2) && isASCIIHexDigit(c: m_next3)) { |
617 | record16(c: convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3)); |
618 | shift4(); |
619 | goto inString; |
620 | } |
621 | if (m_current == stringQuoteCharacter) { |
622 | record16(c: 'u'); |
623 | goto doneString; |
624 | } |
625 | goto returnError; |
626 | } |
627 | if (isASCIIOctalDigit(c: m_current)) { |
628 | if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(c: m_next1) && isASCIIOctalDigit(c: m_next2)) { |
629 | record16(c: (m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); |
630 | shift3(); |
631 | goto inString; |
632 | } |
633 | if (isASCIIOctalDigit(c: m_next1)) { |
634 | record16(c: (m_current - '0') * 8 + m_next1 - '0'); |
635 | shift2(); |
636 | goto inString; |
637 | } |
638 | record16(c: m_current - '0'); |
639 | shift1(); |
640 | goto inString; |
641 | } |
642 | if (isLineTerminator(ch: m_current)) { |
643 | shiftLineTerminator(); |
644 | goto inString; |
645 | } |
646 | if (m_current == -1) |
647 | goto returnError; |
648 | record16(c: singleEscape(c: m_current)); |
649 | shift1(); |
650 | goto inString; |
651 | } |
652 | |
653 | startIdentifierWithBackslash: |
654 | shift1(); |
655 | if (UNLIKELY(m_current != 'u')) |
656 | goto returnError; |
657 | shift1(); |
658 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) |
659 | goto returnError; |
660 | token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3); |
661 | if (UNLIKELY(!isIdentStart(token))) |
662 | goto returnError; |
663 | goto inIdentifierAfterCharacterCheck; |
664 | |
665 | startIdentifierOrKeyword: { |
666 | const UChar* identifierStart = currentCharacter(); |
667 | shift1(); |
668 | while (isIdentPart(c: m_current)) |
669 | shift1(); |
670 | if (LIKELY(m_current != '\\')) { |
671 | lvalp->ident = makeIdentifier(characters: identifierStart, length: currentCharacter() - identifierStart); |
672 | goto doneIdentifierOrKeyword; |
673 | } |
674 | m_buffer16.append(data: identifierStart, dataSize: currentCharacter() - identifierStart); |
675 | } |
676 | |
677 | do { |
678 | shift1(); |
679 | if (UNLIKELY(m_current != 'u')) |
680 | goto returnError; |
681 | shift1(); |
682 | if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) |
683 | goto returnError; |
684 | token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3); |
685 | if (UNLIKELY(!isIdentPart(token))) |
686 | goto returnError; |
687 | inIdentifierAfterCharacterCheck: |
688 | record16(c: token); |
689 | shift4(); |
690 | |
691 | while (isIdentPart(c: m_current)) { |
692 | record16(c: m_current); |
693 | shift1(); |
694 | } |
695 | } while (UNLIKELY(m_current == '\\')); |
696 | goto doneIdentifier; |
697 | |
698 | : |
699 | while (!isLineTerminator(ch: m_current)) { |
700 | if (UNLIKELY(m_current == -1)) |
701 | return 0; |
702 | shift1(); |
703 | } |
704 | shiftLineTerminator(); |
705 | m_atLineStart = true; |
706 | m_terminator = true; |
707 | if (lastTokenWasRestrKeyword()) |
708 | goto doneSemicolon; |
709 | goto start; |
710 | |
711 | : |
712 | shift2(); |
713 | while (m_current != '*' || m_next1 != '/') { |
714 | if (isLineTerminator(ch: m_current)) |
715 | shiftLineTerminator(); |
716 | else { |
717 | shift1(); |
718 | if (UNLIKELY(m_current == -1)) |
719 | goto returnError; |
720 | } |
721 | } |
722 | shift2(); |
723 | m_atLineStart = false; |
724 | goto start; |
725 | |
726 | startNumberWithZeroDigit: |
727 | shift1(); |
728 | if ((m_current | 0x20) == 'x' && isASCIIHexDigit(c: m_next1)) { |
729 | shift1(); |
730 | goto inHex; |
731 | } |
732 | if (m_current == '.') { |
733 | record8(c: '0'); |
734 | record8(c: '.'); |
735 | shift1(); |
736 | goto inNumberAfterDecimalPoint; |
737 | } |
738 | if ((m_current | 0x20) == 'e') { |
739 | record8(c: '0'); |
740 | record8(c: 'e'); |
741 | shift1(); |
742 | goto inExponentIndicator; |
743 | } |
744 | if (isASCIIOctalDigit(c: m_current)) |
745 | goto inOctal; |
746 | if (isASCIIDigit(c: m_current)) |
747 | goto startNumber; |
748 | lvalp->doubleValue = 0; |
749 | goto doneNumeric; |
750 | |
751 | inNumberAfterDecimalPoint: |
752 | while (isASCIIDigit(c: m_current)) { |
753 | record8(c: m_current); |
754 | shift1(); |
755 | } |
756 | if ((m_current | 0x20) == 'e') { |
757 | record8(c: 'e'); |
758 | shift1(); |
759 | goto inExponentIndicator; |
760 | } |
761 | goto doneNumber; |
762 | |
763 | inExponentIndicator: |
764 | if (m_current == '+' || m_current == '-') { |
765 | record8(c: m_current); |
766 | shift1(); |
767 | } |
768 | if (!isASCIIDigit(c: m_current)) |
769 | goto returnError; |
770 | do { |
771 | record8(c: m_current); |
772 | shift1(); |
773 | } while (isASCIIDigit(c: m_current)); |
774 | goto doneNumber; |
775 | |
776 | inOctal: { |
777 | do { |
778 | record8(c: m_current); |
779 | shift1(); |
780 | } while (isASCIIOctalDigit(c: m_current)); |
781 | if (isASCIIDigit(c: m_current)) |
782 | goto startNumber; |
783 | |
784 | double dval = 0; |
785 | |
786 | const char* end = m_buffer8.end(); |
787 | for (const char* p = m_buffer8.data(); p < end; ++p) { |
788 | dval *= 8; |
789 | dval += *p - '0'; |
790 | } |
791 | if (dval >= mantissaOverflowLowerBound) |
792 | dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 8); |
793 | |
794 | m_buffer8.resize(size: 0); |
795 | |
796 | lvalp->doubleValue = dval; |
797 | goto doneNumeric; |
798 | } |
799 | |
800 | inHex: { |
801 | do { |
802 | record8(c: m_current); |
803 | shift1(); |
804 | } while (isASCIIHexDigit(c: m_current)); |
805 | |
806 | double dval = 0; |
807 | |
808 | const char* end = m_buffer8.end(); |
809 | for (const char* p = m_buffer8.data(); p < end; ++p) { |
810 | dval *= 16; |
811 | dval += toASCIIHexValue(c: *p); |
812 | } |
813 | if (dval >= mantissaOverflowLowerBound) |
814 | dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: 16); |
815 | |
816 | m_buffer8.resize(size: 0); |
817 | |
818 | lvalp->doubleValue = dval; |
819 | goto doneNumeric; |
820 | } |
821 | |
822 | startNumber: |
823 | record8(c: m_current); |
824 | shift1(); |
825 | while (isASCIIDigit(c: m_current)) { |
826 | record8(c: m_current); |
827 | shift1(); |
828 | } |
829 | if (m_current == '.') { |
830 | record8(c: '.'); |
831 | shift1(); |
832 | goto inNumberAfterDecimalPoint; |
833 | } |
834 | if ((m_current | 0x20) == 'e') { |
835 | record8(c: 'e'); |
836 | shift1(); |
837 | goto inExponentIndicator; |
838 | } |
839 | |
840 | // Fall through into doneNumber. |
841 | |
842 | doneNumber: |
843 | // Null-terminate string for strtod. |
844 | m_buffer8.append(val: '\0'); |
845 | lvalp->doubleValue = WTF::strtod(s00: m_buffer8.data(), se: 0); |
846 | m_buffer8.resize(size: 0); |
847 | |
848 | // Fall through into doneNumeric. |
849 | |
850 | doneNumeric: |
851 | // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. |
852 | if (UNLIKELY(isIdentStart(m_current))) |
853 | goto returnError; |
854 | |
855 | m_atLineStart = false; |
856 | m_delimited = false; |
857 | token = NUMBER; |
858 | goto returnToken; |
859 | |
860 | doneSemicolon: |
861 | token = ';'; |
862 | m_delimited = true; |
863 | goto returnToken; |
864 | |
865 | doneIdentifier: |
866 | m_atLineStart = false; |
867 | m_delimited = false; |
868 | lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); |
869 | m_buffer16.resize(size: 0); |
870 | token = IDENT; |
871 | goto returnToken; |
872 | |
873 | doneIdentifierOrKeyword: { |
874 | m_atLineStart = false; |
875 | m_delimited = false; |
876 | m_buffer16.resize(size: 0); |
877 | const HashEntry* entry = m_keywordTable.entry(globalData: m_globalData, identifier: *lvalp->ident); |
878 | token = entry ? entry->lexerValue() : IDENT; |
879 | goto returnToken; |
880 | } |
881 | |
882 | doneString: |
883 | // Atomize constant strings in case they're later used in property lookup. |
884 | shift1(); |
885 | m_atLineStart = false; |
886 | m_delimited = false; |
887 | lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); |
888 | m_buffer16.resize(size: 0); |
889 | token = STRING; |
890 | |
891 | // Fall through into returnToken. |
892 | |
893 | returnToken: { |
894 | int lineNumber = m_lineNumber; |
895 | llocp->first_line = lineNumber; |
896 | llocp->last_line = lineNumber; |
897 | llocp->first_column = startOffset; |
898 | llocp->last_column = currentOffset(); |
899 | |
900 | m_lastToken = token; |
901 | return token; |
902 | } |
903 | |
904 | returnError: |
905 | m_error = true; |
906 | return -1; |
907 | } |
908 | |
909 | bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) |
910 | { |
911 | ASSERT(m_buffer16.isEmpty()); |
912 | |
913 | bool lastWasEscape = false; |
914 | bool inBrackets = false; |
915 | |
916 | if (patternPrefix) { |
917 | ASSERT(!isLineTerminator(patternPrefix)); |
918 | ASSERT(patternPrefix != '/'); |
919 | ASSERT(patternPrefix != '['); |
920 | record16(c: patternPrefix); |
921 | } |
922 | |
923 | while (true) { |
924 | int current = m_current; |
925 | |
926 | if (isLineTerminator(ch: current) || current == -1) { |
927 | m_buffer16.resize(size: 0); |
928 | return false; |
929 | } |
930 | |
931 | shift1(); |
932 | |
933 | if (current == '/' && !lastWasEscape && !inBrackets) |
934 | break; |
935 | |
936 | record16(c: current); |
937 | |
938 | if (lastWasEscape) { |
939 | lastWasEscape = false; |
940 | continue; |
941 | } |
942 | |
943 | switch (current) { |
944 | case '[': |
945 | inBrackets = true; |
946 | break; |
947 | case ']': |
948 | inBrackets = false; |
949 | break; |
950 | case '\\': |
951 | lastWasEscape = true; |
952 | break; |
953 | } |
954 | } |
955 | |
956 | pattern = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); |
957 | m_buffer16.resize(size: 0); |
958 | |
959 | while (isIdentPart(c: m_current)) { |
960 | record16(c: m_current); |
961 | shift1(); |
962 | } |
963 | |
964 | flags = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size()); |
965 | m_buffer16.resize(size: 0); |
966 | |
967 | return true; |
968 | } |
969 | |
970 | bool Lexer::skipRegExp() |
971 | { |
972 | bool lastWasEscape = false; |
973 | bool inBrackets = false; |
974 | |
975 | while (true) { |
976 | int current = m_current; |
977 | |
978 | if (isLineTerminator(ch: current) || current == -1) |
979 | return false; |
980 | |
981 | shift1(); |
982 | |
983 | if (current == '/' && !lastWasEscape && !inBrackets) |
984 | break; |
985 | |
986 | if (lastWasEscape) { |
987 | lastWasEscape = false; |
988 | continue; |
989 | } |
990 | |
991 | switch (current) { |
992 | case '[': |
993 | inBrackets = true; |
994 | break; |
995 | case ']': |
996 | inBrackets = false; |
997 | break; |
998 | case '\\': |
999 | lastWasEscape = true; |
1000 | break; |
1001 | } |
1002 | } |
1003 | |
1004 | while (isIdentPart(c: m_current)) |
1005 | shift1(); |
1006 | |
1007 | return true; |
1008 | } |
1009 | |
1010 | void Lexer::clear() |
1011 | { |
1012 | m_arena = 0; |
1013 | m_codeWithoutBOMs.clear(); |
1014 | |
1015 | Vector<char> newBuffer8; |
1016 | newBuffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); |
1017 | m_buffer8.swap(other&: newBuffer8); |
1018 | |
1019 | Vector<UChar> newBuffer16; |
1020 | newBuffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity); |
1021 | m_buffer16.swap(other&: newBuffer16); |
1022 | |
1023 | m_isReparsing = false; |
1024 | } |
1025 | |
1026 | SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) |
1027 | { |
1028 | if (m_codeWithoutBOMs.isEmpty()) |
1029 | return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); |
1030 | |
1031 | const UChar* data = m_source->provider()->data(); |
1032 | |
1033 | ASSERT(openBrace < closeBrace); |
1034 | |
1035 | int numBOMsBeforeOpenBrace = 0; |
1036 | int numBOMsBetweenBraces = 0; |
1037 | |
1038 | int i; |
1039 | for (i = m_source->startOffset(); i < openBrace; ++i) |
1040 | numBOMsBeforeOpenBrace += data[i] == byteOrderMark; |
1041 | for (; i < closeBrace; ++i) |
1042 | numBOMsBetweenBraces += data[i] == byteOrderMark; |
1043 | |
1044 | return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, |
1045 | closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine); |
1046 | } |
1047 | |
1048 | } // namespace JSC |
1049 | |