Lexer.cpp source code [qtscript/src/3rdparty/javascriptcore/JavaScriptCore/parser/Lexer.cpp]

1	/*
2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3	* Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
4	* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Library General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Library General Public License for more details.
15	*
16	* You should have received a copy of the GNU Library General Public License
17	* along with this library; see the file COPYING.LIB. If not, write to
18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19	* Boston, MA 02110-1301, USA.
20	*
21	*/
22
23	#include "config.h"
24	#include "Lexer.h"
25
26	#include "JSFunction.h"
27	#include "JSGlobalObjectFunctions.h"
28	#include "NodeInfo.h"
29	#include "Nodes.h"
30	#include "dtoa.h"
31	#include <ctype.h>
32	#include <limits.h>
33	#include <string.h>
34	#include <wtf/Assertions.h>
35
36	using namespace WTF;
37	using namespace Unicode;
38
39	// We can't specify the namespace in yacc's C output, so do it here instead.
40	using namespace JSC;
41
42	#include "Grammar.h"
43	#include "Lookup.h"
44	#include "Lexer.lut.h"
45
46	namespace JSC {
47
48	static const UChar byteOrderMark = `0xFEFF`;
49
50	Lexer::Lexer(JSGlobalData* globalData)
51	: m_isReparsing(false)
52	, m_globalData(globalData)
53	, m_keywordTable (JSC::mainTable)
54	{
55	m_buffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
56	m_buffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
57	}
58
59	Lexer::~Lexer()
60	{
61	m_keywordTable.deleteTable();
62	}
63
64	inline const UChar* Lexer::currentCharacter() const
65	{
66	return m_code - `4`;
67	}
68
69	inline int Lexer::currentOffset() const
70	{
71	return currentCharacter() - m_codeStart;
72	}
73
74	ALWAYS_INLINE void Lexer::shift1()
75	{
76	m_current = m_next1;
77	m_next1 = m_next2;
78	m_next2 = m_next3;
79	if (LIKELY(m_code < m_codeEnd))
80	m_next3 = m_code[`0`];
81	else
82	m_next3 = -`1`;
83
84	++m_code;
85	}
86
87	ALWAYS_INLINE void Lexer::shift2()
88	{
89	m_current = m_next2;
90	m_next1 = m_next3;
91	if (LIKELY(m_code + `1` < m_codeEnd)) {
92	m_next2 = m_code[`0`];
93	m_next3 = m_code[`1`];
94	} else {
95	m_next2 = m_code < m_codeEnd ? m_code[`0`] : -`1`;
96	m_next3 = -`1`;
97	}
98
99	m_code += `2`;
100	}
101
102	ALWAYS_INLINE void Lexer::shift3()
103	{
104	m_current = m_next3;
105	if (LIKELY(m_code + `2` < m_codeEnd)) {
106	m_next1 = m_code[`0`];
107	m_next2 = m_code[`1`];
108	m_next3 = m_code[`2`];
109	} else {
110	m_next1 = m_code < m_codeEnd ? m_code[`0`] : -`1`;
111	m_next2 = m_code + `1` < m_codeEnd ? m_code[`1`] : -`1`;
112	m_next3 = -`1`;
113	}
114
115	m_code += `3`;
116	}
117
118	ALWAYS_INLINE void Lexer::shift4()
119	{
120	if (LIKELY(m_code + `3` < m_codeEnd)) {
121	m_current = m_code[`0`];
122	m_next1 = m_code[`1`];
123	m_next2 = m_code[`2`];
124	m_next3 = m_code[`3`];
125	} else {
126	m_current = m_code < m_codeEnd ? m_code[`0`] : -`1`;
127	m_next1 = m_code + `1` < m_codeEnd ? m_code[`1`] : -`1`;
128	m_next2 = m_code + `2` < m_codeEnd ? m_code[`2`] : -`1`;
129	m_next3 = -`1`;
130	}
131
132	m_code += `4`;
133	}
134
135	void Lexer::setCode(const SourceCode& source, ParserArena& arena)
136	{
137	m_arena = &arena.identifierArena();
138
139	m_lineNumber = source.firstLine();
140	m_delimited = false;
141	m_lastToken = -`1`;
142
143	const UChar* data = source.provider()->data();
144
145	m_source = &source;
146	m_codeStart = data;
147	m_code = data + source.startOffset();
148	m_codeEnd = data + source.endOffset();
149	m_error = false;
150	m_atLineStart = true;
151
152	// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
153	// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
154	if (source.provider()->hasBOMs()) {
155	for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) {
156	if (UNLIKELY(*p == byteOrderMark)) {
157	copyCodeWithoutBOMs();
158	break;
159	}
160	}
161	}
162
163	// Read the first characters into the 4-character buffer.
164	shift4();
165	ASSERT(currentOffset() == source.startOffset());
166	}
167
168	void Lexer::copyCodeWithoutBOMs()
169	{
170	// Note: In this case, the character offset data for debugging will be incorrect.
171	// If it's important to correctly debug code with extraneous BOMs, then the caller
172	// should strip the BOMs when creating the SourceProvider object and do its own
173	// mapping of offsets within the stripped text to original text offset.
174
175	m_codeWithoutBOMs.reserveCapacity(newCapacity: m_codeEnd - m_code);
176	for (const UChar* p = m_code; p < m_codeEnd; ++p) {
177	UChar c = *p;
178	if (c != byteOrderMark)
179	m_codeWithoutBOMs.append(val: c);
180	}
181	ptrdiff_t startDelta = m_codeStart - m_code;
182	m_code = m_codeWithoutBOMs.data();
183	m_codeStart = m_code + startDelta;
184	m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
185	}
186
187	void Lexer::shiftLineTerminator()
188	{
189	ASSERT(isLineTerminator(m_current));
190
191	// Allow both CRLF and LFCR.
192	if (m_current + m_next1 == `'\n'` + `'\r'`)
193	shift2();
194	else
195	shift1();
196
197	++m_lineNumber;
198	}
199
200	ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
201	{
202	return &m_arena->makeIdentifier(globalData: m_globalData, characters, length);
203	}
204
205	inline bool Lexer::lastTokenWasRestrKeyword() const
206	{
207	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
208	}
209
210	static NEVER_INLINE bool isNonASCIIIdentStart(int c)
211	{
212	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
213	}
214
215	static inline bool isIdentStart(int c)
216	{
217	return isASCII(c) ? isASCIIAlpha(c) \|\| c == `'$'` \|\| c == `'_'` : isNonASCIIIdentStart(c);
218	}
219
220	static NEVER_INLINE bool isNonASCIIIdentPart(int c)
221	{
222	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
223	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector);
224	}
225
226	static inline bool isIdentPart(int c)
227	{
228	return isASCII(c) ? isASCIIAlphanumeric(c) \|\| c == `'$'` \|\| c == `'_'` : isNonASCIIIdentPart(c);
229	}
230
// Maps the character following a backslash in a string literal to the
// character it denotes. The six control escapes (b, t, n, v, f, r) map to
// their control codes; any other character stands for itself.
static inline int singleEscape(int c)
{
    switch (c) {
    case 'b':
        return 0x08;
    case 't':
        return 0x09;
    case 'n':
        return 0x0A;
    case 'v':
        return 0x0B;
    case 'f':
        return 0x0C;
    case 'r':
        return 0x0D;
    default:
        return c;
    }
}
250
251	inline void Lexer::record8(int c)
252	{
253	ASSERT(c >= `0`);
254	ASSERT(c <= `0xFF`);
255	m_buffer8.append(val: static_cast<char>(c));
256	}
257
258	inline void Lexer::record16(UChar c)
259	{
260	m_buffer16.append(val: c);
261	}
262
263	inline void Lexer::record16(int c)
264	{
265	ASSERT(c >= `0`);
266	ASSERT(c <= USHRT_MAX);
267	record16(c: UChar(static_cast<unsigned short>(c)));
268	}
269
270	int Lexer::lex(void* p1, void* p2)
271	{
272	ASSERT(!m_error);
273	ASSERT(m_buffer8.isEmpty());
274	ASSERT(m_buffer16.isEmpty());
275
276	YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
277	YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
278	int token = `0`;
279	m_terminator = false;
280
281	start:
282	while (isWhiteSpace(ch: m_current))
283	shift1();
284
285	int startOffset = currentOffset();
286
287	if (m_current == -`1`) {
288	#ifndef QT_BUILD_SCRIPT_LIB /* the parser takes cate about automatic semicolon.
289	this might add incorrect semicolons */
290	//m_delimited and m_isReparsing are now useless
291	if (!m_terminator && !m_delimited && !m_isReparsing) {
292	// automatic semicolon insertion if program incomplete
293	token = `';'`;
294	goto doneSemicolon;
295	}
296	#endif
297	return `0`;
298	}
299
300	m_delimited = false;
301	switch (m_current) {
302	case `'>'`:
303	if (m_next1 == `'>'` && m_next2 == `'>'`) {
304	if (m_next3 == `'='`) {
305	shift4();
306	token = URSHIFTEQUAL;
307	break;
308	}
309	shift3();
310	token = URSHIFT;
311	break;
312	}
313	if (m_next1 == `'>'`) {
314	if (m_next2 == `'='`) {
315	shift3();
316	token = RSHIFTEQUAL;
317	break;
318	}
319	shift2();
320	token = RSHIFT;
321	break;
322	}
323	if (m_next1 == `'='`) {
324	shift2();
325	token = GE;
326	break;
327	}
328	shift1();
329	token = `'>'`;
330	break;
331	case `'='`:
332	if (m_next1 == `'='`) {
333	if (m_next2 == `'='`) {
334	shift3();
335	token = STREQ;
336	break;
337	}
338	shift2();
339	token = EQEQ;
340	break;
341	}
342	shift1();
343	token = `'='`;
344	break;
345	case `'!'`:
346	if (m_next1 == `'='`) {
347	if (m_next2 == `'='`) {
348	shift3();
349	token = STRNEQ;
350	break;
351	}
352	shift2();
353	token = NE;
354	break;
355	}
356	shift1();
357	token = `'!'`;
358	break;
359	case `'<'`:
360	if (m_next1 == `'!'` && m_next2 == `'-'` && m_next3 == `'-'`) {
361	// <!-- marks the beginning of a line comment (for www usage)
362	shift4();
363	goto inSingleLineComment;
364	}
365	if (m_next1 == `'<'`) {
366	if (m_next2 == `'='`) {
367	shift3();
368	token = LSHIFTEQUAL;
369	break;
370	}
371	shift2();
372	token = LSHIFT;
373	break;
374	}
375	if (m_next1 == `'='`) {
376	shift2();
377	token = LE;
378	break;
379	}
380	shift1();
381	token = `'<'`;
382	break;
383	case `'+'`:
384	if (m_next1 == `'+'`) {
385	shift2();
386	if (m_terminator) {
387	token = AUTOPLUSPLUS;
388	break;
389	}
390	token = PLUSPLUS;
391	break;
392	}
393	if (m_next1 == `'='`) {
394	shift2();
395	token = PLUSEQUAL;
396	break;
397	}
398	shift1();
399	token = `'+'`;
400	break;
401	case `'-'`:
402	if (m_next1 == `'-'`) {
403	if (m_atLineStart && m_next2 == `'>'`) {
404	shift3();
405	goto inSingleLineComment;
406	}
407	shift2();
408	if (m_terminator) {
409	token = AUTOMINUSMINUS;
410	break;
411	}
412	token = MINUSMINUS;
413	break;
414	}
415	if (m_next1 == `'='`) {
416	shift2();
417	token = MINUSEQUAL;
418	break;
419	}
420	shift1();
421	token = `'-'`;
422	break;
423	case `'*'`:
424	if (m_next1 == `'='`) {
425	shift2();
426	token = MULTEQUAL;
427	break;
428	}
429	shift1();
430	token = `'*'`;
431	break;
432	case `'/'`:
433	if (m_next1 == `'/'`) {
434	shift2();
435	goto inSingleLineComment;
436	}
437	if (m_next1 == `'*'`)
438	goto inMultiLineComment;
439	if (m_next1 == `'='`) {
440	shift2();
441	token = DIVEQUAL;
442	break;
443	}
444	shift1();
445	token = `'/'`;
446	break;
447	case `'&'`:
448	if (m_next1 == `'&'`) {
449	shift2();
450	token = AND;
451	break;
452	}
453	if (m_next1 == `'='`) {
454	shift2();
455	token = ANDEQUAL;
456	break;
457	}
458	shift1();
459	token = `'&'`;
460	break;
461	case `'^'`:
462	if (m_next1 == `'='`) {
463	shift2();
464	token = XOREQUAL;
465	break;
466	}
467	shift1();
468	token = `'^'`;
469	break;
470	case `'%'`:
471	if (m_next1 == `'='`) {
472	shift2();
473	token = MODEQUAL;
474	break;
475	}
476	shift1();
477	token = `'%'`;
478	break;
479	case `'\|'`:
480	if (m_next1 == `'='`) {
481	shift2();
482	token = OREQUAL;
483	break;
484	}
485	if (m_next1 == `'\|'`) {
486	shift2();
487	token = OR;
488	break;
489	}
490	shift1();
491	token = `'\|'`;
492	break;
493	case `'.'`:
494	if (isASCIIDigit(c: m_next1)) {
495	record8(c: `'.'`);
496	shift1();
497	goto inNumberAfterDecimalPoint;
498	}
499	token = `'.'`;
500	shift1();
501	break;
502	case `','`:
503	case `'~'`:
504	case `'?'`:
505	case `':'`:
506	case `'('`:
507	case `')'`:
508	case `'['`:
509	case `']'`:
510	token = m_current;
511	shift1();
512	break;
513	case `';'`:
514	shift1();
515	m_delimited = true;
516	token = `';'`;
517	break;
518	case `'{'`:
519	lvalp->intValue = currentOffset();
520	shift1();
521	token = OPENBRACE;
522	break;
523	case `'}'`:
524	lvalp->intValue = currentOffset();
525	shift1();
526	m_delimited = true;
527	token = CLOSEBRACE;
528	break;
529	case `'\\'`:
530	goto startIdentifierWithBackslash;
531	case `'0'`:
532	goto startNumberWithZeroDigit;
533	case `'1'`:
534	case `'2'`:
535	case `'3'`:
536	case `'4'`:
537	case `'5'`:
538	case `'6'`:
539	case `'7'`:
540	case `'8'`:
541	case `'9'`:
542	goto startNumber;
543	case `'"'`:
544	case `'\''`:
545	goto startString;
546	default:
547	if (isIdentStart(c: m_current))
548	goto startIdentifierOrKeyword;
549	if (isLineTerminator(ch: m_current)) {
550	shiftLineTerminator();
551	m_atLineStart = true;
552	m_terminator = true;
553	if (lastTokenWasRestrKeyword()) {
554	token = `';'`;
555	goto doneSemicolon;
556	}
557	goto start;
558	}
559	goto returnError;
560	}
561
562	m_atLineStart = false;
563	goto returnToken;
564
565	startString: {
566	int stringQuoteCharacter = m_current;
567	shift1();
568
569	const UChar* stringStart = currentCharacter();
570	while (m_current != stringQuoteCharacter) {
571	// Fast check for characters that require special handling.
572	// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
573	// as possible, and lets through all common ASCII characters.
574	if (UNLIKELY(m_current == `'\\'`) \|\| UNLIKELY(((static_cast<unsigned>(m_current) - `0xE`) & `0x2000`))) {
575	m_buffer16.append(data: stringStart, dataSize: currentCharacter() - stringStart);
576	goto inString;
577	}
578	shift1();
579	}
580	lvalp->ident = makeIdentifier(characters: stringStart, length: currentCharacter() - stringStart);
581	shift1();
582	m_atLineStart = false;
583	m_delimited = false;
584	token = STRING;
585	goto returnToken;
586
587	inString:
588	while (m_current != stringQuoteCharacter) {
589	if (m_current == `'\\'`)
590	goto inStringEscapeSequence;
591	if (UNLIKELY(isLineTerminator(m_current)))
592	goto returnError;
593	if (UNLIKELY(m_current == -`1`))
594	goto returnError;
595	record16(c: m_current);
596	shift1();
597	}
598	goto doneString;
599
600	inStringEscapeSequence:
601	shift1();
602	if (m_current == `'x'`) {
603	shift1();
604	if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1)) {
605	record16(c: convertHex(c1: m_current, c2: m_next1));
606	shift2();
607	goto inString;
608	}
609	record16(c: `'x'`);
610	if (m_current == stringQuoteCharacter)
611	goto doneString;
612	goto inString;
613	}
614	if (m_current == `'u'`) {
615	shift1();
616	if (isASCIIHexDigit(c: m_current) && isASCIIHexDigit(c: m_next1) && isASCIIHexDigit(c: m_next2) && isASCIIHexDigit(c: m_next3)) {
617	record16(c: convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3));
618	shift4();
619	goto inString;
620	}
621	if (m_current == stringQuoteCharacter) {
622	record16(c: `'u'`);
623	goto doneString;
624	}
625	goto returnError;
626	}
627	if (isASCIIOctalDigit(c: m_current)) {
628	if (m_current >= `'0'` && m_current <= `'3'` && isASCIIOctalDigit(c: m_next1) && isASCIIOctalDigit(c: m_next2)) {
629	record16(c: (m_current - `'0'`) * `64` + (m_next1 - `'0'`) * `8` + m_next2 - `'0'`);
630	shift3();
631	goto inString;
632	}
633	if (isASCIIOctalDigit(c: m_next1)) {
634	record16(c: (m_current - `'0'`) * `8` + m_next1 - `'0'`);
635	shift2();
636	goto inString;
637	}
638	record16(c: m_current - `'0'`);
639	shift1();
640	goto inString;
641	}
642	if (isLineTerminator(ch: m_current)) {
643	shiftLineTerminator();
644	goto inString;
645	}
646	if (m_current == -`1`)
647	goto returnError;
648	record16(c: singleEscape(c: m_current));
649	shift1();
650	goto inString;
651	}
652
653	startIdentifierWithBackslash:
654	shift1();
655	if (UNLIKELY(m_current != `'u'`))
656	goto returnError;
657	shift1();
658	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
659	goto returnError;
660	token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3);
661	if (UNLIKELY(!isIdentStart(token)))
662	goto returnError;
663	goto inIdentifierAfterCharacterCheck;
664
665	startIdentifierOrKeyword: {
666	const UChar* identifierStart = currentCharacter();
667	shift1();
668	while (isIdentPart(c: m_current))
669	shift1();
670	if (LIKELY(m_current != `'\\'`)) {
671	lvalp->ident = makeIdentifier(characters: identifierStart, length: currentCharacter() - identifierStart);
672	goto doneIdentifierOrKeyword;
673	}
674	m_buffer16.append(data: identifierStart, dataSize: currentCharacter() - identifierStart);
675	}
676
677	do {
678	shift1();
679	if (UNLIKELY(m_current != `'u'`))
680	goto returnError;
681	shift1();
682	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(m_next1) \|\| !isASCIIHexDigit(m_next2) \|\| !isASCIIHexDigit(m_next3)))
683	goto returnError;
684	token = convertUnicode(c1: m_current, c2: m_next1, c3: m_next2, c4: m_next3);
685	if (UNLIKELY(!isIdentPart(token)))
686	goto returnError;
687	inIdentifierAfterCharacterCheck:
688	record16(c: token);
689	shift4();
690
691	while (isIdentPart(c: m_current)) {
692	record16(c: m_current);
693	shift1();
694	}
695	} while (UNLIKELY(m_current == `'\\'`));
696	goto doneIdentifier;
697
698	inSingleLineComment:
699	while (!isLineTerminator(ch: m_current)) {
700	if (UNLIKELY(m_current == -`1`))
701	return `0`;
702	shift1();
703	}
704	shiftLineTerminator();
705	m_atLineStart = true;
706	m_terminator = true;
707	if (lastTokenWasRestrKeyword())
708	goto doneSemicolon;
709	goto start;
710
711	inMultiLineComment:
712	shift2();
713	while (m_current != `'*'` \|\| m_next1 != `'/'`) {
714	if (isLineTerminator(ch: m_current))
715	shiftLineTerminator();
716	else {
717	shift1();
718	if (UNLIKELY(m_current == -`1`))
719	goto returnError;
720	}
721	}
722	shift2();
723	m_atLineStart = false;
724	goto start;
725
726	startNumberWithZeroDigit:
727	shift1();
728	if ((m_current \| `0x20`) == `'x'` && isASCIIHexDigit(c: m_next1)) {
729	shift1();
730	goto inHex;
731	}
732	if (m_current == `'.'`) {
733	record8(c: `'0'`);
734	record8(c: `'.'`);
735	shift1();
736	goto inNumberAfterDecimalPoint;
737	}
738	if ((m_current \| `0x20`) == `'e'`) {
739	record8(c: `'0'`);
740	record8(c: `'e'`);
741	shift1();
742	goto inExponentIndicator;
743	}
744	if (isASCIIOctalDigit(c: m_current))
745	goto inOctal;
746	if (isASCIIDigit(c: m_current))
747	goto startNumber;
748	lvalp->doubleValue = `0`;
749	goto doneNumeric;
750
751	inNumberAfterDecimalPoint:
752	while (isASCIIDigit(c: m_current)) {
753	record8(c: m_current);
754	shift1();
755	}
756	if ((m_current \| `0x20`) == `'e'`) {
757	record8(c: `'e'`);
758	shift1();
759	goto inExponentIndicator;
760	}
761	goto doneNumber;
762
763	inExponentIndicator:
764	if (m_current == `'+'` \|\| m_current == `'-'`) {
765	record8(c: m_current);
766	shift1();
767	}
768	if (!isASCIIDigit(c: m_current))
769	goto returnError;
770	do {
771	record8(c: m_current);
772	shift1();
773	} while (isASCIIDigit(c: m_current));
774	goto doneNumber;
775
776	inOctal: {
777	do {
778	record8(c: m_current);
779	shift1();
780	} while (isASCIIOctalDigit(c: m_current));
781	if (isASCIIDigit(c: m_current))
782	goto startNumber;
783
784	double dval = `0`;
785
786	const char* end = m_buffer8.end();
787	for (const char* p = m_buffer8.data(); p < end; ++p) {
788	dval *= `8`;
789	dval += *p - `'0'`;
790	}
791	if (dval >= mantissaOverflowLowerBound)
792	dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: `8`);
793
794	m_buffer8.resize(size: `0`);
795
796	lvalp->doubleValue = dval;
797	goto doneNumeric;
798	}
799
800	inHex: {
801	do {
802	record8(c: m_current);
803	shift1();
804	} while (isASCIIHexDigit(c: m_current));
805
806	double dval = `0`;
807
808	const char* end = m_buffer8.end();
809	for (const char* p = m_buffer8.data(); p < end; ++p) {
810	dval *= `16`;
811	dval += toASCIIHexValue(c: *p);
812	}
813	if (dval >= mantissaOverflowLowerBound)
814	dval = parseIntOverflow(m_buffer8.data(), length: end - m_buffer8.data(), radix: `16`);
815
816	m_buffer8.resize(size: `0`);
817
818	lvalp->doubleValue = dval;
819	goto doneNumeric;
820	}
821
822	startNumber:
823	record8(c: m_current);
824	shift1();
825	while (isASCIIDigit(c: m_current)) {
826	record8(c: m_current);
827	shift1();
828	}
829	if (m_current == `'.'`) {
830	record8(c: `'.'`);
831	shift1();
832	goto inNumberAfterDecimalPoint;
833	}
834	if ((m_current \| `0x20`) == `'e'`) {
835	record8(c: `'e'`);
836	shift1();
837	goto inExponentIndicator;
838	}
839
840	// Fall through into doneNumber.
841
842	doneNumber:
843	// Null-terminate string for strtod.
844	m_buffer8.append(val: `'\0'`);
845	lvalp->doubleValue = WTF::strtod(s00: m_buffer8.data(), se: `0`);
846	m_buffer8.resize(size: `0`);
847
848	// Fall through into doneNumeric.
849
850	doneNumeric:
851	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
852	if (UNLIKELY(isIdentStart(m_current)))
853	goto returnError;
854
855	m_atLineStart = false;
856	m_delimited = false;
857	token = NUMBER;
858	goto returnToken;
859
860	doneSemicolon:
861	token = `';'`;
862	m_delimited = true;
863	goto returnToken;
864
865	doneIdentifier:
866	m_atLineStart = false;
867	m_delimited = false;
868	lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
869	m_buffer16.resize(size: `0`);
870	token = IDENT;
871	goto returnToken;
872
873	doneIdentifierOrKeyword: {
874	m_atLineStart = false;
875	m_delimited = false;
876	m_buffer16.resize(size: `0`);
877	const HashEntry* entry = m_keywordTable.entry(globalData: m_globalData, identifier: *lvalp->ident);
878	token = entry ? entry->lexerValue() : IDENT;
879	goto returnToken;
880	}
881
882	doneString:
883	// Atomize constant strings in case they're later used in property lookup.
884	shift1();
885	m_atLineStart = false;
886	m_delimited = false;
887	lvalp->ident = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
888	m_buffer16.resize(size: `0`);
889	token = STRING;
890
891	// Fall through into returnToken.
892
893	returnToken: {
894	int lineNumber = m_lineNumber;
895	llocp->first_line = lineNumber;
896	llocp->last_line = lineNumber;
897	llocp->first_column = startOffset;
898	llocp->last_column = currentOffset();
899
900	m_lastToken = token;
901	return token;
902	}
903
904	returnError:
905	m_error = true;
906	return -`1`;
907	}
908
909	bool Lexer::scanRegExp(const Identifier& pattern, const* Identifier*& flags, UChar patternPrefix)
910	{
911	ASSERT(m_buffer16.isEmpty());
912
913	bool lastWasEscape = false;
914	bool inBrackets = false;
915
916	if (patternPrefix) {
917	ASSERT(!isLineTerminator(patternPrefix));
918	ASSERT(patternPrefix != `'/'`);
919	ASSERT(patternPrefix != `'['`);
920	record16(c: patternPrefix);
921	}
922
923	while (true) {
924	int current = m_current;
925
926	if (isLineTerminator(ch: current) \|\| current == -`1`) {
927	m_buffer16.resize(size: `0`);
928	return false;
929	}
930
931	shift1();
932
933	if (current == `'/'` && !lastWasEscape && !inBrackets)
934	break;
935
936	record16(c: current);
937
938	if (lastWasEscape) {
939	lastWasEscape = false;
940	continue;
941	}
942
943	switch (current) {
944	case `'['`:
945	inBrackets = true;
946	break;
947	case `']'`:
948	inBrackets = false;
949	break;
950	case `'\\'`:
951	lastWasEscape = true;
952	break;
953	}
954	}
955
956	pattern = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
957	m_buffer16.resize(size: `0`);
958
959	while (isIdentPart(c: m_current)) {
960	record16(c: m_current);
961	shift1();
962	}
963
964	flags = makeIdentifier(characters: m_buffer16.data(), length: m_buffer16.size());
965	m_buffer16.resize(size: `0`);
966
967	return true;
968	}
969
970	bool Lexer::skipRegExp()
971	{
972	bool lastWasEscape = false;
973	bool inBrackets = false;
974
975	while (true) {
976	int current = m_current;
977
978	if (isLineTerminator(ch: current) \|\| current == -`1`)
979	return false;
980
981	shift1();
982
983	if (current == `'/'` && !lastWasEscape && !inBrackets)
984	break;
985
986	if (lastWasEscape) {
987	lastWasEscape = false;
988	continue;
989	}
990
991	switch (current) {
992	case `'['`:
993	inBrackets = true;
994	break;
995	case `']'`:
996	inBrackets = false;
997	break;
998	case `'\\'`:
999	lastWasEscape = true;
1000	break;
1001	}
1002	}
1003
1004	while (isIdentPart(c: m_current))
1005	shift1();
1006
1007	return true;
1008	}
1009
1010	void Lexer::clear()
1011	{
1012	m_arena = `0`;
1013	m_codeWithoutBOMs.clear();
1014
1015	Vector<char> newBuffer8;
1016	newBuffer8.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
1017	m_buffer8.swap(other&: newBuffer8);
1018
1019	Vector<UChar> newBuffer16;
1020	newBuffer16.reserveInitialCapacity(initialCapacity: initialReadBufferCapacity);
1021	m_buffer16.swap(other&: newBuffer16);
1022
1023	m_isReparsing = false;
1024	}
1025
1026	SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
1027	{
1028	if (m_codeWithoutBOMs.isEmpty())
1029	return SourceCode (m_source->provider(), openBrace, closeBrace + `1`, firstLine);
1030
1031	const UChar* data = m_source->provider()->data();
1032
1033	ASSERT(openBrace < closeBrace);
1034
1035	int numBOMsBeforeOpenBrace = `0`;
1036	int numBOMsBetweenBraces = `0`;
1037
1038	int i;
1039	for (i = m_source->startOffset(); i < openBrace; ++i)
1040	numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
1041	for (; i < closeBrace; ++i)
1042	numBOMsBetweenBraces += data[i] == byteOrderMark;
1043
1044	return SourceCode (m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
1045	closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + `1`, firstLine);
1046	}
1047
1048	} // namespace JSC
1049

source code of qtscript/src/3rdparty/javascriptcore/JavaScriptCore/parser/Lexer.cpp