LiteralParser.cpp source code [qtscript/src/3rdparty/javascriptcore/JavaScriptCore/runtime/LiteralParser.cpp]

1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "LiteralParser.h"
28
29	#include "JSArray.h"
30	#include "JSString.h"
31	#include "Lexer.h"
32	#include "StringBuilder.h"
33	#include <wtf/ASCIICType.h>
34	#include <wtf/dtoa.h>
35
36	namespace JSC {
37
38	LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
39	{
40	while (m_ptr < m_end && isASCIISpace(c: *m_ptr))
41	++m_ptr;
42
43	ASSERT(m_ptr <= m_end);
44	if (m_ptr >= m_end) {
45	token.type = TokEnd;
46	token.start = token.end = m_ptr;
47	return TokEnd;
48	}
49	token.type = TokError;
50	token.start = m_ptr;
51	switch (*m_ptr) {
52	case `'['`:
53	token.type = TokLBracket;
54	token.end = ++m_ptr;
55	return TokLBracket;
56	case `']'`:
57	token.type = TokRBracket;
58	token.end = ++m_ptr;
59	return TokRBracket;
60	case `'('`:
61	token.type = TokLParen;
62	token.end = ++m_ptr;
63	return TokLBracket;
64	case `')'`:
65	token.type = TokRParen;
66	token.end = ++m_ptr;
67	return TokRBracket;
68	case `'{'`:
69	token.type = TokLBrace;
70	token.end = ++m_ptr;
71	return TokLBrace;
72	case `'}'`:
73	token.type = TokRBrace;
74	token.end = ++m_ptr;
75	return TokRBrace;
76	case `','`:
77	token.type = TokComma;
78	token.end = ++m_ptr;
79	return TokComma;
80	case `':'`:
81	token.type = TokColon;
82	token.end = ++m_ptr;
83	return TokColon;
84	case `'"'`:
85	if (m_mode == StrictJSON)
86	return lexString<StrictJSON>(token);
87	return lexString<NonStrictJSON>(token);
88	case `'t'`:
89	if (m_end - m_ptr >= `4` && m_ptr[`1`] == `'r'` && m_ptr[`2`] == `'u'` && m_ptr[`3`] == `'e'`) {
90	m_ptr += `4`;
91	token.type = TokTrue;
92	token.end = m_ptr;
93	return TokTrue;
94	}
95	break;
96	case `'f'`:
97	if (m_end - m_ptr >= `5` && m_ptr[`1`] == `'a'` && m_ptr[`2`] == `'l'` && m_ptr[`3`] == `'s'` && m_ptr[`4`] == `'e'`) {
98	m_ptr += `5`;
99	token.type = TokFalse;
100	token.end = m_ptr;
101	return TokFalse;
102	}
103	break;
104	case `'n'`:
105	if (m_end - m_ptr >= `4` && m_ptr[`1`] == `'u'` && m_ptr[`2`] == `'l'` && m_ptr[`3`] == `'l'`) {
106	m_ptr += `4`;
107	token.type = TokNull;
108	token.end = m_ptr;
109	return TokNull;
110	}
111	break;
112	case `'-'`:
113	case `'0'`:
114	case `'1'`:
115	case `'2'`:
116	case `'3'`:
117	case `'4'`:
118	case `'5'`:
119	case `'6'`:
120	case `'7'`:
121	case `'8'`:
122	case `'9'`:
123	return lexNumber(token);
124	}
125	return TokError;
126	}
127
128	template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
129	{
130	return (c >= `' '` && (mode == LiteralParser::StrictJSON \|\| c <= `0xff`) && c != `'\\'` && c != `'"'`) \|\| c == `'\t'`;
131	}
132
133	// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
134	template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
135	{
136	++m_ptr;
137	const UChar* runStart;
138	StringBuilder builder;
139	do {
140	runStart = m_ptr;
141	while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
142	++m_ptr;
143	if (runStart < m_ptr)
144	builder.append(str: runStart, len: m_ptr - runStart);
145	if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == `'\\'`) {
146	++m_ptr;
147	if (m_ptr >= m_end)
148	return TokError;
149	switch (*m_ptr) {
150	case `'"'`:
151	builder.append(u: `'"'`);
152	m_ptr++;
153	break;
154	case `'\\'`:
155	builder.append(u: `'\\'`);
156	m_ptr++;
157	break;
158	case `'/'`:
159	builder.append(u: `'/'`);
160	m_ptr++;
161	break;
162	case `'b'`:
163	builder.append(u: `'\b'`);
164	m_ptr++;
165	break;
166	case `'f'`:
167	builder.append(u: `'\f'`);
168	m_ptr++;
169	break;
170	case `'n'`:
171	builder.append(u: `'\n'`);
172	m_ptr++;
173	break;
174	case `'r'`:
175	builder.append(u: `'\r'`);
176	m_ptr++;
177	break;
178	case `'t'`:
179	builder.append(u: `'\t'`);
180	m_ptr++;
181	break;
182
183	case `'u'`:
184	if ((m_end - m_ptr) < `5`) // uNNNN == 5 characters
185	return TokError;
186	for (int i = `1`; i < `5`; i++) {
187	if (!isASCIIHexDigit(c: m_ptr[i]))
188	return TokError;
189	}
190	builder.append(JSC::Lexer::convertUnicode(c1: m_ptr[`1`], c2: m_ptr[`2`], c3: m_ptr[`3`], c4: m_ptr[`4`]));
191	m_ptr += `5`;
192	break;
193
194	default:
195	return TokError;
196	}
197	}
198	} while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != `'"'`);
199
200	if (m_ptr >= m_end \|\| *m_ptr != `'"'`)
201	return TokError;
202
203	token.stringToken = builder.release();
204	token.type = TokString;
205	token.end = ++m_ptr;
206	return TokString;
207	}
208
209	LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
210	{
211	// ES5 and json.org define numbers as
212	// number
213	// int
214	// int frac? exp?
215	//
216	// int
217	// -? 0
218	// -? digit1-9 digits?
219	//
220	// digits
221	// digit digits?
222	//
223	// -?(0 \| [1-9][0-9]) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?*
224
225	if (m_ptr < m_end && m_ptr == `'-'`) // -?*
226	++m_ptr;
227
228	// (0 \| [1-9][0-9])*
229	if (m_ptr < m_end && m_ptr == `'0'`) // 0*
230	++m_ptr;
231	else if (m_ptr < m_end && m_ptr >= `'1'` && m_ptr <= `'9'`) { // [1-9]
232	++m_ptr;
233	// [0-9]*
234	while (m_ptr < m_end && isASCIIDigit(c: *m_ptr))
235	++m_ptr;
236	} else
237	return TokError;
238
239	// ('.' [0-9]+)?
240	if (m_ptr < m_end && *m_ptr == `'.'`) {
241	++m_ptr;
242	// [0-9]+
243	if (m_ptr >= m_end \|\| !isASCIIDigit(c: *m_ptr))
244	return TokError;
245
246	++m_ptr;
247	while (m_ptr < m_end && isASCIIDigit(c: *m_ptr))
248	++m_ptr;
249	}
250
251	// ([eE][+-]? [0-9]+)?
252	if (m_ptr < m_end && (m_ptr == `'e'` \|\| m_ptr == `'E'`)) { // [eE]
253	++m_ptr;
254
255	// [-+]?
256	if (m_ptr < m_end && (m_ptr == `'-'` \|\| m_ptr == `'+'`))
257	++m_ptr;
258
259	// [0-9]+
260	if (m_ptr >= m_end \|\| !isASCIIDigit(c: *m_ptr))
261	return TokError;
262
263	++m_ptr;
264	while (m_ptr < m_end && isASCIIDigit(c: *m_ptr))
265	++m_ptr;
266	}
267
268	token.type = TokNumber;
269	token.end = m_ptr;
270	Vector<char, `64`> buffer(token.end - token.start + `1`);
271	int i;
272	for (i = `0`; i < token.end - token.start; i++) {
273	ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
274	buffer [i] = static_cast<char>(token.start[i]);
275	}
276	buffer [i] = `0`;
277	char* end;
278	token.numberToken = WTF::strtod(s00: buffer.data(), se: &end);
279	ASSERT(buffer.data() + (token.end - token.start) == end);
280	return TokNumber;
281	}
282
283	JSValue LiteralParser::parse(ParserState initialState)
284	{
285	ParserState state = initialState;
286	MarkedArgumentBuffer objectStack;
287	JSValue lastValue;
288	Vector<ParserState, `16`> stateStack;
289	Vector<Identifier, `16`> identifierStack;
290	while (`1`) {
291	switch(state) {
292	startParseArray:
293	case StartParseArray: {
294	JSArray* array = constructEmptyArray(exec: m_exec);
295	objectStack.append(v: array);
296	// fallthrough
297	}
298	doParseArrayStartExpression:
299	case DoParseArrayStartExpression: {
300	TokenType lastToken = m_lexer.currentToken().type;
301	if (m_lexer.next() == TokRBracket) {
302	if (lastToken == TokComma)
303	return JSValue ();
304	m_lexer.next();
305	lastValue = objectStack.last();
306	objectStack.removeLast();
307	break;
308	}
309
310	stateStack.append(val: DoParseArrayEndExpression);
311	goto startParseExpression;
312	}
313	case DoParseArrayEndExpression: {
314	asArray(value: objectStack.last())->push(m_exec, lastValue);
315
316	if (m_lexer.currentToken().type == TokComma)
317	goto doParseArrayStartExpression;
318
319	if (m_lexer.currentToken().type != TokRBracket)
320	return JSValue ();
321
322	m_lexer.next();
323	lastValue = objectStack.last();
324	objectStack.removeLast();
325	break;
326	}
327	startParseObject:
328	case StartParseObject: {
329	JSObject* object = constructEmptyObject(exec: m_exec);
330	objectStack.append(v: object);
331
332	TokenType type = m_lexer.next();
333	if (type == TokString) {
334	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
335
336	// Check for colon
337	if (m_lexer.next() != TokColon)
338	return JSValue ();
339
340	m_lexer.next();
341	identifierStack.append(val: Identifier (m_exec, identifierToken.stringToken));
342	stateStack.append(val: DoParseObjectEndExpression);
343	goto startParseExpression;
344	} else if (type != TokRBrace)
345	return JSValue ();
346	m_lexer.next();
347	lastValue = objectStack.last();
348	objectStack.removeLast();
349	break;
350	}
351	doParseObjectStartExpression:
352	case DoParseObjectStartExpression: {
353	TokenType type = m_lexer.next();
354	if (type != TokString)
355	return JSValue ();
356	Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();
357
358	// Check for colon
359	if (m_lexer.next() != TokColon)
360	return JSValue ();
361
362	m_lexer.next();
363	identifierStack.append(val: Identifier (m_exec, identifierToken.stringToken));
364	stateStack.append(val: DoParseObjectEndExpression);
365	goto startParseExpression;
366	}
367	case DoParseObjectEndExpression:
368	{
369	asObject(value: objectStack.last())->putDirect(propertyName: identifierStack.last(), value: lastValue);
370	identifierStack.removeLast();
371	if (m_lexer.currentToken().type == TokComma)
372	goto doParseObjectStartExpression;
373	if (m_lexer.currentToken().type != TokRBrace)
374	return JSValue ();
375	m_lexer.next();
376	lastValue = objectStack.last();
377	objectStack.removeLast();
378	break;
379	}
380	startParseExpression:
381	case StartParseExpression: {
382	switch (m_lexer.currentToken().type) {
383	case TokLBracket:
384	goto startParseArray;
385	case TokLBrace:
386	goto startParseObject;
387	case TokString: {
388	Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
389	m_lexer.next();
390	lastValue = jsString(exec: m_exec, s: stringToken.stringToken);
391	break;
392	}
393	case TokNumber: {
394	Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
395	m_lexer.next();
396	lastValue = jsNumber(exec: m_exec, d: numberToken.numberToken);
397	break;
398	}
399	case TokNull:
400	m_lexer.next();
401	lastValue = jsNull();
402	break;
403
404	case TokTrue:
405	m_lexer.next();
406	lastValue = jsBoolean(b: true);
407	break;
408
409	case TokFalse:
410	m_lexer.next();
411	lastValue = jsBoolean(b: false);
412	break;
413
414	default:
415	// Error
416	return JSValue ();
417	}
418	break;
419	}
420	case StartParseStatement: {
421	switch (m_lexer.currentToken().type) {
422	case TokLBracket:
423	case TokNumber:
424	case TokString:
425	goto startParseExpression;
426
427	case TokLParen: {
428	m_lexer.next();
429	stateStack.append(val: StartParseStatementEndStatement);
430	goto startParseExpression;
431	}
432	default:
433	return JSValue ();
434	}
435	}
436	case StartParseStatementEndStatement: {
437	ASSERT(stateStack.isEmpty());
438	if (m_lexer.currentToken().type != TokRParen)
439	return JSValue ();
440	if (m_lexer.next() == TokEnd)
441	return lastValue;
442	return JSValue ();
443	}
444	default:
445	ASSERT_NOT_REACHED();
446	}
447	if (stateStack.isEmpty())
448	return lastValue;
449	state = stateStack.last();
450	stateStack.removeLast();
451	continue;
452	}
453	}
454
455	}
456

source code of qtscript/src/3rdparty/javascriptcore/JavaScriptCore/runtime/LiteralParser.cpp