/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "LiteralParser.h"

#include "JSArray.h"
#include "JSString.h"
#include "Lexer.h"
#include "StringBuilder.h"
#include <wtf/ASCIICType.h>
#include <wtf/dtoa.h>

namespace JSC {

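// Lexer::lex() scans a single token from the range [m_ptr, m_end): it skips any
// leading ASCII whitespace, fills in token.type, token.start and token.end, and
// returns the same TokenType it stored in the token. Input it does not recognise
// is reported as TokError.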
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
        case '[':
            token.type = TokLBracket;
            token.end = ++m_ptr;
            return TokLBracket;
        case ']':
            token.type = TokRBracket;
            token.end = ++m_ptr;
            return TokRBracket;
        case '(':
            token.type = TokLParen;
            token.end = ++m_ptr;
            return TokLParen;
        case ')':
            token.type = TokRParen;
            token.end = ++m_ptr;
            return TokRParen;
        case '{':
            token.type = TokLBrace;
            token.end = ++m_ptr;
            return TokLBrace;
        case '}':
            token.type = TokRBrace;
            token.end = ++m_ptr;
            return TokRBrace;
        case ',':
            token.type = TokComma;
            token.end = ++m_ptr;
            return TokComma;
        case ':':
            token.type = TokColon;
            token.end = ++m_ptr;
            return TokColon;
        case '"':
            if (m_mode == StrictJSON)
                return lexString<StrictJSON>(token);
            return lexString<NonStrictJSON>(token);
        case 't':
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
                m_ptr += 4;
                token.type = TokTrue;
                token.end = m_ptr;
                return TokTrue;
            }
            break;
        case 'f':
            if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
                m_ptr += 5;
                token.type = TokFalse;
                token.end = m_ptr;
                return TokFalse;
            }
            break;
        case 'n':
            if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
                m_ptr += 4;
                token.type = TokNull;
                token.end = m_ptr;
                return TokNull;
            }
            break;
        case '-':
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return lexNumber(token);
    }
    return TokError;
}

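// A "safe" string character can be copied into the output verbatim: it is at or
// above ' ', is neither a backslash nor a double quote, and outside StrictJSON
// mode is additionally restricted to Latin-1 (<= 0xff). '\t' is allowed
// explicitly even though it is below ' '.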
template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c)
{
    return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t';
}

// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{
    ++m_ptr;
    const UChar* runStart;
    StringBuilder builder;
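    // Each pass copies one run of safe characters and then, in StrictJSON mode,
    // decodes at most one escape sequence. The loop stops at the closing quote,
    // at the end of the input, or when an iteration makes no progress.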
    do {
        runStart = m_ptr;
        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
            ++m_ptr;
        if (runStart < m_ptr)
            builder.append(runStart, m_ptr - runStart);
        if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            ++m_ptr;
            if (m_ptr >= m_end)
                return TokError;
            switch (*m_ptr) {
                case '"':
                    builder.append('"');
                    m_ptr++;
                    break;
                case '\\':
                    builder.append('\\');
                    m_ptr++;
                    break;
                case '/':
                    builder.append('/');
                    m_ptr++;
                    break;
                case 'b':
                    builder.append('\b');
                    m_ptr++;
                    break;
                case 'f':
                    builder.append('\f');
                    m_ptr++;
                    break;
                case 'n':
                    builder.append('\n');
                    m_ptr++;
                    break;
                case 'r':
                    builder.append('\r');
                    m_ptr++;
                    break;
                case 't':
                    builder.append('\t');
                    m_ptr++;
                    break;

                case 'u':
                    if ((m_end - m_ptr) < 5) // uNNNN == 5 characters
                        return TokError;
                    for (int i = 1; i < 5; i++) {
                        if (!isASCIIHexDigit(m_ptr[i]))
                            return TokError;
                    }
                    builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                    m_ptr += 5;
                    break;

                default:
                    return TokError;
            }
        }
    } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"');

    if (m_ptr >= m_end || *m_ptr != '"')
        return TokError;

    token.stringToken = builder.release();
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}

LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?
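    // For example, "0", "-12", "3.25" and "1e-9" each lex as a single TokNumber,
    // while "-", "1." and "2e+" fall into the TokError paths below.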

    if (m_ptr < m_end && *m_ptr == '-') // -?
        ++m_ptr;

    // (0 | [1-9][0-9]*)
    if (m_ptr < m_end && *m_ptr == '0') // 0
        ++m_ptr;
    else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9]
        ++m_ptr;
        // [0-9]*
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    } else
        return TokError;

    // ('.' [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    // ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE]
        ++m_ptr;

        // [-+]?
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;

        // [0-9]+
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;

        ++m_ptr;
        while (m_ptr < m_end && isASCIIDigit(*m_ptr))
            ++m_ptr;
    }

    token.type = TokNumber;
    token.end = m_ptr;
    Vector<char, 64> buffer(token.end - token.start + 1);
    int i;
    for (i = 0; i < token.end - token.start; i++) {
        ASSERT(static_cast<char>(token.start[i]) == token.start[i]);
        buffer[i] = static_cast<char>(token.start[i]);
    }
    buffer[i] = 0;
    char* end;
    token.numberToken = WTF::strtod(buffer.data(), &end);
    ASSERT(buffer.data() + (token.end - token.start) == end);
    return TokNumber;
}

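// parse() is written as an iterative state machine rather than a recursive-descent
// parser: entering a nested array or object pushes a continuation state onto
// stateStack and the container being built onto objectStack, so deeply nested
// input does not grow the C++ call stack. Each goto label mirrors a ParserState
// value, so a state can be entered either from the dispatch switch or directly
// from another state. Any syntax error returns an empty JSValue().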
JSValue LiteralParser::parse(ParserState initialState)
{
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16> stateStack;
    Vector<Identifier, 16> identifierStack;
    while (1) {
        switch(state) {
            startParseArray:
            case StartParseArray: {
                JSArray* array = constructEmptyArray(m_exec);
                objectStack.append(array);
                // fallthrough
            }
            doParseArrayStartExpression:
            case DoParseArrayStartExpression: {
                TokenType lastToken = m_lexer.currentToken().type;
                if (m_lexer.next() == TokRBracket) {
                    if (lastToken == TokComma)
                        return JSValue();
                    m_lexer.next();
                    lastValue = objectStack.last();
                    objectStack.removeLast();
                    break;
                }

                stateStack.append(DoParseArrayEndExpression);
                goto startParseExpression;
            }
            case DoParseArrayEndExpression: {
                asArray(objectStack.last())->push(m_exec, lastValue);

                if (m_lexer.currentToken().type == TokComma)
                    goto doParseArrayStartExpression;

                if (m_lexer.currentToken().type != TokRBracket)
                    return JSValue();

                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            startParseObject:
            case StartParseObject: {
                JSObject* object = constructEmptyObject(m_exec);
                objectStack.append(object);

                TokenType type = m_lexer.next();
                if (type == TokString) {
                    Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

                    // Check for colon
                    if (m_lexer.next() != TokColon)
                        return JSValue();

                    m_lexer.next();
                    identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
                    stateStack.append(DoParseObjectEndExpression);
                    goto startParseExpression;
                } else if (type != TokRBrace)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            doParseObjectStartExpression:
            case DoParseObjectStartExpression: {
                TokenType type = m_lexer.next();
                if (type != TokString)
                    return JSValue();
                Lexer::LiteralParserToken identifierToken = m_lexer.currentToken();

                // Check for colon
                if (m_lexer.next() != TokColon)
                    return JSValue();

                m_lexer.next();
                identifierStack.append(Identifier(m_exec, identifierToken.stringToken));
                stateStack.append(DoParseObjectEndExpression);
                goto startParseExpression;
            }
            case DoParseObjectEndExpression:
            {
                asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
                identifierStack.removeLast();
                if (m_lexer.currentToken().type == TokComma)
                    goto doParseObjectStartExpression;
                if (m_lexer.currentToken().type != TokRBrace)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            startParseExpression:
            case StartParseExpression: {
                switch (m_lexer.currentToken().type) {
                    case TokLBracket:
                        goto startParseArray;
                    case TokLBrace:
                        goto startParseObject;
                    case TokString: {
                        Lexer::LiteralParserToken stringToken = m_lexer.currentToken();
                        m_lexer.next();
                        lastValue = jsString(m_exec, stringToken.stringToken);
                        break;
                    }
                    case TokNumber: {
                        Lexer::LiteralParserToken numberToken = m_lexer.currentToken();
                        m_lexer.next();
                        lastValue = jsNumber(m_exec, numberToken.numberToken);
                        break;
                    }
                    case TokNull:
                        m_lexer.next();
                        lastValue = jsNull();
                        break;

                    case TokTrue:
                        m_lexer.next();
                        lastValue = jsBoolean(true);
                        break;

                    case TokFalse:
                        m_lexer.next();
                        lastValue = jsBoolean(false);
                        break;

                    default:
                        // Error
                        return JSValue();
                }
                break;
            }
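            // A top-level "statement" is either a bare array/number/string
            // expression or a parenthesized expression; for the latter, the
            // matching ')' is checked by StartParseStatementEndStatement once the
            // inner expression has been parsed.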
            case StartParseStatement: {
                switch (m_lexer.currentToken().type) {
                    case TokLBracket:
                    case TokNumber:
                    case TokString:
                        goto startParseExpression;

                    case TokLParen: {
                        m_lexer.next();
                        stateStack.append(StartParseStatementEndStatement);
                        goto startParseExpression;
                    }
                    default:
                        return JSValue();
                }
            }
            case StartParseStatementEndStatement: {
                ASSERT(stateStack.isEmpty());
                if (m_lexer.currentToken().type != TokRParen)
                    return JSValue();
                if (m_lexer.next() == TokEnd)
                    return lastValue;
                return JSValue();
            }
            default:
                ASSERT_NOT_REACHED();
        }
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.last();
        stateStack.removeLast();
        continue;
    }
}

}
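// A minimal usage sketch (hypothetical caller; the exact constructor and entry
// point are the ones declared in LiteralParser.h, assumed here to be the
// ExecState*/source/ParserMode constructor and tryLiteralParse()):
//
//     LiteralParser preparser(exec, source, LiteralParser::StrictJSON);
//     if (JSValue parsed = preparser.tryLiteralParse())
//         return parsed; // the text was a pure literal
//     // otherwise fall back to the full parser/evaluator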