| 1 | /* | 
| 2 |  * Copyright (C) 2009 Apple Inc. All rights reserved. | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions | 
| 6 |  * are met: | 
| 7 |  * 1. Redistributions of source code must retain the above copyright | 
| 8 |  *    notice, this list of conditions and the following disclaimer. | 
| 9 |  * 2. Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  * | 
| 13 |  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 
| 14 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 15 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
| 16 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR | 
| 17 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| 18 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| 19 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 20 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
| 21 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
| 22 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
| 23 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  | 
| 24 |  */ | 
| 25 |  | 
| 26 | #include "config.h" | 
| 27 | #include "LiteralParser.h" | 
| 28 |  | 
| 29 | #include "JSArray.h" | 
| 30 | #include "JSString.h" | 
| 31 | #include "Lexer.h" | 
| 32 | #include "StringBuilder.h" | 
| 33 | #include <wtf/ASCIICType.h> | 
| 34 | #include <wtf/dtoa.h> | 
| 35 |  | 
| 36 | namespace JSC { | 
| 37 |  | 
| 38 | LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token) | 
| 39 | { | 
| 40 |     while (m_ptr < m_end && isASCIISpace(c: *m_ptr)) | 
| 41 |         ++m_ptr; | 
| 42 |  | 
| 43 |     ASSERT(m_ptr <= m_end); | 
| 44 |     if (m_ptr >= m_end) { | 
| 45 |         token.type = TokEnd; | 
| 46 |         token.start = token.end = m_ptr; | 
| 47 |         return TokEnd; | 
| 48 |     } | 
| 49 |     token.type = TokError; | 
| 50 |     token.start = m_ptr; | 
| 51 |     switch (*m_ptr) { | 
| 52 |         case '[': | 
| 53 |             token.type = TokLBracket; | 
| 54 |             token.end = ++m_ptr; | 
| 55 |             return TokLBracket; | 
| 56 |         case ']': | 
| 57 |             token.type = TokRBracket; | 
| 58 |             token.end = ++m_ptr; | 
| 59 |             return TokRBracket; | 
| 60 |         case '(': | 
| 61 |             token.type = TokLParen; | 
| 62 |             token.end = ++m_ptr; | 
| 63 |             return TokLBracket; | 
| 64 |         case ')': | 
| 65 |             token.type = TokRParen; | 
| 66 |             token.end = ++m_ptr; | 
| 67 |             return TokRBracket; | 
| 68 |         case '{': | 
| 69 |             token.type = TokLBrace; | 
| 70 |             token.end = ++m_ptr; | 
| 71 |             return TokLBrace; | 
| 72 |         case '}': | 
| 73 |             token.type = TokRBrace; | 
| 74 |             token.end = ++m_ptr; | 
| 75 |             return TokRBrace; | 
| 76 |         case ',': | 
| 77 |             token.type = TokComma; | 
| 78 |             token.end = ++m_ptr; | 
| 79 |             return TokComma; | 
| 80 |         case ':': | 
| 81 |             token.type = TokColon; | 
| 82 |             token.end = ++m_ptr; | 
| 83 |             return TokColon; | 
| 84 |         case '"': | 
| 85 |             if (m_mode == StrictJSON) | 
| 86 |                 return lexString<StrictJSON>(token); | 
| 87 |             return lexString<NonStrictJSON>(token); | 
| 88 |         case 't': | 
| 89 |             if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') { | 
| 90 |                 m_ptr += 4; | 
| 91 |                 token.type = TokTrue; | 
| 92 |                 token.end = m_ptr; | 
| 93 |                 return TokTrue; | 
| 94 |             } | 
| 95 |             break; | 
| 96 |         case 'f': | 
| 97 |             if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') { | 
| 98 |                 m_ptr += 5; | 
| 99 |                 token.type = TokFalse; | 
| 100 |                 token.end = m_ptr; | 
| 101 |                 return TokFalse; | 
| 102 |             } | 
| 103 |             break; | 
| 104 |         case 'n': | 
| 105 |             if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') { | 
| 106 |                 m_ptr += 4; | 
| 107 |                 token.type = TokNull; | 
| 108 |                 token.end = m_ptr; | 
| 109 |                 return TokNull; | 
| 110 |             } | 
| 111 |             break;     | 
| 112 |         case '-': | 
| 113 |         case '0': | 
| 114 |         case '1': | 
| 115 |         case '2': | 
| 116 |         case '3': | 
| 117 |         case '4': | 
| 118 |         case '5': | 
| 119 |         case '6': | 
| 120 |         case '7': | 
| 121 |         case '8': | 
| 122 |         case '9': | 
| 123 |             return lexNumber(token); | 
| 124 |     } | 
| 125 |     return TokError; | 
| 126 | } | 
| 127 |  | 
| 128 | template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) | 
| 129 | { | 
| 130 |     return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; | 
| 131 | } | 
| 132 |  | 
| 133 | // "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions. | 
| 134 | template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token) | 
| 135 | { | 
| 136 |     ++m_ptr; | 
| 137 |     const UChar* runStart; | 
| 138 |     StringBuilder builder; | 
| 139 |     do { | 
| 140 |         runStart = m_ptr; | 
| 141 |         while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr)) | 
| 142 |             ++m_ptr; | 
| 143 |         if (runStart < m_ptr) | 
| 144 |             builder.append(str: runStart, len: m_ptr - runStart); | 
| 145 |         if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') { | 
| 146 |             ++m_ptr; | 
| 147 |             if (m_ptr >= m_end) | 
| 148 |                 return TokError; | 
| 149 |             switch (*m_ptr) { | 
| 150 |                 case '"': | 
| 151 |                     builder.append(u: '"'); | 
| 152 |                     m_ptr++; | 
| 153 |                     break; | 
| 154 |                 case '\\': | 
| 155 |                     builder.append(u: '\\'); | 
| 156 |                     m_ptr++; | 
| 157 |                     break; | 
| 158 |                 case '/': | 
| 159 |                     builder.append(u: '/'); | 
| 160 |                     m_ptr++; | 
| 161 |                     break; | 
| 162 |                 case 'b': | 
| 163 |                     builder.append(u: '\b'); | 
| 164 |                     m_ptr++; | 
| 165 |                     break; | 
| 166 |                 case 'f': | 
| 167 |                     builder.append(u: '\f'); | 
| 168 |                     m_ptr++; | 
| 169 |                     break; | 
| 170 |                 case 'n': | 
| 171 |                     builder.append(u: '\n'); | 
| 172 |                     m_ptr++; | 
| 173 |                     break; | 
| 174 |                 case 'r': | 
| 175 |                     builder.append(u: '\r'); | 
| 176 |                     m_ptr++; | 
| 177 |                     break; | 
| 178 |                 case 't': | 
| 179 |                     builder.append(u: '\t'); | 
| 180 |                     m_ptr++; | 
| 181 |                     break; | 
| 182 |  | 
| 183 |                 case 'u': | 
| 184 |                     if ((m_end - m_ptr) < 5) // uNNNN == 5 characters | 
| 185 |                         return TokError; | 
| 186 |                     for (int i = 1; i < 5; i++) { | 
| 187 |                         if (!isASCIIHexDigit(c: m_ptr[i])) | 
| 188 |                             return TokError; | 
| 189 |                     } | 
| 190 |                     builder.append(JSC::Lexer::convertUnicode(c1: m_ptr[1], c2: m_ptr[2], c3: m_ptr[3], c4: m_ptr[4])); | 
| 191 |                     m_ptr += 5; | 
| 192 |                     break; | 
| 193 |  | 
| 194 |                 default: | 
| 195 |                     return TokError; | 
| 196 |             } | 
| 197 |         } | 
| 198 |     } while ((mode == StrictJSON) && m_ptr != runStart && (m_ptr < m_end) && *m_ptr != '"'); | 
| 199 |  | 
| 200 |     if (m_ptr >= m_end || *m_ptr != '"') | 
| 201 |         return TokError; | 
| 202 |  | 
| 203 |     token.stringToken = builder.release(); | 
| 204 |     token.type = TokString; | 
| 205 |     token.end = ++m_ptr; | 
| 206 |     return TokString; | 
| 207 | } | 
| 208 |  | 
| 209 | LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token) | 
| 210 | { | 
| 211 |     // ES5 and json.org define numbers as | 
| 212 |     // number | 
| 213 |     //     int | 
| 214 |     //     int frac? exp? | 
| 215 |     // | 
| 216 |     // int | 
| 217 |     //     -? 0 | 
| 218 |     //     -? digit1-9 digits? | 
| 219 |     // | 
| 220 |     // digits | 
| 221 |     //     digit digits? | 
| 222 |     // | 
| 223 |     // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)? | 
| 224 |  | 
| 225 |     if (m_ptr < m_end && *m_ptr == '-') // -? | 
| 226 |         ++m_ptr; | 
| 227 |      | 
| 228 |     // (0 | [1-9][0-9]*) | 
| 229 |     if (m_ptr < m_end && *m_ptr == '0') // 0 | 
| 230 |         ++m_ptr; | 
| 231 |     else if (m_ptr < m_end && *m_ptr >= '1' && *m_ptr <= '9') { // [1-9] | 
| 232 |         ++m_ptr; | 
| 233 |         // [0-9]* | 
| 234 |         while (m_ptr < m_end && isASCIIDigit(c: *m_ptr)) | 
| 235 |             ++m_ptr; | 
| 236 |     } else | 
| 237 |         return TokError; | 
| 238 |  | 
| 239 |     // ('.' [0-9]+)? | 
| 240 |     if (m_ptr < m_end && *m_ptr == '.') { | 
| 241 |         ++m_ptr; | 
| 242 |         // [0-9]+ | 
| 243 |         if (m_ptr >= m_end || !isASCIIDigit(c: *m_ptr)) | 
| 244 |             return TokError; | 
| 245 |  | 
| 246 |         ++m_ptr; | 
| 247 |         while (m_ptr < m_end && isASCIIDigit(c: *m_ptr)) | 
| 248 |             ++m_ptr; | 
| 249 |     } | 
| 250 |  | 
| 251 |     //  ([eE][+-]? [0-9]+)? | 
| 252 |     if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) { // [eE] | 
| 253 |         ++m_ptr; | 
| 254 |  | 
| 255 |         // [-+]? | 
| 256 |         if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+')) | 
| 257 |             ++m_ptr; | 
| 258 |  | 
| 259 |         // [0-9]+ | 
| 260 |         if (m_ptr >= m_end || !isASCIIDigit(c: *m_ptr)) | 
| 261 |             return TokError; | 
| 262 |          | 
| 263 |         ++m_ptr; | 
| 264 |         while (m_ptr < m_end && isASCIIDigit(c: *m_ptr)) | 
| 265 |             ++m_ptr; | 
| 266 |     } | 
| 267 |      | 
| 268 |     token.type = TokNumber; | 
| 269 |     token.end = m_ptr; | 
| 270 |     Vector<char, 64> buffer(token.end - token.start + 1); | 
| 271 |     int i; | 
| 272 |     for (i = 0; i < token.end - token.start; i++) { | 
| 273 |         ASSERT(static_cast<char>(token.start[i]) == token.start[i]); | 
| 274 |         buffer[i] = static_cast<char>(token.start[i]); | 
| 275 |     } | 
| 276 |     buffer[i] = 0; | 
| 277 |     char* end; | 
| 278 |     token.numberToken = WTF::strtod(s00: buffer.data(), se: &end); | 
| 279 |     ASSERT(buffer.data() + (token.end - token.start) == end); | 
| 280 |     return TokNumber; | 
| 281 | } | 
| 282 |  | 
| 283 | JSValue LiteralParser::parse(ParserState initialState) | 
| 284 | { | 
| 285 |     ParserState state = initialState; | 
| 286 |     MarkedArgumentBuffer objectStack; | 
| 287 |     JSValue lastValue; | 
| 288 |     Vector<ParserState, 16> stateStack; | 
| 289 |     Vector<Identifier, 16> identifierStack; | 
| 290 |     while (1) { | 
| 291 |         switch(state) { | 
| 292 |             startParseArray: | 
| 293 |             case StartParseArray: { | 
| 294 |                 JSArray* array = constructEmptyArray(exec: m_exec); | 
| 295 |                 objectStack.append(v: array); | 
| 296 |                 // fallthrough | 
| 297 |             } | 
| 298 |             doParseArrayStartExpression: | 
| 299 |             case DoParseArrayStartExpression: { | 
| 300 |                 TokenType lastToken = m_lexer.currentToken().type; | 
| 301 |                 if (m_lexer.next() == TokRBracket) { | 
| 302 |                     if (lastToken == TokComma) | 
| 303 |                         return JSValue(); | 
| 304 |                     m_lexer.next(); | 
| 305 |                     lastValue = objectStack.last(); | 
| 306 |                     objectStack.removeLast(); | 
| 307 |                     break; | 
| 308 |                 } | 
| 309 |  | 
| 310 |                 stateStack.append(val: DoParseArrayEndExpression); | 
| 311 |                 goto startParseExpression; | 
| 312 |             } | 
| 313 |             case DoParseArrayEndExpression: { | 
| 314 |                  asArray(value: objectStack.last())->push(m_exec, lastValue); | 
| 315 |                  | 
| 316 |                 if (m_lexer.currentToken().type == TokComma) | 
| 317 |                     goto doParseArrayStartExpression; | 
| 318 |  | 
| 319 |                 if (m_lexer.currentToken().type != TokRBracket) | 
| 320 |                     return JSValue(); | 
| 321 |                  | 
| 322 |                 m_lexer.next(); | 
| 323 |                 lastValue = objectStack.last(); | 
| 324 |                 objectStack.removeLast(); | 
| 325 |                 break; | 
| 326 |             } | 
| 327 |             startParseObject: | 
| 328 |             case StartParseObject: { | 
| 329 |                 JSObject* object = constructEmptyObject(exec: m_exec); | 
| 330 |                 objectStack.append(v: object); | 
| 331 |  | 
| 332 |                 TokenType type = m_lexer.next(); | 
| 333 |                 if (type == TokString) { | 
| 334 |                     Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); | 
| 335 |  | 
| 336 |                     // Check for colon | 
| 337 |                     if (m_lexer.next() != TokColon) | 
| 338 |                         return JSValue(); | 
| 339 |                      | 
| 340 |                     m_lexer.next(); | 
| 341 |                     identifierStack.append(val: Identifier(m_exec, identifierToken.stringToken)); | 
| 342 |                     stateStack.append(val: DoParseObjectEndExpression); | 
| 343 |                     goto startParseExpression; | 
| 344 |                 } else if (type != TokRBrace)  | 
| 345 |                     return JSValue(); | 
| 346 |                 m_lexer.next(); | 
| 347 |                 lastValue = objectStack.last(); | 
| 348 |                 objectStack.removeLast(); | 
| 349 |                 break; | 
| 350 |             } | 
| 351 |             doParseObjectStartExpression: | 
| 352 |             case DoParseObjectStartExpression: { | 
| 353 |                 TokenType type = m_lexer.next(); | 
| 354 |                 if (type != TokString) | 
| 355 |                     return JSValue(); | 
| 356 |                 Lexer::LiteralParserToken identifierToken = m_lexer.currentToken(); | 
| 357 |  | 
| 358 |                 // Check for colon | 
| 359 |                 if (m_lexer.next() != TokColon) | 
| 360 |                     return JSValue(); | 
| 361 |  | 
| 362 |                 m_lexer.next(); | 
| 363 |                 identifierStack.append(val: Identifier(m_exec, identifierToken.stringToken)); | 
| 364 |                 stateStack.append(val: DoParseObjectEndExpression); | 
| 365 |                 goto startParseExpression; | 
| 366 |             } | 
| 367 |             case DoParseObjectEndExpression: | 
| 368 |             { | 
| 369 |                 asObject(value: objectStack.last())->putDirect(propertyName: identifierStack.last(), value: lastValue); | 
| 370 |                 identifierStack.removeLast(); | 
| 371 |                 if (m_lexer.currentToken().type == TokComma) | 
| 372 |                     goto doParseObjectStartExpression; | 
| 373 |                 if (m_lexer.currentToken().type != TokRBrace) | 
| 374 |                     return JSValue(); | 
| 375 |                 m_lexer.next(); | 
| 376 |                 lastValue = objectStack.last(); | 
| 377 |                 objectStack.removeLast(); | 
| 378 |                 break; | 
| 379 |             } | 
| 380 |             startParseExpression: | 
| 381 |             case StartParseExpression: { | 
| 382 |                 switch (m_lexer.currentToken().type) { | 
| 383 |                     case TokLBracket: | 
| 384 |                         goto startParseArray; | 
| 385 |                     case TokLBrace: | 
| 386 |                         goto startParseObject; | 
| 387 |                     case TokString: { | 
| 388 |                         Lexer::LiteralParserToken stringToken = m_lexer.currentToken(); | 
| 389 |                         m_lexer.next(); | 
| 390 |                         lastValue = jsString(exec: m_exec, s: stringToken.stringToken); | 
| 391 |                         break; | 
| 392 |                     } | 
| 393 |                     case TokNumber: { | 
| 394 |                         Lexer::LiteralParserToken numberToken = m_lexer.currentToken(); | 
| 395 |                         m_lexer.next(); | 
| 396 |                         lastValue = jsNumber(exec: m_exec, d: numberToken.numberToken); | 
| 397 |                         break; | 
| 398 |                     } | 
| 399 |                     case TokNull: | 
| 400 |                         m_lexer.next(); | 
| 401 |                         lastValue = jsNull(); | 
| 402 |                         break; | 
| 403 |  | 
| 404 |                     case TokTrue: | 
| 405 |                         m_lexer.next(); | 
| 406 |                         lastValue = jsBoolean(b: true); | 
| 407 |                         break; | 
| 408 |  | 
| 409 |                     case TokFalse: | 
| 410 |                         m_lexer.next(); | 
| 411 |                         lastValue = jsBoolean(b: false); | 
| 412 |                         break; | 
| 413 |  | 
| 414 |                     default: | 
| 415 |                         // Error | 
| 416 |                         return JSValue(); | 
| 417 |                 } | 
| 418 |                 break; | 
| 419 |             } | 
| 420 |             case StartParseStatement: { | 
| 421 |                 switch (m_lexer.currentToken().type) { | 
| 422 |                     case TokLBracket: | 
| 423 |                     case TokNumber: | 
| 424 |                     case TokString: | 
| 425 |                         goto startParseExpression; | 
| 426 |  | 
| 427 |                     case TokLParen: { | 
| 428 |                         m_lexer.next(); | 
| 429 |                         stateStack.append(val: StartParseStatementEndStatement); | 
| 430 |                         goto startParseExpression; | 
| 431 |                     } | 
| 432 |                     default: | 
| 433 |                         return JSValue(); | 
| 434 |                 } | 
| 435 |             } | 
| 436 |             case StartParseStatementEndStatement: { | 
| 437 |                 ASSERT(stateStack.isEmpty()); | 
| 438 |                 if (m_lexer.currentToken().type != TokRParen) | 
| 439 |                     return JSValue(); | 
| 440 |                 if (m_lexer.next() == TokEnd) | 
| 441 |                     return lastValue; | 
| 442 |                 return JSValue(); | 
| 443 |             } | 
| 444 |             default: | 
| 445 |                 ASSERT_NOT_REACHED(); | 
| 446 |         } | 
| 447 |         if (stateStack.isEmpty()) | 
| 448 |             return lastValue; | 
| 449 |         state = stateStack.last(); | 
| 450 |         stateStack.removeLast(); | 
| 451 |         continue; | 
| 452 |     } | 
| 453 | } | 
| 454 |  | 
| 455 | } | 
| 456 |  |