| 1 | /* | 
| 2 |  * Copyright (C) 2009 Apple Inc. All rights reserved. | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions | 
| 6 |  * are met: | 
| 7 |  * 1. Redistributions of source code must retain the above copyright | 
| 8 |  *    notice, this list of conditions and the following disclaimer. | 
| 9 |  * 2. Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  * | 
| 13 |  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 
| 14 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 15 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 
| 16 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR | 
| 17 |  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 
| 18 |  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 
| 19 |  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 20 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 
| 21 |  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
| 22 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
| 23 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| 24 |  */ | 
| 25 |  | 
| 26 | #include "config.h" | 
| 27 | #include "RegexInterpreter.h" | 
| 28 |  | 
| 29 | #include "RegexCompiler.h" | 
| 30 | #include "RegexPattern.h" | 
| 31 |  | 
| 32 | #ifndef NDEBUG | 
| 33 | #include <stdio.h> | 
| 34 | #endif | 
| 35 |  | 
| 36 | #if ENABLE(YARR) | 
| 37 |  | 
| 38 | using namespace WTF; | 
| 39 |  | 
| 40 | namespace JSC { namespace Yarr { | 
| 41 |  | 
| 42 | class Interpreter { | 
| 43 | public: | 
| 44 |     struct ParenthesesDisjunctionContext; | 
| 45 |  | 
| 46 |     struct BackTrackInfoPatternCharacter { | 
| 47 |         uintptr_t matchAmount; | 
| 48 |     }; | 
| 49 |     struct BackTrackInfoCharacterClass { | 
| 50 |         uintptr_t matchAmount; | 
| 51 |     }; | 
| 52 |     struct BackTrackInfoBackReference { | 
| 53 |         uintptr_t begin; // Not really needed for greedy quantifiers. | 
| 54 |         uintptr_t matchAmount; // Not really needed for fixed quantifiers. | 
| 55 |     }; | 
| 56 |     struct BackTrackInfoAlternative { | 
| 57 |         uintptr_t offset; | 
| 58 |     }; | 
| 59 |     struct BackTrackInfoParentheticalAssertion { | 
| 60 |         uintptr_t begin; | 
| 61 |     }; | 
| 62 |     struct BackTrackInfoParenthesesOnce { | 
| 63 |         uintptr_t inParentheses; | 
| 64 |     }; | 
| 65 |     struct BackTrackInfoParentheses { | 
| 66 |         uintptr_t matchAmount; | 
| 67 |         ParenthesesDisjunctionContext* lastContext; | 
| 68 |         uintptr_t prevBegin; | 
| 69 |         uintptr_t prevEnd; | 
| 70 |     }; | 
| 71 |  | 
| 72 |     static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context) | 
| 73 |     { | 
| 74 |         context->next = backTrack->lastContext; | 
| 75 |         backTrack->lastContext = context; | 
| 76 |         ++backTrack->matchAmount; | 
| 77 |     } | 
| 78 |  | 
| 79 |     static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack) | 
| 80 |     { | 
| 81 |         ASSERT(backTrack->matchAmount); | 
| 82 |         ASSERT(backTrack->lastContext); | 
| 83 |         backTrack->lastContext = backTrack->lastContext->next; | 
| 84 |         --backTrack->matchAmount; | 
| 85 |     } | 
| 86 |  | 
| 87 |     struct DisjunctionContext | 
| 88 |     { | 
| 89 |         DisjunctionContext() | 
| 90 |             : term(0) | 
| 91 |         { | 
| 92 |         } | 
| 93 |  | 
| 94 |         void* operator new(size_t, void* where) | 
| 95 |         { | 
| 96 |             return where; | 
| 97 |         } | 
| 98 |  | 
| 99 |         int term; | 
| 100 |         unsigned matchBegin; | 
| 101 |         unsigned matchEnd; | 
| 102 |         uintptr_t frame[1]; | 
| 103 |     }; | 
| 104 |  | 
| 105 |     DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction) | 
| 106 |     { | 
| 107 |         return new(malloc(size: sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext(); | 
| 108 |     } | 
| 109 |  | 
| 110 |     void freeDisjunctionContext(DisjunctionContext* context) | 
| 111 |     { | 
| 112 |         free(ptr: context); | 
| 113 |     } | 
| 114 |  | 
| 115 |     struct ParenthesesDisjunctionContext | 
| 116 |     { | 
| 117 |         ParenthesesDisjunctionContext(int* output, ByteTerm& term) | 
| 118 |             : next(0) | 
| 119 |         { | 
| 120 |             unsigned firstSubpatternId = term.atom.subpatternId; | 
| 121 |             unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns; | 
| 122 |  | 
| 123 |             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) { | 
| 124 |                 subpatternBackup[i] = output[(firstSubpatternId << 1) + i]; | 
| 125 |                 output[(firstSubpatternId << 1) + i] = -1; | 
| 126 |             } | 
| 127 |  | 
| 128 |             new(getDisjunctionContext(term)) DisjunctionContext(); | 
| 129 |         } | 
| 130 |  | 
| 131 |         void* operator new(size_t, void* where) | 
| 132 |         { | 
| 133 |             return where; | 
| 134 |         } | 
| 135 |  | 
| 136 |         void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns) | 
| 137 |         { | 
| 138 |             for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) | 
| 139 |                 output[(firstSubpatternId << 1) + i] = subpatternBackup[i]; | 
| 140 |         } | 
| 141 |  | 
| 142 |         DisjunctionContext* getDisjunctionContext(ByteTerm& term) | 
| 143 |         { | 
| 144 |             return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1])); | 
| 145 |         } | 
| 146 |  | 
| 147 |         ParenthesesDisjunctionContext* next; | 
| 148 |         int subpatternBackup[1]; | 
| 149 |     }; | 
| 150 |  | 
| 151 |     ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term) | 
| 152 |     { | 
| 153 |         return new(malloc(size: sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term); | 
| 154 |     } | 
| 155 |  | 
| 156 |     void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context) | 
| 157 |     { | 
| 158 |         free(ptr: context); | 
| 159 |     } | 
| 160 |  | 
| 161 |     class InputStream { | 
| 162 |     public: | 
| 163 |         InputStream(const UChar* input, unsigned start, unsigned length) | 
| 164 |             : input(input) | 
| 165 |             , pos(start) | 
| 166 |             , length(length) | 
| 167 |         { | 
| 168 |         } | 
| 169 |  | 
| 170 |         void next() | 
| 171 |         { | 
| 172 |             ++pos; | 
| 173 |         } | 
| 174 |  | 
| 175 |         void rewind(unsigned amount) | 
| 176 |         { | 
| 177 |             ASSERT(pos >= amount); | 
| 178 |             pos -= amount; | 
| 179 |         } | 
| 180 |  | 
| 181 |         int read() | 
| 182 |         { | 
| 183 |             ASSERT(pos < length); | 
| 184 |             if (pos < length) | 
| 185 |                 return input[pos]; | 
| 186 |             return -1; | 
| 187 |         } | 
| 188 |  | 
| 189 |         int readChecked(int position) | 
| 190 |         { | 
| 191 |             ASSERT(position < 0); | 
| 192 |             ASSERT((unsigned)-position <= pos); | 
| 193 |             unsigned p = pos + position; | 
| 194 |             ASSERT(p < length); | 
| 195 |             return input[p]; | 
| 196 |         } | 
| 197 |  | 
| 198 |         int reread(unsigned from) | 
| 199 |         { | 
| 200 |             ASSERT(from < length); | 
| 201 |             return input[from]; | 
| 202 |         } | 
| 203 |  | 
| 204 |         int prev() | 
| 205 |         { | 
| 206 |             ASSERT(!(pos > length)); | 
| 207 |             if (pos && length) | 
| 208 |                 return input[pos - 1]; | 
| 209 |             return -1; | 
| 210 |         } | 
| 211 |  | 
| 212 |         unsigned getPos() | 
| 213 |         { | 
| 214 |             return pos; | 
| 215 |         } | 
| 216 |  | 
| 217 |         void setPos(unsigned p) | 
| 218 |         { | 
| 219 |             pos = p; | 
| 220 |         } | 
| 221 |  | 
| 222 |         bool atStart() | 
| 223 |         { | 
| 224 |             return pos == 0; | 
| 225 |         } | 
| 226 |  | 
| 227 |         bool atEnd() | 
| 228 |         { | 
| 229 |             return pos == length; | 
| 230 |         } | 
| 231 |  | 
| 232 |         bool checkInput(int count) | 
| 233 |         { | 
| 234 |             if ((pos + count) <= length) { | 
| 235 |                 pos += count; | 
| 236 |                 return true; | 
| 237 |             } else | 
| 238 |                 return false; | 
| 239 |         } | 
| 240 |  | 
| 241 |         void uncheckInput(int count) | 
| 242 |         { | 
| 243 |             pos -= count; | 
| 244 |         } | 
| 245 |  | 
| 246 |         bool atStart(int position) | 
| 247 |         { | 
| 248 |             return (pos + position) == 0; | 
| 249 |         } | 
| 250 |  | 
| 251 |         bool atEnd(int position) | 
| 252 |         { | 
| 253 |             return (pos + position) == length; | 
| 254 |         } | 
| 255 |  | 
| 256 |     private: | 
| 257 |         const UChar* input; | 
| 258 |         unsigned pos; | 
| 259 |         unsigned length; | 
| 260 |     }; | 
| 261 |  | 
| 262 |     bool testCharacterClass(CharacterClass* characterClass, int ch) | 
| 263 |     { | 
| 264 |         if (ch & 0xFF80) { | 
| 265 |             for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i) | 
| 266 |                 if (ch == characterClass->m_matchesUnicode[i]) | 
| 267 |                     return true; | 
| 268 |             for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i) | 
| 269 |                 if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end)) | 
| 270 |                     return true; | 
| 271 |         } else { | 
| 272 |             for (unsigned i = 0; i < characterClass->m_matches.size(); ++i) | 
| 273 |                 if (ch == characterClass->m_matches[i]) | 
| 274 |                     return true; | 
| 275 |             for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i) | 
| 276 |                 if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end)) | 
| 277 |                     return true; | 
| 278 |         } | 
| 279 |  | 
| 280 |         return false; | 
| 281 |     } | 
| 282 |  | 
| 283 |     bool tryConsumeCharacter(int testChar) | 
| 284 |     { | 
| 285 |         if (input.atEnd()) | 
| 286 |             return false; | 
| 287 |  | 
| 288 |         int ch = input.read(); | 
| 289 |  | 
| 290 |         if (pattern->m_ignoreCase ? ((Unicode::toLower(ch: testChar) == ch) || (Unicode::toUpper(ch: testChar) == ch)) : (testChar == ch)) { | 
| 291 |             input.next(); | 
| 292 |             return true; | 
| 293 |         } | 
| 294 |         return false; | 
| 295 |     } | 
| 296 |  | 
| 297 |     bool checkCharacter(int testChar, int inputPosition) | 
| 298 |     { | 
| 299 |         return testChar == input.readChecked(position: inputPosition); | 
| 300 |     } | 
| 301 |  | 
| 302 |     bool checkCasedCharacter(int loChar, int hiChar, int inputPosition) | 
| 303 |     { | 
| 304 |         int ch = input.readChecked(position: inputPosition); | 
| 305 |         return (loChar == ch) || (hiChar == ch); | 
| 306 |     } | 
| 307 |  | 
| 308 |     bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert) | 
| 309 |     { | 
| 310 |         if (input.atEnd()) | 
| 311 |             return false; | 
| 312 |  | 
| 313 |         bool match = testCharacterClass(characterClass, ch: input.read()); | 
| 314 |  | 
| 315 |         if (invert) | 
| 316 |             match = !match; | 
| 317 |  | 
| 318 |         if (match) { | 
| 319 |             input.next(); | 
| 320 |             return true; | 
| 321 |         } | 
| 322 |         return false; | 
| 323 |     } | 
| 324 |  | 
| 325 |     bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition) | 
| 326 |     { | 
| 327 |         bool match = testCharacterClass(characterClass, ch: input.readChecked(position: inputPosition)); | 
| 328 |         return invert ? !match : match; | 
| 329 |     } | 
| 330 |  | 
| 331 |     bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset) | 
| 332 |     { | 
| 333 |         int matchSize = matchEnd - matchBegin; | 
| 334 |  | 
| 335 |         if (!input.checkInput(count: matchSize)) | 
| 336 |             return false; | 
| 337 |  | 
| 338 |         for (int i = 0; i < matchSize; ++i) { | 
| 339 |             if (!checkCharacter(testChar: input.reread(from: matchBegin + i), inputPosition: inputOffset - matchSize + i)) { | 
| 340 |                 input.uncheckInput(count: matchSize); | 
| 341 |                 return false; | 
| 342 |             } | 
| 343 |         } | 
| 344 |  | 
| 345 |         return true; | 
| 346 |     } | 
| 347 |  | 
| 348 |     bool matchAssertionBOL(ByteTerm& term) | 
| 349 |     { | 
| 350 |         return (input.atStart(position: term.inputPosition)) || (pattern->m_multiline && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.readChecked(position: term.inputPosition - 1))); | 
| 351 |     } | 
| 352 |  | 
| 353 |     bool matchAssertionEOL(ByteTerm& term) | 
| 354 |     { | 
| 355 |         if (term.inputPosition) | 
| 356 |             return (input.atEnd(position: term.inputPosition)) || (pattern->m_multiline && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.readChecked(position: term.inputPosition))); | 
| 357 |         else | 
| 358 |             return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.read())); | 
| 359 |     } | 
| 360 |  | 
| 361 |     bool matchAssertionWordBoundary(ByteTerm& term) | 
| 362 |     { | 
| 363 |         bool prevIsWordchar = !input.atStart(position: term.inputPosition) && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.readChecked(position: term.inputPosition - 1)); | 
| 364 |         bool readIsWordchar; | 
| 365 |         if (term.inputPosition) | 
| 366 |             readIsWordchar = !input.atEnd(position: term.inputPosition) && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.readChecked(position: term.inputPosition)); | 
| 367 |         else | 
| 368 |             readIsWordchar = !input.atEnd() && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.read()); | 
| 369 |  | 
| 370 |         bool wordBoundary = prevIsWordchar != readIsWordchar; | 
| 371 |         return term.invert() ? !wordBoundary : wordBoundary; | 
| 372 |     } | 
| 373 |  | 
| 374 |     bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context) | 
| 375 |     { | 
| 376 |         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); | 
| 377 |  | 
| 378 |         switch (term.atom.quantityType) { | 
| 379 |         case QuantifierFixedCount: | 
| 380 |             break; | 
| 381 |  | 
| 382 |         case QuantifierGreedy: | 
| 383 |             if (backTrack->matchAmount) { | 
| 384 |                 --backTrack->matchAmount; | 
| 385 |                 input.uncheckInput(count: 1); | 
| 386 |                 return true; | 
| 387 |             } | 
| 388 |             break; | 
| 389 |  | 
| 390 |         case QuantifierNonGreedy: | 
| 391 |             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(count: 1)) { | 
| 392 |                 ++backTrack->matchAmount; | 
| 393 |                 if (checkCharacter(testChar: term.atom.patternCharacter, inputPosition: term.inputPosition - 1)) | 
| 394 |                     return true; | 
| 395 |             } | 
| 396 |             input.uncheckInput(count: backTrack->matchAmount); | 
| 397 |             break; | 
| 398 |         } | 
| 399 |  | 
| 400 |         return false; | 
| 401 |     } | 
| 402 |  | 
| 403 |     bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context) | 
| 404 |     { | 
| 405 |         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); | 
| 406 |  | 
| 407 |         switch (term.atom.quantityType) { | 
| 408 |         case QuantifierFixedCount: | 
| 409 |             break; | 
| 410 |  | 
| 411 |         case QuantifierGreedy: | 
| 412 |             if (backTrack->matchAmount) { | 
| 413 |                 --backTrack->matchAmount; | 
| 414 |                 input.uncheckInput(count: 1); | 
| 415 |                 return true; | 
| 416 |             } | 
| 417 |             break; | 
| 418 |  | 
| 419 |         case QuantifierNonGreedy: | 
| 420 |             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(count: 1)) { | 
| 421 |                 ++backTrack->matchAmount; | 
| 422 |                 if (checkCasedCharacter(loChar: term.atom.casedCharacter.lo, hiChar: term.atom.casedCharacter.hi, inputPosition: term.inputPosition - 1)) | 
| 423 |                     return true; | 
| 424 |             } | 
| 425 |             input.uncheckInput(count: backTrack->matchAmount); | 
| 426 |             break; | 
| 427 |         } | 
| 428 |  | 
| 429 |         return false; | 
| 430 |     } | 
| 431 |  | 
| 432 |     bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context) | 
| 433 |     { | 
| 434 |         ASSERT(term.type == ByteTerm::TypeCharacterClass); | 
| 435 |         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); | 
| 436 |  | 
| 437 |         switch (term.atom.quantityType) { | 
| 438 |         case QuantifierFixedCount: { | 
| 439 |             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { | 
| 440 |                 if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), inputPosition: term.inputPosition + matchAmount)) | 
| 441 |                     return false; | 
| 442 |             } | 
| 443 |             return true; | 
| 444 |         } | 
| 445 |  | 
| 446 |         case QuantifierGreedy: { | 
| 447 |             unsigned matchAmount = 0; | 
| 448 |             while ((matchAmount < term.atom.quantityCount) && input.checkInput(count: 1)) { | 
| 449 |                 if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), inputPosition: term.inputPosition - 1)) { | 
| 450 |                     input.uncheckInput(count: 1); | 
| 451 |                     break; | 
| 452 |                 } | 
| 453 |                 ++matchAmount; | 
| 454 |             } | 
| 455 |             backTrack->matchAmount = matchAmount; | 
| 456 |  | 
| 457 |             return true; | 
| 458 |         } | 
| 459 |  | 
| 460 |         case QuantifierNonGreedy: | 
| 461 |             backTrack->matchAmount = 0; | 
| 462 |             return true; | 
| 463 |         } | 
| 464 |  | 
| 465 |         ASSERT_NOT_REACHED(); | 
| 466 |         return false; | 
| 467 |     } | 
| 468 |  | 
| 469 |     bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context) | 
| 470 |     { | 
| 471 |         ASSERT(term.type == ByteTerm::TypeCharacterClass); | 
| 472 |         BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation); | 
| 473 |  | 
| 474 |         switch (term.atom.quantityType) { | 
| 475 |         case QuantifierFixedCount: | 
| 476 |             break; | 
| 477 |  | 
| 478 |         case QuantifierGreedy: | 
| 479 |             if (backTrack->matchAmount) { | 
| 480 |                 --backTrack->matchAmount; | 
| 481 |                 input.uncheckInput(count: 1); | 
| 482 |                 return true; | 
| 483 |             } | 
| 484 |             break; | 
| 485 |  | 
| 486 |         case QuantifierNonGreedy: | 
| 487 |             if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(count: 1)) { | 
| 488 |                 ++backTrack->matchAmount; | 
| 489 |                 if (checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), inputPosition: term.inputPosition - 1)) | 
| 490 |                     return true; | 
| 491 |             } | 
| 492 |             input.uncheckInput(count: backTrack->matchAmount); | 
| 493 |             break; | 
| 494 |         } | 
| 495 |  | 
| 496 |         return false; | 
| 497 |     } | 
| 498 |  | 
| 499 |     bool matchBackReference(ByteTerm& term, DisjunctionContext* context) | 
| 500 |     { | 
| 501 |         ASSERT(term.type == ByteTerm::TypeBackReference); | 
| 502 |         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); | 
| 503 |  | 
| 504 |         int matchBegin = output[(term.atom.subpatternId << 1)]; | 
| 505 |         int matchEnd = output[(term.atom.subpatternId << 1) + 1]; | 
| 506 |         ASSERT((matchBegin == -1) == (matchEnd == -1)); | 
| 507 |         ASSERT(matchBegin <= matchEnd); | 
| 508 |  | 
| 509 |         if (matchBegin == matchEnd) | 
| 510 |             return true; | 
| 511 |  | 
| 512 |         switch (term.atom.quantityType) { | 
| 513 |         case QuantifierFixedCount: { | 
| 514 |             backTrack->begin = input.getPos(); | 
| 515 |             for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) { | 
| 516 |                 if (!tryConsumeBackReference(matchBegin, matchEnd, inputOffset: term.inputPosition)) { | 
| 517 |                     input.setPos(backTrack->begin); | 
| 518 |                     return false; | 
| 519 |                 } | 
| 520 |             } | 
| 521 |             return true; | 
| 522 |         } | 
| 523 |  | 
| 524 |         case QuantifierGreedy: { | 
| 525 |             unsigned matchAmount = 0; | 
| 526 |             while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, inputOffset: term.inputPosition)) | 
| 527 |                 ++matchAmount; | 
| 528 |             backTrack->matchAmount = matchAmount; | 
| 529 |             return true; | 
| 530 |         } | 
| 531 |  | 
| 532 |         case QuantifierNonGreedy: | 
| 533 |             backTrack->begin = input.getPos(); | 
| 534 |             backTrack->matchAmount = 0; | 
| 535 |             return true; | 
| 536 |         } | 
| 537 |  | 
| 538 |         ASSERT_NOT_REACHED(); | 
| 539 |         return false; | 
| 540 |     } | 
| 541 |  | 
| 542 |     bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context) | 
| 543 |     { | 
| 544 |         ASSERT(term.type == ByteTerm::TypeBackReference); | 
| 545 |         BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation); | 
| 546 |  | 
| 547 |         int matchBegin = output[(term.atom.subpatternId << 1)]; | 
| 548 |         int matchEnd = output[(term.atom.subpatternId << 1) + 1]; | 
| 549 |         ASSERT((matchBegin == -1) == (matchEnd == -1)); | 
| 550 |         ASSERT(matchBegin <= matchEnd); | 
| 551 |  | 
| 552 |         if (matchBegin == matchEnd) | 
| 553 |             return false; | 
| 554 |  | 
| 555 |         switch (term.atom.quantityType) { | 
| 556 |         case QuantifierFixedCount: | 
| 557 |             // for quantityCount == 1, could rewind. | 
| 558 |             input.setPos(backTrack->begin); | 
| 559 |             break; | 
| 560 |  | 
| 561 |         case QuantifierGreedy: | 
| 562 |             if (backTrack->matchAmount) { | 
| 563 |                 --backTrack->matchAmount; | 
| 564 |                 input.rewind(amount: matchEnd - matchBegin); | 
| 565 |                 return true; | 
| 566 |             } | 
| 567 |             break; | 
| 568 |  | 
| 569 |         case QuantifierNonGreedy: | 
| 570 |             if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, inputOffset: term.inputPosition)) { | 
| 571 |                 ++backTrack->matchAmount; | 
| 572 |                 return true; | 
| 573 |             } else | 
| 574 |                 input.setPos(backTrack->begin); | 
| 575 |             break; | 
| 576 |         } | 
| 577 |  | 
| 578 |         return false; | 
| 579 |     } | 
| 580 |  | 
| 581 |     void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context) | 
| 582 |     { | 
| 583 |         if (term.capture()) { | 
| 584 |             unsigned subpatternId = term.atom.subpatternId; | 
| 585 |             output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition; | 
| 586 |             output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition; | 
| 587 |         } | 
| 588 |     } | 
| 589 |     void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context) | 
| 590 |     { | 
| 591 |         unsigned firstSubpatternId = term.atom.subpatternId; | 
| 592 |         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; | 
| 593 |         context->restoreOutput(output, firstSubpatternId, numNestedSubpatterns: count); | 
| 594 |     } | 
| 595 |     void resetAssertionMatches(ByteTerm& term) | 
| 596 |     { | 
| 597 |         unsigned firstSubpatternId = term.atom.subpatternId; | 
| 598 |         unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns; | 
| 599 |         for (unsigned i = 0; i < (count << 1); ++i) | 
| 600 |             output[(firstSubpatternId << 1) + i] = -1; | 
| 601 |     } | 
| 602 |     bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack) | 
| 603 |     { | 
| 604 |         while (backTrack->matchAmount) { | 
| 605 |             ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 606 |  | 
| 607 |             if (matchDisjunction(disjunction: term.atom.parenthesesDisjunction, context: context->getDisjunctionContext(term), btrack: true)) | 
| 608 |                 return true; | 
| 609 |  | 
| 610 |             resetMatches(term, context); | 
| 611 |             popParenthesesDisjunctionContext(backTrack); | 
| 612 |             freeParenthesesDisjunctionContext(context); | 
| 613 |         } | 
| 614 |  | 
| 615 |         return false; | 
| 616 |     } | 
| 617 |  | 
| 618 |     bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) | 
| 619 |     { | 
| 620 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); | 
| 621 |         ASSERT(term.atom.quantityCount == 1); | 
| 622 |  | 
| 623 |         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); | 
| 624 |  | 
| 625 |         switch (term.atom.quantityType) { | 
| 626 |         case QuantifierGreedy: { | 
| 627 |             // set this speculatively; if we get to the parens end this will be true. | 
| 628 |             backTrack->inParentheses = 1; | 
| 629 |             break; | 
| 630 |         } | 
| 631 |         case QuantifierNonGreedy: { | 
| 632 |             backTrack->inParentheses = 0; | 
| 633 |             context->term += term.atom.parenthesesWidth; | 
| 634 |             return true; | 
| 635 |         } | 
| 636 |         case QuantifierFixedCount: | 
| 637 |             break; | 
| 638 |         } | 
| 639 |  | 
| 640 |         if (term.capture()) { | 
| 641 |             unsigned subpatternId = term.atom.subpatternId; | 
| 642 |             output[(subpatternId << 1)] = input.getPos() + term.inputPosition; | 
| 643 |         } | 
| 644 |  | 
| 645 |         return true; | 
| 646 |     } | 
| 647 |  | 
| 648 |     bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*) | 
| 649 |     { | 
| 650 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); | 
| 651 |         ASSERT(term.atom.quantityCount == 1); | 
| 652 |  | 
| 653 |         if (term.capture()) { | 
| 654 |             unsigned subpatternId = term.atom.subpatternId; | 
| 655 |             output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; | 
| 656 |         } | 
| 657 |         return true; | 
| 658 |     } | 
| 659 |  | 
| 660 |     bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context) | 
| 661 |     { | 
| 662 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); | 
| 663 |         ASSERT(term.atom.quantityCount == 1); | 
| 664 |  | 
| 665 |         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); | 
| 666 |  | 
| 667 |         if (term.capture()) { | 
| 668 |             unsigned subpatternId = term.atom.subpatternId; | 
| 669 |             output[(subpatternId << 1)] = -1; | 
| 670 |             output[(subpatternId << 1) + 1] = -1; | 
| 671 |         } | 
| 672 |  | 
| 673 |         switch (term.atom.quantityType) { | 
| 674 |         case QuantifierGreedy: | 
| 675 |             // if we backtrack to this point, there is another chance - try matching nothing. | 
| 676 |             ASSERT(backTrack->inParentheses); | 
| 677 |             backTrack->inParentheses = 0; | 
| 678 |             context->term += term.atom.parenthesesWidth; | 
| 679 |             return true; | 
| 680 |         case QuantifierNonGreedy: | 
| 681 |             ASSERT(backTrack->inParentheses); | 
| 682 |         case QuantifierFixedCount: | 
| 683 |             break; | 
| 684 |         } | 
| 685 |  | 
| 686 |         return false; | 
| 687 |     } | 
| 688 |  | 
| 689 |     bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context) | 
| 690 |     { | 
| 691 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd); | 
| 692 |         ASSERT(term.atom.quantityCount == 1); | 
| 693 |  | 
| 694 |         BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation); | 
| 695 |  | 
| 696 |         switch (term.atom.quantityType) { | 
| 697 |         case QuantifierGreedy: | 
| 698 |             if (!backTrack->inParentheses) { | 
| 699 |                 context->term -= term.atom.parenthesesWidth; | 
| 700 |                 return false; | 
| 701 |             } | 
| 702 |         case QuantifierNonGreedy: | 
| 703 |             if (!backTrack->inParentheses) { | 
| 704 |                 // now try to match the parens; set this speculatively. | 
| 705 |                 backTrack->inParentheses = 1; | 
| 706 |                 if (term.capture()) { | 
| 707 |                     unsigned subpatternId = term.atom.subpatternId; | 
| 708 |                     output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition; | 
| 709 |                 } | 
| 710 |                 context->term -= term.atom.parenthesesWidth; | 
| 711 |                 return true; | 
| 712 |             } | 
| 713 |         case QuantifierFixedCount: | 
| 714 |             break; | 
| 715 |         } | 
| 716 |  | 
| 717 |         return false; | 
| 718 |     } | 
| 719 |  | 
| 720 |     bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) | 
| 721 |     { | 
| 722 |         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); | 
| 723 |         ASSERT(term.atom.quantityCount == 1); | 
| 724 |  | 
| 725 |         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); | 
| 726 |  | 
| 727 |         backTrack->begin = input.getPos(); | 
| 728 |         return true; | 
| 729 |     } | 
| 730 |  | 
| 731 |     bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) | 
| 732 |     { | 
| 733 |         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); | 
| 734 |         ASSERT(term.atom.quantityCount == 1); | 
| 735 |  | 
| 736 |         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); | 
| 737 |  | 
| 738 |         input.setPos(backTrack->begin); | 
| 739 |  | 
| 740 |         // We've reached the end of the parens; if they are inverted, this is failure. | 
| 741 |         if (term.invert()) { | 
| 742 |             context->term -= term.atom.parenthesesWidth; | 
| 743 |             return false; | 
| 744 |         } | 
| 745 |  | 
| 746 |         return true; | 
| 747 |     } | 
| 748 |  | 
| 749 |     bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context) | 
| 750 |     { | 
| 751 |         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin); | 
| 752 |         ASSERT(term.atom.quantityCount == 1); | 
| 753 |  | 
| 754 |         // We've failed to match parens; if they are inverted, this is win! | 
| 755 |         if (term.invert()) { | 
| 756 |             context->term += term.atom.parenthesesWidth; | 
| 757 |             return true; | 
| 758 |         } | 
| 759 |  | 
| 760 |         return false; | 
| 761 |     } | 
| 762 |  | 
| 763 |     bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context) | 
| 764 |     { | 
| 765 |         ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd); | 
| 766 |         ASSERT(term.atom.quantityCount == 1); | 
| 767 |  | 
| 768 |         BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation); | 
| 769 |  | 
| 770 |         input.setPos(backTrack->begin); | 
| 771 |  | 
| 772 |         context->term -= term.atom.parenthesesWidth; | 
| 773 |         return false; | 
| 774 |     } | 
| 775 |  | 
| 776 |     bool matchParentheses(ByteTerm& term, DisjunctionContext* context) | 
| 777 |     { | 
| 778 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); | 
| 779 |  | 
| 780 |         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); | 
| 781 |  | 
| 782 |         unsigned subpatternId = term.atom.subpatternId; | 
| 783 |         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; | 
| 784 |  | 
| 785 |         backTrack->prevBegin = output[(subpatternId << 1)]; | 
| 786 |         backTrack->prevEnd = output[(subpatternId << 1) + 1]; | 
| 787 |  | 
| 788 |         backTrack->matchAmount = 0; | 
| 789 |         backTrack->lastContext = 0; | 
| 790 |  | 
| 791 |         switch (term.atom.quantityType) { | 
| 792 |         case QuantifierFixedCount: { | 
| 793 |             // While we haven't yet reached our fixed limit, | 
| 794 |             while (backTrack->matchAmount < term.atom.quantityCount) { | 
| 795 |                 // Try to do a match, and it it succeeds, add it to the list. | 
| 796 |                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term); | 
| 797 |                 if (matchDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term))) | 
| 798 |                     appendParenthesesDisjunctionContext(backTrack, context); | 
| 799 |                 else { | 
| 800 |                     // The match failed; try to find an alternate point to carry on from. | 
| 801 |                     resetMatches(term, context); | 
| 802 |                     freeParenthesesDisjunctionContext(context); | 
| 803 |                     if (!parenthesesDoBacktrack(term, backTrack)) | 
| 804 |                         return false; | 
| 805 |                 } | 
| 806 |             } | 
| 807 |  | 
| 808 |             ASSERT(backTrack->matchAmount == term.atom.quantityCount); | 
| 809 |             ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 810 |             recordParenthesesMatch(term, context); | 
| 811 |             return true; | 
| 812 |         } | 
| 813 |  | 
| 814 |         case QuantifierGreedy: { | 
| 815 |             while (backTrack->matchAmount < term.atom.quantityCount) { | 
| 816 |                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term); | 
| 817 |                 if (matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term))) | 
| 818 |                     appendParenthesesDisjunctionContext(backTrack, context); | 
| 819 |                 else { | 
| 820 |                     resetMatches(term, context); | 
| 821 |                     freeParenthesesDisjunctionContext(context); | 
| 822 |                     break; | 
| 823 |                 } | 
| 824 |             } | 
| 825 |  | 
| 826 |             if (backTrack->matchAmount) { | 
| 827 |                 ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 828 |                 recordParenthesesMatch(term, context); | 
| 829 |             } | 
| 830 |             return true; | 
| 831 |         } | 
| 832 |  | 
| 833 |         case QuantifierNonGreedy: | 
| 834 |             return true; | 
| 835 |         } | 
| 836 |  | 
| 837 |         ASSERT_NOT_REACHED(); | 
| 838 |         return false; | 
| 839 |     } | 
| 840 |  | 
| 841 |     // Rules for backtracking differ depending on whether this is greedy or non-greedy. | 
| 842 |     // | 
| 843 |     // Greedy matches never should try just adding more - you should already have done | 
| 844 |     // the 'more' cases.  Always backtrack, at least a leetle bit.  However cases where | 
| 845 |     // you backtrack an item off the list needs checking, since we'll never have matched | 
| 846 |     // the one less case.  Tracking forwards, still add as much as possible. | 
| 847 |     // | 
| 848 |     // Non-greedy, we've already done the one less case, so don't match on popping. | 
| 849 |     // We haven't done the one more case, so always try to add that. | 
| 850 |     // | 
| 851 |     bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context) | 
| 852 |     { | 
| 853 |         ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern); | 
| 854 |  | 
| 855 |         BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation); | 
| 856 |  | 
| 857 |         if (term.capture()) { | 
| 858 |             unsigned subpatternId = term.atom.subpatternId; | 
| 859 |             output[(subpatternId << 1)] = backTrack->prevBegin; | 
| 860 |             output[(subpatternId << 1) + 1] = backTrack->prevEnd; | 
| 861 |         } | 
| 862 |  | 
| 863 |         ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction; | 
| 864 |  | 
| 865 |         switch (term.atom.quantityType) { | 
| 866 |         case QuantifierFixedCount: { | 
| 867 |             ASSERT(backTrack->matchAmount == term.atom.quantityCount); | 
| 868 |  | 
| 869 |             ParenthesesDisjunctionContext* context = 0; | 
| 870 |  | 
| 871 |             if (!parenthesesDoBacktrack(term, backTrack)) | 
| 872 |                 return false; | 
| 873 |  | 
| 874 |             // While we haven't yet reached our fixed limit, | 
| 875 |             while (backTrack->matchAmount < term.atom.quantityCount) { | 
| 876 |                 // Try to do a match, and it it succeeds, add it to the list. | 
| 877 |                 context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term); | 
| 878 |                 if (matchDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term))) | 
| 879 |                     appendParenthesesDisjunctionContext(backTrack, context); | 
| 880 |                 else { | 
| 881 |                     // The match failed; try to find an alternate point to carry on from. | 
| 882 |                     resetMatches(term, context); | 
| 883 |                     freeParenthesesDisjunctionContext(context); | 
| 884 |                     if (!parenthesesDoBacktrack(term, backTrack)) | 
| 885 |                         return false; | 
| 886 |                 } | 
| 887 |             } | 
| 888 |  | 
| 889 |             ASSERT(backTrack->matchAmount == term.atom.quantityCount); | 
| 890 |             context = backTrack->lastContext; | 
| 891 |             recordParenthesesMatch(term, context); | 
| 892 |             return true; | 
| 893 |         } | 
| 894 |  | 
| 895 |         case QuantifierGreedy: { | 
| 896 |             if (!backTrack->matchAmount) | 
| 897 |                 return false; | 
| 898 |  | 
| 899 |             ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 900 |             if (matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term), btrack: true)) { | 
| 901 |                 while (backTrack->matchAmount < term.atom.quantityCount) { | 
| 902 |                     ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term); | 
| 903 |                     if (matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term))) | 
| 904 |                         appendParenthesesDisjunctionContext(backTrack, context); | 
| 905 |                     else { | 
| 906 |                         resetMatches(term, context); | 
| 907 |                         freeParenthesesDisjunctionContext(context); | 
| 908 |                         break; | 
| 909 |                     } | 
| 910 |                 } | 
| 911 |             } else { | 
| 912 |                 resetMatches(term, context); | 
| 913 |                 popParenthesesDisjunctionContext(backTrack); | 
| 914 |                 freeParenthesesDisjunctionContext(context); | 
| 915 |             } | 
| 916 |  | 
| 917 |             if (backTrack->matchAmount) { | 
| 918 |                 ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 919 |                 recordParenthesesMatch(term, context); | 
| 920 |             } | 
| 921 |             return true; | 
| 922 |         } | 
| 923 |  | 
| 924 |         case QuantifierNonGreedy: { | 
| 925 |             // If we've not reached the limit, try to add one more match. | 
| 926 |             if (backTrack->matchAmount < term.atom.quantityCount) { | 
| 927 |                 ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term); | 
| 928 |                 if (matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term))) { | 
| 929 |                     appendParenthesesDisjunctionContext(backTrack, context); | 
| 930 |                     recordParenthesesMatch(term, context); | 
| 931 |                     return true; | 
| 932 |                 } else { | 
| 933 |                     resetMatches(term, context); | 
| 934 |                     freeParenthesesDisjunctionContext(context); | 
| 935 |                 } | 
| 936 |             } | 
| 937 |  | 
| 938 |             // Nope - okay backtrack looking for an alternative. | 
| 939 |             while (backTrack->matchAmount) { | 
| 940 |                 ParenthesesDisjunctionContext* context = backTrack->lastContext; | 
| 941 |                 if (matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term), btrack: true)) { | 
| 942 |                     // successful backtrack! we're back in the game! | 
| 943 |                     if (backTrack->matchAmount) { | 
| 944 |                         context = backTrack->lastContext; | 
| 945 |                         recordParenthesesMatch(term, context); | 
| 946 |                     } | 
| 947 |                     return true; | 
| 948 |                 } | 
| 949 |  | 
| 950 |                 // pop a match off the stack | 
| 951 |                 resetMatches(term, context); | 
| 952 |                 popParenthesesDisjunctionContext(backTrack); | 
| 953 |                 freeParenthesesDisjunctionContext(context); | 
| 954 |             } | 
| 955 |  | 
| 956 |             return false; | 
| 957 |         } | 
| 958 |         } | 
| 959 |  | 
| 960 |         ASSERT_NOT_REACHED(); | 
| 961 |         return false; | 
| 962 |     } | 
| 963 |  | 
| 964 | #define MATCH_NEXT() { ++context->term; goto matchAgain; } | 
| 965 | #define BACKTRACK() { --context->term; goto backtrack; } | 
| 966 | #define currentTerm() (disjunction->terms[context->term]) | 
| 967 |     bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) | 
| 968 |     { | 
| 969 |         if (btrack) | 
| 970 |             BACKTRACK(); | 
| 971 |  | 
| 972 |         context->matchBegin = input.getPos(); | 
| 973 |         context->term = 0; | 
| 974 |  | 
| 975 |     matchAgain: | 
| 976 |         ASSERT(context->term < static_cast<int>(disjunction->terms.size())); | 
| 977 |  | 
| 978 |         switch (currentTerm().type) { | 
| 979 |         case ByteTerm::TypeSubpatternBegin: | 
| 980 |             MATCH_NEXT(); | 
| 981 |         case ByteTerm::TypeSubpatternEnd: | 
| 982 |             context->matchEnd = input.getPos(); | 
| 983 |             return true; | 
| 984 |  | 
| 985 |         case ByteTerm::TypeBodyAlternativeBegin: | 
| 986 |             MATCH_NEXT(); | 
| 987 |         case ByteTerm::TypeBodyAlternativeDisjunction: | 
| 988 |         case ByteTerm::TypeBodyAlternativeEnd: | 
| 989 |             context->matchEnd = input.getPos(); | 
| 990 |             return true; | 
| 991 |  | 
| 992 |         case ByteTerm::TypeAlternativeBegin: | 
| 993 |             MATCH_NEXT(); | 
| 994 |         case ByteTerm::TypeAlternativeDisjunction: | 
| 995 |         case ByteTerm::TypeAlternativeEnd: { | 
| 996 |             int offset = currentTerm().alternative.end; | 
| 997 |             BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); | 
| 998 |             backTrack->offset = offset; | 
| 999 |             context->term += offset; | 
| 1000 |             MATCH_NEXT(); | 
| 1001 |         } | 
| 1002 |  | 
| 1003 |         case ByteTerm::TypeAssertionBOL: | 
| 1004 |             if (matchAssertionBOL(currentTerm())) | 
| 1005 |                 MATCH_NEXT(); | 
| 1006 |             BACKTRACK(); | 
| 1007 |         case ByteTerm::TypeAssertionEOL: | 
| 1008 |             if (matchAssertionEOL(currentTerm())) | 
| 1009 |                 MATCH_NEXT(); | 
| 1010 |             BACKTRACK(); | 
| 1011 |         case ByteTerm::TypeAssertionWordBoundary: | 
| 1012 |             if (matchAssertionWordBoundary(currentTerm())) | 
| 1013 |                 MATCH_NEXT(); | 
| 1014 |             BACKTRACK(); | 
| 1015 |  | 
| 1016 |         case ByteTerm::TypePatternCharacterOnce: | 
| 1017 |         case ByteTerm::TypePatternCharacterFixed: { | 
| 1018 |             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { | 
| 1019 |                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount)) | 
| 1020 |                     BACKTRACK(); | 
| 1021 |             } | 
| 1022 |             MATCH_NEXT(); | 
| 1023 |         } | 
| 1024 |         case ByteTerm::TypePatternCharacterGreedy: { | 
| 1025 |             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); | 
| 1026 |             unsigned matchAmount = 0; | 
| 1027 |             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(count: 1)) { | 
| 1028 |                 if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) { | 
| 1029 |                     input.uncheckInput(count: 1); | 
| 1030 |                     break; | 
| 1031 |                 } | 
| 1032 |                 ++matchAmount; | 
| 1033 |             } | 
| 1034 |             backTrack->matchAmount = matchAmount; | 
| 1035 |  | 
| 1036 |             MATCH_NEXT(); | 
| 1037 |         } | 
| 1038 |         case ByteTerm::TypePatternCharacterNonGreedy: { | 
| 1039 |             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); | 
| 1040 |             backTrack->matchAmount = 0; | 
| 1041 |             MATCH_NEXT(); | 
| 1042 |         } | 
| 1043 |  | 
| 1044 |         case ByteTerm::TypePatternCasedCharacterOnce: | 
| 1045 |         case ByteTerm::TypePatternCasedCharacterFixed: { | 
| 1046 |             for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) { | 
| 1047 |                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount)) | 
| 1048 |                     BACKTRACK(); | 
| 1049 |             } | 
| 1050 |             MATCH_NEXT(); | 
| 1051 |         } | 
| 1052 |         case ByteTerm::TypePatternCasedCharacterGreedy: { | 
| 1053 |             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); | 
| 1054 |             unsigned matchAmount = 0; | 
| 1055 |             while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(count: 1)) { | 
| 1056 |                 if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) { | 
| 1057 |                     input.uncheckInput(count: 1); | 
| 1058 |                     break; | 
| 1059 |                 } | 
| 1060 |                 ++matchAmount; | 
| 1061 |             } | 
| 1062 |             backTrack->matchAmount = matchAmount; | 
| 1063 |  | 
| 1064 |             MATCH_NEXT(); | 
| 1065 |         } | 
| 1066 |         case ByteTerm::TypePatternCasedCharacterNonGreedy: { | 
| 1067 |             BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation); | 
| 1068 |             backTrack->matchAmount = 0; | 
| 1069 |             MATCH_NEXT(); | 
| 1070 |         } | 
| 1071 |  | 
| 1072 |         case ByteTerm::TypeCharacterClass: | 
| 1073 |             if (matchCharacterClass(currentTerm(), context)) | 
| 1074 |                 MATCH_NEXT(); | 
| 1075 |             BACKTRACK(); | 
| 1076 |         case ByteTerm::TypeBackReference: | 
| 1077 |             if (matchBackReference(currentTerm(), context)) | 
| 1078 |                 MATCH_NEXT(); | 
| 1079 |             BACKTRACK(); | 
| 1080 |         case ByteTerm::TypeParenthesesSubpattern: | 
| 1081 |             if (matchParentheses(currentTerm(), context)) | 
| 1082 |                 MATCH_NEXT(); | 
| 1083 |             BACKTRACK(); | 
| 1084 |         case ByteTerm::TypeParenthesesSubpatternOnceBegin: | 
| 1085 |             if (matchParenthesesOnceBegin(currentTerm(), context)) | 
| 1086 |                 MATCH_NEXT(); | 
| 1087 |             BACKTRACK(); | 
| 1088 |         case ByteTerm::TypeParenthesesSubpatternOnceEnd: | 
| 1089 |             if (matchParenthesesOnceEnd(currentTerm(), context)) | 
| 1090 |                 MATCH_NEXT(); | 
| 1091 |             BACKTRACK(); | 
| 1092 |         case ByteTerm::TypeParentheticalAssertionBegin: | 
| 1093 |             if (matchParentheticalAssertionBegin(currentTerm(), context)) | 
| 1094 |                 MATCH_NEXT(); | 
| 1095 |             BACKTRACK(); | 
| 1096 |         case ByteTerm::TypeParentheticalAssertionEnd: | 
| 1097 |             if (matchParentheticalAssertionEnd(currentTerm(), context)) | 
| 1098 |                 MATCH_NEXT(); | 
| 1099 |             BACKTRACK(); | 
| 1100 |  | 
| 1101 |         case ByteTerm::TypeCheckInput: | 
| 1102 |             if (input.checkInput(currentTerm().checkInputCount)) | 
| 1103 |                 MATCH_NEXT(); | 
| 1104 |             BACKTRACK(); | 
| 1105 |         } | 
| 1106 |  | 
| 1107 |         // We should never fall-through to here. | 
| 1108 |         ASSERT_NOT_REACHED(); | 
| 1109 |  | 
| 1110 |     backtrack: | 
| 1111 |         ASSERT(context->term < static_cast<int>(disjunction->terms.size())); | 
| 1112 |  | 
| 1113 |         switch (currentTerm().type) { | 
| 1114 |         case ByteTerm::TypeSubpatternBegin: | 
| 1115 |             return false; | 
| 1116 |         case ByteTerm::TypeSubpatternEnd: | 
| 1117 |             ASSERT_NOT_REACHED(); | 
| 1118 |  | 
| 1119 |         case ByteTerm::TypeBodyAlternativeBegin: | 
| 1120 |         case ByteTerm::TypeBodyAlternativeDisjunction: { | 
| 1121 |             int offset = currentTerm().alternative.next; | 
| 1122 |             context->term += offset; | 
| 1123 |             if (offset > 0) | 
| 1124 |                 MATCH_NEXT(); | 
| 1125 |  | 
| 1126 |             if (input.atEnd()) | 
| 1127 |                 return false; | 
| 1128 |  | 
| 1129 |             input.next(); | 
| 1130 |             context->matchBegin = input.getPos(); | 
| 1131 |             MATCH_NEXT(); | 
| 1132 |         } | 
| 1133 |         case ByteTerm::TypeBodyAlternativeEnd: | 
| 1134 |             ASSERT_NOT_REACHED(); | 
| 1135 |  | 
| 1136 |             case ByteTerm::TypeAlternativeBegin: | 
| 1137 |             case ByteTerm::TypeAlternativeDisjunction: { | 
| 1138 |                 int offset = currentTerm().alternative.next; | 
| 1139 |                 context->term += offset; | 
| 1140 |                 if (offset > 0) | 
| 1141 |                     MATCH_NEXT(); | 
| 1142 |                 BACKTRACK(); | 
| 1143 |             } | 
| 1144 |             case ByteTerm::TypeAlternativeEnd: { | 
| 1145 |                 // We should never backtrack back into an alternative of the main body of the regex. | 
| 1146 |                 BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation); | 
| 1147 |                 unsigned offset = backTrack->offset; | 
| 1148 |                 context->term -= offset; | 
| 1149 |                 BACKTRACK(); | 
| 1150 |             } | 
| 1151 |  | 
| 1152 |             case ByteTerm::TypeAssertionBOL: | 
| 1153 |             case ByteTerm::TypeAssertionEOL: | 
| 1154 |             case ByteTerm::TypeAssertionWordBoundary: | 
| 1155 |                 BACKTRACK(); | 
| 1156 |  | 
| 1157 |             case ByteTerm::TypePatternCharacterOnce: | 
| 1158 |             case ByteTerm::TypePatternCharacterFixed: | 
| 1159 |             case ByteTerm::TypePatternCharacterGreedy: | 
| 1160 |             case ByteTerm::TypePatternCharacterNonGreedy: | 
| 1161 |                 if (backtrackPatternCharacter(currentTerm(), context)) | 
| 1162 |                     MATCH_NEXT(); | 
| 1163 |                 BACKTRACK(); | 
| 1164 |             case ByteTerm::TypePatternCasedCharacterOnce: | 
| 1165 |             case ByteTerm::TypePatternCasedCharacterFixed: | 
| 1166 |             case ByteTerm::TypePatternCasedCharacterGreedy: | 
| 1167 |             case ByteTerm::TypePatternCasedCharacterNonGreedy: | 
| 1168 |                 if (backtrackPatternCasedCharacter(currentTerm(), context)) | 
| 1169 |                     MATCH_NEXT(); | 
| 1170 |                 BACKTRACK(); | 
| 1171 |             case ByteTerm::TypeCharacterClass: | 
| 1172 |                 if (backtrackCharacterClass(currentTerm(), context)) | 
| 1173 |                     MATCH_NEXT(); | 
| 1174 |                 BACKTRACK(); | 
| 1175 |             case ByteTerm::TypeBackReference: | 
| 1176 |                 if (backtrackBackReference(currentTerm(), context)) | 
| 1177 |                     MATCH_NEXT(); | 
| 1178 |                 BACKTRACK(); | 
| 1179 |             case ByteTerm::TypeParenthesesSubpattern: | 
| 1180 |                 if (backtrackParentheses(currentTerm(), context)) | 
| 1181 |                     MATCH_NEXT(); | 
| 1182 |                 BACKTRACK(); | 
| 1183 |             case ByteTerm::TypeParenthesesSubpatternOnceBegin: | 
| 1184 |                 if (backtrackParenthesesOnceBegin(currentTerm(), context)) | 
| 1185 |                     MATCH_NEXT(); | 
| 1186 |                 BACKTRACK(); | 
| 1187 |             case ByteTerm::TypeParenthesesSubpatternOnceEnd: | 
| 1188 |                 if (backtrackParenthesesOnceEnd(currentTerm(), context)) | 
| 1189 |                     MATCH_NEXT(); | 
| 1190 |                 BACKTRACK(); | 
| 1191 |             case ByteTerm::TypeParentheticalAssertionBegin: | 
| 1192 |                 if (backtrackParentheticalAssertionBegin(currentTerm(), context)) | 
| 1193 |                     MATCH_NEXT(); | 
| 1194 |                 BACKTRACK(); | 
| 1195 |             case ByteTerm::TypeParentheticalAssertionEnd: | 
| 1196 |                 if (backtrackParentheticalAssertionEnd(currentTerm(), context)) | 
| 1197 |                     MATCH_NEXT(); | 
| 1198 |                 BACKTRACK(); | 
| 1199 |  | 
| 1200 |             case ByteTerm::TypeCheckInput: | 
| 1201 |                 input.uncheckInput(currentTerm().checkInputCount); | 
| 1202 |                 BACKTRACK(); | 
| 1203 |         } | 
| 1204 |  | 
| 1205 |         ASSERT_NOT_REACHED(); | 
| 1206 |         return false; | 
| 1207 |     } | 
| 1208 |  | 
| 1209 |     bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false) | 
| 1210 |     { | 
| 1211 |         if (matchDisjunction(disjunction, context, btrack)) { | 
| 1212 |             while (context->matchBegin == context->matchEnd) { | 
| 1213 |                 if (!matchDisjunction(disjunction, context, btrack: true)) | 
| 1214 |                     return false; | 
| 1215 |             } | 
| 1216 |             return true; | 
| 1217 |         } | 
| 1218 |  | 
| 1219 |         return false; | 
| 1220 |     } | 
| 1221 |  | 
| 1222 |     int interpret() | 
| 1223 |     { | 
| 1224 |         for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i) | 
| 1225 |             output[i] = -1; | 
| 1226 |  | 
| 1227 |         DisjunctionContext* context = allocDisjunctionContext(disjunction: pattern->m_body.get()); | 
| 1228 |  | 
| 1229 |         if (matchDisjunction(disjunction: pattern->m_body.get(), context)) { | 
| 1230 |             output[0] = context->matchBegin; | 
| 1231 |             output[1] = context->matchEnd; | 
| 1232 |         } | 
| 1233 |  | 
| 1234 |         freeDisjunctionContext(context); | 
| 1235 |  | 
| 1236 |         return output[0]; | 
| 1237 |     } | 
| 1238 |  | 
| 1239 |     Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length) | 
| 1240 |         : pattern(pattern) | 
| 1241 |         , output(output) | 
| 1242 |         , input(inputChar, start, length) | 
| 1243 |     { | 
| 1244 |     } | 
| 1245 |  | 
| 1246 | private: | 
| 1247 |     BytecodePattern *pattern; | 
| 1248 |     int* output; | 
| 1249 |     InputStream input; | 
| 1250 | }; | 
| 1251 |  | 
| 1252 |  | 
| 1253 |  | 
| 1254 | class ByteCompiler { | 
| 1255 |     struct ParenthesesStackEntry { | 
| 1256 |         unsigned beginTerm; | 
| 1257 |         unsigned savedAlternativeIndex; | 
| 1258 |         ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/) | 
| 1259 |             : beginTerm(beginTerm) | 
| 1260 |             , savedAlternativeIndex(savedAlternativeIndex) | 
| 1261 |         { | 
| 1262 |         } | 
| 1263 |     }; | 
| 1264 |  | 
| 1265 | public: | 
| 1266 |     ByteCompiler(RegexPattern& pattern) | 
| 1267 |         : m_pattern(pattern) | 
| 1268 |     { | 
| 1269 |         m_bodyDisjunction = 0; | 
| 1270 |         m_currentAlternativeIndex = 0; | 
| 1271 |     } | 
| 1272 |  | 
| 1273 |     BytecodePattern* compile() | 
| 1274 |     { | 
| 1275 |         regexBegin(numSubpatterns: m_pattern.m_numSubpatterns, callFrameSize: m_pattern.m_body->m_callFrameSize); | 
| 1276 |         emitDisjunction(disjunction: m_pattern.m_body); | 
| 1277 |         regexEnd(); | 
| 1278 |  | 
| 1279 |         return new BytecodePattern(m_bodyDisjunction, m_allParenthesesInfo, m_pattern); | 
| 1280 |     } | 
| 1281 |  | 
| 1282 |     void checkInput(unsigned count) | 
| 1283 |     { | 
| 1284 |         m_bodyDisjunction->terms.append(val: ByteTerm::CheckInput(count)); | 
| 1285 |     } | 
| 1286 |  | 
| 1287 |     void assertionBOL(int inputPosition) | 
| 1288 |     { | 
| 1289 |         m_bodyDisjunction->terms.append(val: ByteTerm::BOL(inputPos: inputPosition)); | 
| 1290 |     } | 
| 1291 |  | 
| 1292 |     void assertionEOL(int inputPosition) | 
| 1293 |     { | 
| 1294 |         m_bodyDisjunction->terms.append(val: ByteTerm::EOL(inputPos: inputPosition)); | 
| 1295 |     } | 
| 1296 |  | 
| 1297 |     void assertionWordBoundary(bool invert, int inputPosition) | 
| 1298 |     { | 
| 1299 |         m_bodyDisjunction->terms.append(val: ByteTerm::WordBoundary(invert, inputPos: inputPosition)); | 
| 1300 |     } | 
| 1301 |  | 
| 1302 |     void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) | 
| 1303 |     { | 
| 1304 |         if (m_pattern.m_ignoreCase) { | 
| 1305 |             UChar lo = Unicode::toLower(ch); | 
| 1306 |             UChar hi = Unicode::toUpper(ch); | 
| 1307 |  | 
| 1308 |             if (lo != hi) { | 
| 1309 |                 m_bodyDisjunction->terms.append(val: ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType)); | 
| 1310 |                 return; | 
| 1311 |             } | 
| 1312 |         } | 
| 1313 |  | 
| 1314 |         m_bodyDisjunction->terms.append(val: ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType)); | 
| 1315 |     } | 
| 1316 |  | 
| 1317 |     void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) | 
| 1318 |     { | 
| 1319 |         m_bodyDisjunction->terms.append(val: ByteTerm(characterClass, invert, inputPosition)); | 
| 1320 |  | 
| 1321 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; | 
| 1322 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; | 
| 1323 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; | 
| 1324 |     } | 
| 1325 |  | 
| 1326 |     void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType) | 
| 1327 |     { | 
| 1328 |         ASSERT(subpatternId); | 
| 1329 |  | 
| 1330 |         m_bodyDisjunction->terms.append(val: ByteTerm::BackReference(subpatternId, inputPos: inputPosition)); | 
| 1331 |  | 
| 1332 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount; | 
| 1333 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType; | 
| 1334 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; | 
| 1335 |     } | 
| 1336 |  | 
| 1337 |     void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation) | 
| 1338 |     { | 
| 1339 |         int beginTerm = m_bodyDisjunction->terms.size(); | 
| 1340 |  | 
| 1341 |         m_bodyDisjunction->terms.append(val: ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition)); | 
| 1342 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; | 
| 1343 |         m_bodyDisjunction->terms.append(val: ByteTerm::AlternativeBegin()); | 
| 1344 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; | 
| 1345 |  | 
| 1346 |         m_parenthesesStack.append(val: ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); | 
| 1347 |         m_currentAlternativeIndex = beginTerm + 1; | 
| 1348 |     } | 
| 1349 |  | 
| 1350 |     void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation) | 
| 1351 |     { | 
| 1352 |         int beginTerm = m_bodyDisjunction->terms.size(); | 
| 1353 |  | 
| 1354 |         m_bodyDisjunction->terms.append(val: ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0)); | 
| 1355 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = frameLocation; | 
| 1356 |         m_bodyDisjunction->terms.append(val: ByteTerm::AlternativeBegin()); | 
| 1357 |         m_bodyDisjunction->terms[m_bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation; | 
| 1358 |  | 
| 1359 |         m_parenthesesStack.append(val: ParenthesesStackEntry(beginTerm, m_currentAlternativeIndex)); | 
| 1360 |         m_currentAlternativeIndex = beginTerm + 1; | 
| 1361 |     } | 
| 1362 |  | 
| 1363 |     unsigned popParenthesesStack() | 
| 1364 |     { | 
| 1365 |         ASSERT(m_parenthesesStack.size()); | 
| 1366 |         int stackEnd = m_parenthesesStack.size() - 1; | 
| 1367 |         unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm; | 
| 1368 |         m_currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex; | 
| 1369 |         m_parenthesesStack.shrink(size: stackEnd); | 
| 1370 |  | 
| 1371 |         ASSERT(beginTerm < m_bodyDisjunction->terms.size()); | 
| 1372 |         ASSERT(m_currentAlternativeIndex < m_bodyDisjunction->terms.size()); | 
| 1373 |  | 
| 1374 |         return beginTerm; | 
| 1375 |     } | 
| 1376 |  | 
| 1377 | #ifndef NDEBUG | 
| 1378 |     void dumpDisjunction(ByteDisjunction* disjunction) | 
| 1379 |     { | 
| 1380 |         printf(format: "ByteDisjunction(%p):\n\t" , disjunction); | 
| 1381 |         for (unsigned i = 0; i < disjunction->terms.size(); ++i) | 
| 1382 |             printf(format: "{ %d } " , disjunction->terms[i].type); | 
| 1383 |         printf(format: "\n" ); | 
| 1384 |     } | 
| 1385 | #endif | 
| 1386 |  | 
| 1387 |     void closeAlternative(int beginTerm) | 
| 1388 |     { | 
| 1389 |         int origBeginTerm = beginTerm; | 
| 1390 |         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin); | 
| 1391 |         int endIndex = m_bodyDisjunction->terms.size(); | 
| 1392 |  | 
| 1393 |         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; | 
| 1394 |  | 
| 1395 |         if (!m_bodyDisjunction->terms[beginTerm].alternative.next) | 
| 1396 |             m_bodyDisjunction->terms.remove(position: beginTerm); | 
| 1397 |         else { | 
| 1398 |             while (m_bodyDisjunction->terms[beginTerm].alternative.next) { | 
| 1399 |                 beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; | 
| 1400 |                 ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction); | 
| 1401 |                 m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; | 
| 1402 |                 m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; | 
| 1403 |             } | 
| 1404 |  | 
| 1405 |             m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; | 
| 1406 |  | 
| 1407 |             m_bodyDisjunction->terms.append(val: ByteTerm::AlternativeEnd()); | 
| 1408 |             m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; | 
| 1409 |         } | 
| 1410 |     } | 
| 1411 |  | 
| 1412 |     void closeBodyAlternative() | 
| 1413 |     { | 
| 1414 |         int beginTerm = 0; | 
| 1415 |         int origBeginTerm = 0; | 
| 1416 |         ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin); | 
| 1417 |         int endIndex = m_bodyDisjunction->terms.size(); | 
| 1418 |  | 
| 1419 |         unsigned frameLocation = m_bodyDisjunction->terms[beginTerm].frameLocation; | 
| 1420 |  | 
| 1421 |         while (m_bodyDisjunction->terms[beginTerm].alternative.next) { | 
| 1422 |             beginTerm += m_bodyDisjunction->terms[beginTerm].alternative.next; | 
| 1423 |             ASSERT(m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction); | 
| 1424 |             m_bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm; | 
| 1425 |             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; | 
| 1426 |         } | 
| 1427 |  | 
| 1428 |         m_bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm; | 
| 1429 |  | 
| 1430 |         m_bodyDisjunction->terms.append(val: ByteTerm::BodyAlternativeEnd()); | 
| 1431 |         m_bodyDisjunction->terms[endIndex].frameLocation = frameLocation; | 
| 1432 |     } | 
| 1433 |  | 
| 1434 |     void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0) | 
| 1435 |     { | 
| 1436 |         unsigned beginTerm = popParenthesesStack(); | 
| 1437 |         closeAlternative(beginTerm: beginTerm + 1); | 
| 1438 |         unsigned endTerm = m_bodyDisjunction->terms.size(); | 
| 1439 |  | 
| 1440 |         bool isAssertion = m_bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin; | 
| 1441 |         bool invertOrCapture = m_bodyDisjunction->terms[beginTerm].invertOrCapture; | 
| 1442 |         unsigned subpatternId = m_bodyDisjunction->terms[beginTerm].atom.subpatternId; | 
| 1443 |  | 
| 1444 |         m_bodyDisjunction->terms.append(val: ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition)); | 
| 1445 |         m_bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm; | 
| 1446 |         m_bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm; | 
| 1447 |         m_bodyDisjunction->terms[endTerm].frameLocation = frameLocation; | 
| 1448 |  | 
| 1449 |         if (doInline) { | 
| 1450 |             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; | 
| 1451 |             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; | 
| 1452 |             m_bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount; | 
| 1453 |             m_bodyDisjunction->terms[endTerm].atom.quantityType = quantityType; | 
| 1454 |         } else { | 
| 1455 |             ByteTerm& parenthesesBegin = m_bodyDisjunction->terms[beginTerm]; | 
| 1456 |             ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin); | 
| 1457 |  | 
| 1458 |             bool invertOrCapture = parenthesesBegin.invertOrCapture; | 
| 1459 |             unsigned subpatternId = parenthesesBegin.atom.subpatternId; | 
| 1460 |  | 
| 1461 |             unsigned numSubpatterns = lastSubpatternId - subpatternId + 1; | 
| 1462 |             ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize); | 
| 1463 |  | 
| 1464 |             parenthesesDisjunction->terms.append(val: ByteTerm::SubpatternBegin()); | 
| 1465 |             for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses) | 
| 1466 |                 parenthesesDisjunction->terms.append(val: m_bodyDisjunction->terms[termInParentheses]); | 
| 1467 |             parenthesesDisjunction->terms.append(val: ByteTerm::SubpatternEnd()); | 
| 1468 |  | 
| 1469 |             m_bodyDisjunction->terms.shrink(size: beginTerm); | 
| 1470 |  | 
| 1471 |             m_allParenthesesInfo.append(val: parenthesesDisjunction); | 
| 1472 |             m_bodyDisjunction->terms.append(val: ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition)); | 
| 1473 |  | 
| 1474 |             m_bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount; | 
| 1475 |             m_bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType; | 
| 1476 |             m_bodyDisjunction->terms[beginTerm].frameLocation = frameLocation; | 
| 1477 |         } | 
| 1478 |     } | 
| 1479 |  | 
| 1480 |     void regexBegin(unsigned numSubpatterns, unsigned callFrameSize) | 
| 1481 |     { | 
| 1482 |         m_bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize); | 
| 1483 |         m_bodyDisjunction->terms.append(val: ByteTerm::BodyAlternativeBegin()); | 
| 1484 |         m_bodyDisjunction->terms[0].frameLocation = 0; | 
| 1485 |         m_currentAlternativeIndex = 0; | 
| 1486 |     } | 
| 1487 |  | 
| 1488 |     void regexEnd() | 
| 1489 |     { | 
| 1490 |         closeBodyAlternative(); | 
| 1491 |     } | 
| 1492 |  | 
| 1493 |     void alternativeBodyDisjunction() | 
| 1494 |     { | 
| 1495 |         int newAlternativeIndex = m_bodyDisjunction->terms.size(); | 
| 1496 |         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; | 
| 1497 |         m_bodyDisjunction->terms.append(val: ByteTerm::BodyAlternativeDisjunction()); | 
| 1498 |  | 
| 1499 |         m_currentAlternativeIndex = newAlternativeIndex; | 
| 1500 |     } | 
| 1501 |  | 
| 1502 |     void alternativeDisjunction() | 
| 1503 |     { | 
| 1504 |         int newAlternativeIndex = m_bodyDisjunction->terms.size(); | 
| 1505 |         m_bodyDisjunction->terms[m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex; | 
| 1506 |         m_bodyDisjunction->terms.append(val: ByteTerm::AlternativeDisjunction()); | 
| 1507 |  | 
| 1508 |         m_currentAlternativeIndex = newAlternativeIndex; | 
| 1509 |     } | 
| 1510 |  | 
| 1511 |     void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0) | 
| 1512 |     { | 
| 1513 |         for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) { | 
| 1514 |             unsigned currentCountAlreadyChecked = inputCountAlreadyChecked; | 
| 1515 |  | 
| 1516 |             if (alt) { | 
| 1517 |                 if (disjunction == m_pattern.m_body) | 
| 1518 |                     alternativeBodyDisjunction(); | 
| 1519 |                 else | 
| 1520 |                     alternativeDisjunction(); | 
| 1521 |             } | 
| 1522 |  | 
| 1523 |             PatternAlternative* alternative = disjunction->m_alternatives[alt]; | 
| 1524 |             unsigned minimumSize = alternative->m_minimumSize; | 
| 1525 |  | 
| 1526 |             ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked); | 
| 1527 |             unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked; | 
| 1528 |             if (countToCheck) | 
| 1529 |                 checkInput(count: countToCheck); | 
| 1530 |             currentCountAlreadyChecked += countToCheck; | 
| 1531 |  | 
| 1532 |             for (unsigned i = 0; i < alternative->m_terms.size(); ++i) { | 
| 1533 |                 PatternTerm& term = alternative->m_terms[i]; | 
| 1534 |  | 
| 1535 |                 switch (term.type) { | 
| 1536 |                 case PatternTerm::TypeAssertionBOL: | 
| 1537 |                     assertionBOL(inputPosition: term.inputPosition - currentCountAlreadyChecked); | 
| 1538 |                     break; | 
| 1539 |  | 
| 1540 |                 case PatternTerm::TypeAssertionEOL: | 
| 1541 |                     assertionEOL(inputPosition: term.inputPosition - currentCountAlreadyChecked); | 
| 1542 |                     break; | 
| 1543 |  | 
| 1544 |                 case PatternTerm::TypeAssertionWordBoundary: | 
| 1545 |                     assertionWordBoundary(invert: term.invertOrCapture, inputPosition: term.inputPosition - currentCountAlreadyChecked); | 
| 1546 |                     break; | 
| 1547 |  | 
| 1548 |                 case PatternTerm::TypePatternCharacter: | 
| 1549 |                     atomPatternCharacter(ch: term.patternCharacter, inputPosition: term.inputPosition - currentCountAlreadyChecked, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType); | 
| 1550 |                     break; | 
| 1551 |  | 
| 1552 |                 case PatternTerm::TypeCharacterClass: | 
| 1553 |                     atomCharacterClass(characterClass: term.characterClass, invert: term.invertOrCapture, inputPosition: term.inputPosition - currentCountAlreadyChecked, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType); | 
| 1554 |                     break; | 
| 1555 |  | 
| 1556 |                 case PatternTerm::TypeBackReference: | 
| 1557 |                     atomBackReference(subpatternId: term.subpatternId, inputPosition: term.inputPosition - currentCountAlreadyChecked, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType); | 
| 1558 |                         break; | 
| 1559 |  | 
| 1560 |                 case PatternTerm::TypeForwardReference: | 
| 1561 |                     break; | 
| 1562 |  | 
| 1563 |                 case PatternTerm::TypeParenthesesSubpattern: { | 
| 1564 |                     unsigned disjunctionAlreadyCheckedCount = 0; | 
| 1565 |                     if ((term.quantityCount == 1) && !term.parentheses.isCopy) { | 
| 1566 |                         if (term.quantityType == QuantifierFixedCount) { | 
| 1567 |                             disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize; | 
| 1568 |                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; | 
| 1569 |                             atomParenthesesSubpatternBegin(subpatternId: term.parentheses.subpatternId, capture: term.invertOrCapture, inputPosition: delegateEndInputOffset - disjunctionAlreadyCheckedCount, frameLocation: term.frameLocation, alternativeFrameLocation: term.frameLocation); | 
| 1570 |                             emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: term.parentheses.disjunction->m_minimumSize); | 
| 1571 |                             atomParenthesesEnd(doInline: true, lastSubpatternId: term.parentheses.lastSubpatternId, inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType, callFrameSize: term.parentheses.disjunction->m_callFrameSize); | 
| 1572 |                         } else { | 
| 1573 |                             unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; | 
| 1574 |                             atomParenthesesSubpatternBegin(subpatternId: term.parentheses.subpatternId, capture: term.invertOrCapture, inputPosition: delegateEndInputOffset - disjunctionAlreadyCheckedCount, frameLocation: term.frameLocation, alternativeFrameLocation: term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce); | 
| 1575 |                             emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: 0); | 
| 1576 |                             atomParenthesesEnd(doInline: true, lastSubpatternId: term.parentheses.lastSubpatternId, inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType, callFrameSize: term.parentheses.disjunction->m_callFrameSize); | 
| 1577 |                         } | 
| 1578 |                     } else { | 
| 1579 |                         unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked; | 
| 1580 |                         atomParenthesesSubpatternBegin(subpatternId: term.parentheses.subpatternId, capture: term.invertOrCapture, inputPosition: delegateEndInputOffset - disjunctionAlreadyCheckedCount, frameLocation: term.frameLocation, alternativeFrameLocation: 0); | 
| 1581 |                         emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: 0); | 
| 1582 |                         atomParenthesesEnd(doInline: false, lastSubpatternId: term.parentheses.lastSubpatternId, inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType, callFrameSize: term.parentheses.disjunction->m_callFrameSize); | 
| 1583 |                     } | 
| 1584 |                     break; | 
| 1585 |                 } | 
| 1586 |  | 
| 1587 |                 case PatternTerm::TypeParentheticalAssertion: { | 
| 1588 |                     unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion; | 
| 1589 |  | 
| 1590 |                     atomParentheticalAssertionBegin(subpatternId: term.parentheses.subpatternId, invert: term.invertOrCapture, frameLocation: term.frameLocation, alternativeFrameLocation); | 
| 1591 |                     emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: 0); | 
| 1592 |                     atomParenthesesEnd(doInline: true, lastSubpatternId: term.parentheses.lastSubpatternId, inputPosition: 0, frameLocation: term.frameLocation, quantityCount: term.quantityCount, quantityType: term.quantityType); | 
| 1593 |                     break; | 
| 1594 |                 } | 
| 1595 |                 } | 
| 1596 |             } | 
| 1597 |         } | 
| 1598 |     } | 
| 1599 |  | 
| 1600 | private: | 
| 1601 |     RegexPattern& m_pattern; | 
| 1602 |     ByteDisjunction* m_bodyDisjunction; | 
| 1603 |     unsigned m_currentAlternativeIndex; | 
| 1604 |     Vector<ParenthesesStackEntry> m_parenthesesStack; | 
| 1605 |     Vector<ByteDisjunction*> m_allParenthesesInfo; | 
| 1606 | }; | 
| 1607 |  | 
| 1608 |  | 
| 1609 | BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline) | 
| 1610 | { | 
| 1611 |     RegexPattern pattern(ignoreCase, multiline); | 
| 1612 |  | 
| 1613 |     if ((error = compileRegex(patternString, pattern))) | 
| 1614 |         return 0; | 
| 1615 |  | 
| 1616 |     numSubpatterns = pattern.m_numSubpatterns; | 
| 1617 |  | 
| 1618 |     return ByteCompiler(pattern).compile(); | 
| 1619 | } | 
| 1620 |  | 
| 1621 | int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output) | 
| 1622 | { | 
| 1623 |     return Interpreter(regex, output, input, start, length).interpret(); | 
| 1624 | } | 
| 1625 |  | 
| 1626 |  | 
| 1627 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter); | 
| 1628 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass); | 
| 1629 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference); | 
| 1630 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative); | 
| 1631 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion); | 
| 1632 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce); | 
| 1633 | COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses); | 
| 1634 |  | 
| 1635 |  | 
| 1636 | } } | 
| 1637 |  | 
| 1638 | #endif | 
| 1639 |  |