1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "RegexJIT.h"
28
29#include "ASCIICType.h"
30#include "JSGlobalData.h"
31#include "LinkBuffer.h"
32#include "MacroAssembler.h"
33#include "RegexCompiler.h"
34
35#include "pcre.h" // temporary, remove when fallback is removed.
36
37#if ENABLE(YARR_JIT)
38
39using namespace WTF;
40
41namespace JSC { namespace Yarr {
42
43
44class RegexGenerator : private MacroAssembler {
45 friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
46
47#if CPU(ARM)
48 static const RegisterID input = ARMRegisters::r0;
49 static const RegisterID index = ARMRegisters::r1;
50 static const RegisterID length = ARMRegisters::r2;
51 static const RegisterID output = ARMRegisters::r4;
52
53 static const RegisterID regT0 = ARMRegisters::r5;
54 static const RegisterID regT1 = ARMRegisters::r6;
55
56 static const RegisterID returnRegister = ARMRegisters::r0;
57#elif CPU(X86)
58 static const RegisterID input = X86Registers::eax;
59 static const RegisterID index = X86Registers::edx;
60 static const RegisterID length = X86Registers::ecx;
61 static const RegisterID output = X86Registers::edi;
62
63 static const RegisterID regT0 = X86Registers::ebx;
64 static const RegisterID regT1 = X86Registers::esi;
65
66 static const RegisterID returnRegister = X86Registers::eax;
67#elif CPU(X86_64)
68 static const RegisterID input = X86Registers::edi;
69 static const RegisterID index = X86Registers::esi;
70 static const RegisterID length = X86Registers::edx;
71 static const RegisterID output = X86Registers::ecx;
72
73 static const RegisterID regT0 = X86Registers::eax;
74 static const RegisterID regT1 = X86Registers::ebx;
75
76 static const RegisterID returnRegister = X86Registers::eax;
77#endif
78
79 void optimizeAlternative(PatternAlternative* alternative)
80 {
81 if (!alternative->m_terms.size())
82 return;
83
84 for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
85 PatternTerm& term = alternative->m_terms[i];
86 PatternTerm& nextTerm = alternative->m_terms[i + 1];
87
88 if ((term.type == PatternTerm::TypeCharacterClass)
89 && (term.quantityType == QuantifierFixedCount)
90 && (nextTerm.type == PatternTerm::TypePatternCharacter)
91 && (nextTerm.quantityType == QuantifierFixedCount)) {
92 PatternTerm termCopy = term;
93 alternative->m_terms[i] = nextTerm;
94 alternative->m_terms[i + 1] = termCopy;
95 }
96 }
97 }
98
99 void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
100 {
101 do {
102 // pick which range we're going to generate
103 int which = count >> 1;
104 char lo = ranges[which].begin;
105 char hi = ranges[which].end;
106
107 // check if there are any ranges or matches below lo. If not, just jl to failure -
108 // if there is anything else to check, check that first, if it falls through jmp to failure.
109 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
110 Jump loOrAbove = branch32(cond: GreaterThanOrEqual, left: character, right: Imm32((unsigned short)lo));
111
112 // generate code for all ranges before this one
113 if (which)
114 matchCharacterClassRange(character, failures, matchDest, ranges, count: which, matchIndex, matches, matchCount);
115
116 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
117 matchDest.append(jump: branch32(cond: Equal, left: character, right: Imm32((unsigned short)matches[*matchIndex])));
118 ++*matchIndex;
119 }
120 failures.append(jump: jump());
121
122 loOrAbove.link(masm: this);
123 } else if (which) {
124 Jump loOrAbove = branch32(cond: GreaterThanOrEqual, left: character, right: Imm32((unsigned short)lo));
125
126 matchCharacterClassRange(character, failures, matchDest, ranges, count: which, matchIndex, matches, matchCount);
127 failures.append(jump: jump());
128
129 loOrAbove.link(masm: this);
130 } else
131 failures.append(jump: branch32(cond: LessThan, left: character, right: Imm32((unsigned short)lo)));
132
133 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
134 ++*matchIndex;
135
136 matchDest.append(jump: branch32(cond: LessThanOrEqual, left: character, right: Imm32((unsigned short)hi)));
137 // fall through to here, the value is above hi.
138
139 // shuffle along & loop around if there are any more matches to handle.
140 unsigned next = which + 1;
141 ranges += next;
142 count -= next;
143 } while (count);
144 }
145
146 void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
147 {
148 Jump unicodeFail;
149 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
150 Jump isAscii = branch32(cond: LessThanOrEqual, left: character, right: Imm32(0x7f));
151
152 if (charClass->m_matchesUnicode.size()) {
153 for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
154 UChar ch = charClass->m_matchesUnicode[i];
155 matchDest.append(jump: branch32(cond: Equal, left: character, right: Imm32(ch)));
156 }
157 }
158
159 if (charClass->m_rangesUnicode.size()) {
160 for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
161 UChar lo = charClass->m_rangesUnicode[i].begin;
162 UChar hi = charClass->m_rangesUnicode[i].end;
163
164 Jump below = branch32(cond: LessThan, left: character, right: Imm32(lo));
165 matchDest.append(jump: branch32(cond: LessThanOrEqual, left: character, right: Imm32(hi)));
166 below.link(masm: this);
167 }
168 }
169
170 unicodeFail = jump();
171 isAscii.link(masm: this);
172 }
173
174 if (charClass->m_ranges.size()) {
175 unsigned matchIndex = 0;
176 JumpList failures;
177 matchCharacterClassRange(character, failures, matchDest, ranges: charClass->m_ranges.begin(), count: charClass->m_ranges.size(), matchIndex: &matchIndex, matches: charClass->m_matches.begin(), matchCount: charClass->m_matches.size());
178 while (matchIndex < charClass->m_matches.size())
179 matchDest.append(jump: branch32(cond: Equal, left: character, right: Imm32((unsigned short)charClass->m_matches[matchIndex++])));
180
181 failures.link(masm: this);
182 } else if (charClass->m_matches.size()) {
183 // optimization: gather 'a','A' etc back together, can mask & test once.
184 Vector<char> matchesAZaz;
185
186 for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
187 char ch = charClass->m_matches[i];
188 if (m_pattern.m_ignoreCase) {
189 if (isASCIILower(c: ch)) {
190 matchesAZaz.append(val: ch);
191 continue;
192 }
193 if (isASCIIUpper(c: ch))
194 continue;
195 }
196 matchDest.append(jump: branch32(cond: Equal, left: character, right: Imm32((unsigned short)ch)));
197 }
198
199 if (unsigned countAZaz = matchesAZaz.size()) {
200 or32(imm: Imm32(32), dest: character);
201 for (unsigned i = 0; i < countAZaz; ++i)
202 matchDest.append(jump: branch32(cond: Equal, left: character, right: Imm32(matchesAZaz[i])));
203 }
204 }
205
206 if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
207 unicodeFail.link(masm: this);
208 }
209
210 // Jumps if input not available; will have (incorrectly) incremented already!
211 Jump jumpIfNoAvailableInput(unsigned countToCheck)
212 {
213 add32(imm: Imm32(countToCheck), dest: index);
214 return branch32(cond: Above, left: index, right: length);
215 }
216
217 Jump jumpIfAvailableInput(unsigned countToCheck)
218 {
219 add32(imm: Imm32(countToCheck), dest: index);
220 return branch32(cond: BelowOrEqual, left: index, right: length);
221 }
222
223 Jump checkInput()
224 {
225 return branch32(cond: BelowOrEqual, left: index, right: length);
226 }
227
228 Jump atEndOfInput()
229 {
230 return branch32(cond: Equal, left: index, right: length);
231 }
232
233 Jump notAtEndOfInput()
234 {
235 return branch32(cond: NotEqual, left: index, right: length);
236 }
237
238 Jump jumpIfCharEquals(UChar ch, int inputPosition)
239 {
240 return branch16(cond: Equal, left: BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), right: Imm32(ch));
241 }
242
243 Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
244 {
245 return branch16(cond: NotEqual, left: BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), right: Imm32(ch));
246 }
247
248 void readCharacter(int inputPosition, RegisterID reg)
249 {
250 load16(address: BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), dest: reg);
251 }
252
253 void storeToFrame(RegisterID reg, unsigned frameLocation)
254 {
255 poke(src: reg, index: frameLocation);
256 }
257
258 void storeToFrame(Imm32 imm, unsigned frameLocation)
259 {
260 poke(value: imm, index: frameLocation);
261 }
262
263 DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
264 {
265 return storePtrWithPatch(initialValue: ImmPtr(0), address: Address(stackPointerRegister, frameLocation * sizeof(void*)));
266 }
267
268 void loadFromFrame(unsigned frameLocation, RegisterID reg)
269 {
270 peek(dest: reg, index: frameLocation);
271 }
272
273 void loadFromFrameAndJump(unsigned frameLocation)
274 {
275 jump(address: Address(stackPointerRegister, frameLocation * sizeof(void*)));
276 }
277
278 struct AlternativeBacktrackRecord {
279 DataLabelPtr dataLabel;
280 Label backtrackLocation;
281
282 AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
283 : dataLabel(dataLabel)
284 , backtrackLocation(backtrackLocation)
285 {
286 }
287 };
288
289 struct TermGenerationState {
290 TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
291 : disjunction(disjunction)
292 , checkedTotal(checkedTotal)
293 {
294 }
295
296 void resetAlternative()
297 {
298 isBackTrackGenerated = false;
299 alt = 0;
300 }
301 bool alternativeValid()
302 {
303 return alt < disjunction->m_alternatives.size();
304 }
305 void nextAlternative()
306 {
307 ++alt;
308 }
309 PatternAlternative* alternative()
310 {
311 return disjunction->m_alternatives[alt];
312 }
313
314 void resetTerm()
315 {
316 ASSERT(alternativeValid());
317 t = 0;
318 }
319 bool termValid()
320 {
321 ASSERT(alternativeValid());
322 return t < alternative()->m_terms.size();
323 }
324 void nextTerm()
325 {
326 ASSERT(alternativeValid());
327 ++t;
328 }
329 PatternTerm& term()
330 {
331 ASSERT(alternativeValid());
332 return alternative()->m_terms[t];
333 }
334
335 PatternTerm& lookaheadTerm()
336 {
337 ASSERT(alternativeValid());
338 ASSERT((t + 1) < alternative()->m_terms.size());
339 return alternative()->m_terms[t + 1];
340 }
341 bool isSinglePatternCharacterLookaheadTerm()
342 {
343 ASSERT(alternativeValid());
344 return ((t + 1) < alternative()->m_terms.size())
345 && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
346 && (lookaheadTerm().quantityType == QuantifierFixedCount)
347 && (lookaheadTerm().quantityCount == 1);
348 }
349
350 int inputOffset()
351 {
352 return term().inputPosition - checkedTotal;
353 }
354
355 void jumpToBacktrack(Jump jump, MacroAssembler* masm)
356 {
357 if (isBackTrackGenerated)
358 jump.linkTo(label: backtrackLabel, masm);
359 else
360 backTrackJumps.append(jump);
361 }
362 void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
363 {
364 if (isBackTrackGenerated)
365 jumps.linkTo(label: backtrackLabel, masm);
366 else
367 backTrackJumps.append(other&: jumps);
368 }
369 bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
370 {
371 if (isBackTrackGenerated) {
372 masm->jump(target: backtrackLabel);
373 return true;
374 }
375 return false;
376 }
377 void addBacktrackJump(Jump jump)
378 {
379 backTrackJumps.append(jump);
380 }
381 void setBacktrackGenerated(Label label)
382 {
383 isBackTrackGenerated = true;
384 backtrackLabel = label;
385 }
386 void linkAlternativeBacktracks(MacroAssembler* masm)
387 {
388 isBackTrackGenerated = false;
389 backTrackJumps.link(masm);
390 }
391 void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
392 {
393 isBackTrackGenerated = false;
394 backTrackJumps.linkTo(label, masm);
395 }
396 void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
397 {
398 jumpToBacktrack(jumps&: nestedParenthesesState.backTrackJumps, masm);
399 if (nestedParenthesesState.isBackTrackGenerated)
400 setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
401 }
402
403 PatternDisjunction* disjunction;
404 int checkedTotal;
405 private:
406 unsigned alt;
407 unsigned t;
408 JumpList backTrackJumps;
409 Label backtrackLabel;
410 bool isBackTrackGenerated;
411 };
412
413 void generateAssertionBOL(TermGenerationState& state)
414 {
415 PatternTerm& term = state.term();
416
417 if (m_pattern.m_multiline) {
418 const RegisterID character = regT0;
419
420 JumpList matchDest;
421 if (!term.inputPosition)
422 matchDest.append(jump: branch32(cond: Equal, left: index, right: Imm32(state.checkedTotal)));
423
424 readCharacter(inputPosition: state.inputOffset() - 1, reg: character);
425 matchCharacterClass(character, matchDest, charClass: m_pattern.newlineCharacterClass());
426 state.jumpToBacktrack(jump: jump(), masm: this);
427
428 matchDest.link(masm: this);
429 } else {
430 // Erk, really should poison out these alternatives early. :-/
431 if (term.inputPosition)
432 state.jumpToBacktrack(jump: jump(), masm: this);
433 else
434 state.jumpToBacktrack(jump: branch32(cond: NotEqual, left: index, right: Imm32(state.checkedTotal)), masm: this);
435 }
436 }
437
438 void generateAssertionEOL(TermGenerationState& state)
439 {
440 PatternTerm& term = state.term();
441
442 if (m_pattern.m_multiline) {
443 const RegisterID character = regT0;
444
445 JumpList matchDest;
446 if (term.inputPosition == state.checkedTotal)
447 matchDest.append(jump: atEndOfInput());
448
449 readCharacter(inputPosition: state.inputOffset(), reg: character);
450 matchCharacterClass(character, matchDest, charClass: m_pattern.newlineCharacterClass());
451 state.jumpToBacktrack(jump: jump(), masm: this);
452
453 matchDest.link(masm: this);
454 } else {
455 if (term.inputPosition == state.checkedTotal)
456 state.jumpToBacktrack(jump: notAtEndOfInput(), masm: this);
457 // Erk, really should poison out these alternatives early. :-/
458 else
459 state.jumpToBacktrack(jump: jump(), masm: this);
460 }
461 }
462
463 // Also falls though on nextIsNotWordChar.
464 void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
465 {
466 const RegisterID character = regT0;
467 PatternTerm& term = state.term();
468
469 if (term.inputPosition == state.checkedTotal)
470 nextIsNotWordChar.append(jump: atEndOfInput());
471
472 readCharacter(inputPosition: state.inputOffset(), reg: character);
473 matchCharacterClass(character, matchDest&: nextIsWordChar, charClass: m_pattern.wordcharCharacterClass());
474 }
475
476 void generateAssertionWordBoundary(TermGenerationState& state)
477 {
478 const RegisterID character = regT0;
479 PatternTerm& term = state.term();
480
481 Jump atBegin;
482 JumpList matchDest;
483 if (!term.inputPosition)
484 atBegin = branch32(cond: Equal, left: index, right: Imm32(state.checkedTotal));
485 readCharacter(inputPosition: state.inputOffset() - 1, reg: character);
486 matchCharacterClass(character, matchDest, charClass: m_pattern.wordcharCharacterClass());
487 if (!term.inputPosition)
488 atBegin.link(masm: this);
489
490 // We fall through to here if the last character was not a wordchar.
491 JumpList nonWordCharThenWordChar;
492 JumpList nonWordCharThenNonWordChar;
493 if (term.invertOrCapture) {
494 matchAssertionWordchar(state, nextIsWordChar&: nonWordCharThenNonWordChar, nextIsNotWordChar&: nonWordCharThenWordChar);
495 nonWordCharThenWordChar.append(jump: jump());
496 } else {
497 matchAssertionWordchar(state, nextIsWordChar&: nonWordCharThenWordChar, nextIsNotWordChar&: nonWordCharThenNonWordChar);
498 nonWordCharThenNonWordChar.append(jump: jump());
499 }
500 state.jumpToBacktrack(jumps&: nonWordCharThenNonWordChar, masm: this);
501
502 // We jump here if the last character was a wordchar.
503 matchDest.link(masm: this);
504 JumpList wordCharThenWordChar;
505 JumpList wordCharThenNonWordChar;
506 if (term.invertOrCapture) {
507 matchAssertionWordchar(state, nextIsWordChar&: wordCharThenNonWordChar, nextIsNotWordChar&: wordCharThenWordChar);
508 wordCharThenWordChar.append(jump: jump());
509 } else {
510 matchAssertionWordchar(state, nextIsWordChar&: wordCharThenWordChar, nextIsNotWordChar&: wordCharThenNonWordChar);
511 // This can fall-though!
512 }
513
514 state.jumpToBacktrack(jumps&: wordCharThenWordChar, masm: this);
515
516 nonWordCharThenWordChar.link(masm: this);
517 wordCharThenNonWordChar.link(masm: this);
518 }
519
520 void generatePatternCharacterSingle(TermGenerationState& state)
521 {
522 const RegisterID character = regT0;
523 UChar ch = state.term().patternCharacter;
524
525 if (m_pattern.m_ignoreCase && isASCIIAlpha(c: ch)) {
526 readCharacter(inputPosition: state.inputOffset(), reg: character);
527 or32(imm: Imm32(32), dest: character);
528 state.jumpToBacktrack(jump: branch32(cond: NotEqual, left: character, right: Imm32(Unicode::toLower(ch))), masm: this);
529 } else {
530 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
531 state.jumpToBacktrack(jump: jumpIfCharNotEquals(ch, inputPosition: state.inputOffset()), masm: this);
532 }
533 }
534
535 void generatePatternCharacterPair(TermGenerationState& state)
536 {
537 const RegisterID character = regT0;
538 UChar ch1 = state.term().patternCharacter;
539 UChar ch2 = state.lookaheadTerm().patternCharacter;
540
541 int mask = 0;
542 int chPair = ch1 | (ch2 << 16);
543
544 if (m_pattern.m_ignoreCase) {
545 if (isASCIIAlpha(c: ch1))
546 mask |= 32;
547 if (isASCIIAlpha(c: ch2))
548 mask |= 32 << 16;
549 }
550
551 if (mask) {
552 load32WithUnalignedHalfWords(address: BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), dest: character);
553 or32(imm: Imm32(mask), dest: character);
554 state.jumpToBacktrack(jump: branch32(cond: NotEqual, left: character, right: Imm32(chPair | mask)), masm: this);
555 } else
556 state.jumpToBacktrack(jump: branch32WithUnalignedHalfWords(cond: NotEqual, left: BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), right: Imm32(chPair)), masm: this);
557 }
558
559 void generatePatternCharacterFixed(TermGenerationState& state)
560 {
561 const RegisterID character = regT0;
562 const RegisterID countRegister = regT1;
563 PatternTerm& term = state.term();
564 UChar ch = term.patternCharacter;
565
566 move(src: index, dest: countRegister);
567 sub32(imm: Imm32(term.quantityCount), dest: countRegister);
568
569 Label loop(this);
570 if (m_pattern.m_ignoreCase && isASCIIAlpha(c: ch)) {
571 load16(address: BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), dest: character);
572 or32(imm: Imm32(32), dest: character);
573 state.jumpToBacktrack(jump: branch32(cond: NotEqual, left: character, right: Imm32(Unicode::toLower(ch))), masm: this);
574 } else {
575 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
576 state.jumpToBacktrack(jump: branch16(cond: NotEqual, left: BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), right: Imm32(ch)), masm: this);
577 }
578 add32(imm: Imm32(1), dest: countRegister);
579 branch32(cond: NotEqual, left: countRegister, right: index).linkTo(label: loop, masm: this);
580 }
581
582 void generatePatternCharacterGreedy(TermGenerationState& state)
583 {
584 const RegisterID character = regT0;
585 const RegisterID countRegister = regT1;
586 PatternTerm& term = state.term();
587 UChar ch = term.patternCharacter;
588
589 move(imm: Imm32(0), dest: countRegister);
590
591 JumpList failures;
592 Label loop(this);
593 failures.append(jump: atEndOfInput());
594 if (m_pattern.m_ignoreCase && isASCIIAlpha(c: ch)) {
595 readCharacter(inputPosition: state.inputOffset(), reg: character);
596 or32(imm: Imm32(32), dest: character);
597 failures.append(jump: branch32(cond: NotEqual, left: character, right: Imm32(Unicode::toLower(ch))));
598 } else {
599 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
600 failures.append(jump: jumpIfCharNotEquals(ch, inputPosition: state.inputOffset()));
601 }
602 add32(imm: Imm32(1), dest: countRegister);
603 add32(imm: Imm32(1), dest: index);
604 branch32(cond: NotEqual, left: countRegister, right: Imm32(term.quantityCount)).linkTo(label: loop, masm: this);
605 failures.append(jump: jump());
606
607 Label backtrackBegin(this);
608 loadFromFrame(frameLocation: term.frameLocation, reg: countRegister);
609 state.jumpToBacktrack(jump: branchTest32(cond: Zero, reg: countRegister), masm: this);
610 sub32(imm: Imm32(1), dest: countRegister);
611 sub32(imm: Imm32(1), dest: index);
612
613 failures.link(masm: this);
614
615 storeToFrame(reg: countRegister, frameLocation: term.frameLocation);
616
617 state.setBacktrackGenerated(backtrackBegin);
618 }
619
620 void generatePatternCharacterNonGreedy(TermGenerationState& state)
621 {
622 const RegisterID character = regT0;
623 const RegisterID countRegister = regT1;
624 PatternTerm& term = state.term();
625 UChar ch = term.patternCharacter;
626
627 move(imm: Imm32(0), dest: countRegister);
628
629 Jump firstTimeDoNothing = jump();
630
631 Label hardFail(this);
632 sub32(src: countRegister, dest: index);
633 state.jumpToBacktrack(jump: jump(), masm: this);
634
635 Label backtrackBegin(this);
636 loadFromFrame(frameLocation: term.frameLocation, reg: countRegister);
637
638 atEndOfInput().linkTo(label: hardFail, masm: this);
639 branch32(cond: Equal, op1: countRegister, imm: Imm32(term.quantityCount), target: hardFail);
640 if (m_pattern.m_ignoreCase && isASCIIAlpha(c: ch)) {
641 readCharacter(inputPosition: state.inputOffset(), reg: character);
642 or32(imm: Imm32(32), dest: character);
643 branch32(cond: NotEqual, left: character, right: Imm32(Unicode::toLower(ch))).linkTo(label: hardFail, masm: this);
644 } else {
645 ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
646 jumpIfCharNotEquals(ch, inputPosition: state.inputOffset()).linkTo(label: hardFail, masm: this);
647 }
648
649 add32(imm: Imm32(1), dest: countRegister);
650 add32(imm: Imm32(1), dest: index);
651
652 firstTimeDoNothing.link(masm: this);
653 storeToFrame(reg: countRegister, frameLocation: term.frameLocation);
654
655 state.setBacktrackGenerated(backtrackBegin);
656 }
657
658 void generateCharacterClassSingle(TermGenerationState& state)
659 {
660 const RegisterID character = regT0;
661 PatternTerm& term = state.term();
662
663 JumpList matchDest;
664 readCharacter(inputPosition: state.inputOffset(), reg: character);
665 matchCharacterClass(character, matchDest, charClass: term.characterClass);
666
667 if (term.invertOrCapture)
668 state.jumpToBacktrack(jumps&: matchDest, masm: this);
669 else {
670 state.jumpToBacktrack(jump: jump(), masm: this);
671 matchDest.link(masm: this);
672 }
673 }
674
675 void generateCharacterClassFixed(TermGenerationState& state)
676 {
677 const RegisterID character = regT0;
678 const RegisterID countRegister = regT1;
679 PatternTerm& term = state.term();
680
681 move(src: index, dest: countRegister);
682 sub32(imm: Imm32(term.quantityCount), dest: countRegister);
683
684 Label loop(this);
685 JumpList matchDest;
686 load16(address: BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), dest: character);
687 matchCharacterClass(character, matchDest, charClass: term.characterClass);
688
689 if (term.invertOrCapture)
690 state.jumpToBacktrack(jumps&: matchDest, masm: this);
691 else {
692 state.jumpToBacktrack(jump: jump(), masm: this);
693 matchDest.link(masm: this);
694 }
695
696 add32(imm: Imm32(1), dest: countRegister);
697 branch32(cond: NotEqual, left: countRegister, right: index).linkTo(label: loop, masm: this);
698 }
699
700 void generateCharacterClassGreedy(TermGenerationState& state)
701 {
702 const RegisterID character = regT0;
703 const RegisterID countRegister = regT1;
704 PatternTerm& term = state.term();
705
706 move(imm: Imm32(0), dest: countRegister);
707
708 JumpList failures;
709 Label loop(this);
710 failures.append(jump: atEndOfInput());
711
712 if (term.invertOrCapture) {
713 readCharacter(inputPosition: state.inputOffset(), reg: character);
714 matchCharacterClass(character, matchDest&: failures, charClass: term.characterClass);
715 } else {
716 JumpList matchDest;
717 readCharacter(inputPosition: state.inputOffset(), reg: character);
718 matchCharacterClass(character, matchDest, charClass: term.characterClass);
719 failures.append(jump: jump());
720 matchDest.link(masm: this);
721 }
722
723 add32(imm: Imm32(1), dest: countRegister);
724 add32(imm: Imm32(1), dest: index);
725 branch32(cond: NotEqual, left: countRegister, right: Imm32(term.quantityCount)).linkTo(label: loop, masm: this);
726 failures.append(jump: jump());
727
728 Label backtrackBegin(this);
729 loadFromFrame(frameLocation: term.frameLocation, reg: countRegister);
730 state.jumpToBacktrack(jump: branchTest32(cond: Zero, reg: countRegister), masm: this);
731 sub32(imm: Imm32(1), dest: countRegister);
732 sub32(imm: Imm32(1), dest: index);
733
734 failures.link(masm: this);
735
736 storeToFrame(reg: countRegister, frameLocation: term.frameLocation);
737
738 state.setBacktrackGenerated(backtrackBegin);
739 }
740
741 void generateCharacterClassNonGreedy(TermGenerationState& state)
742 {
743 const RegisterID character = regT0;
744 const RegisterID countRegister = regT1;
745 PatternTerm& term = state.term();
746
747 move(imm: Imm32(0), dest: countRegister);
748
749 Jump firstTimeDoNothing = jump();
750
751 Label hardFail(this);
752 sub32(src: countRegister, dest: index);
753 state.jumpToBacktrack(jump: jump(), masm: this);
754
755 Label backtrackBegin(this);
756 loadFromFrame(frameLocation: term.frameLocation, reg: countRegister);
757
758 atEndOfInput().linkTo(label: hardFail, masm: this);
759 branch32(cond: Equal, op1: countRegister, imm: Imm32(term.quantityCount), target: hardFail);
760
761 JumpList matchDest;
762 readCharacter(inputPosition: state.inputOffset(), reg: character);
763 matchCharacterClass(character, matchDest, charClass: term.characterClass);
764
765 if (term.invertOrCapture)
766 matchDest.linkTo(label: hardFail, masm: this);
767 else {
768 jump(target: hardFail);
769 matchDest.link(masm: this);
770 }
771
772 add32(imm: Imm32(1), dest: countRegister);
773 add32(imm: Imm32(1), dest: index);
774
775 firstTimeDoNothing.link(masm: this);
776 storeToFrame(reg: countRegister, frameLocation: term.frameLocation);
777
778 state.setBacktrackGenerated(backtrackBegin);
779 }
780
781 void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
782 {
783 ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
784 ASSERT(parenthesesTerm.quantityCount == 1);
785
786 PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
787 unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
788
789 if (disjunction->m_alternatives.size() == 1) {
790 state.resetAlternative();
791 ASSERT(state.alternativeValid());
792 PatternAlternative* alternative = state.alternative();
793 optimizeAlternative(alternative);
794
795 int countToCheck = alternative->m_minimumSize - preCheckedCount;
796 if (countToCheck) {
797 ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
798
799 // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
800 // will be forced to always trampoline into here, just to decrement the index.
801 // Ick.
802 Jump skip = jump();
803
804 Label backtrackBegin(this);
805 sub32(imm: Imm32(countToCheck), dest: index);
806 state.addBacktrackJump(jump: jump());
807
808 skip.link(masm: this);
809
810 state.setBacktrackGenerated(backtrackBegin);
811
812 state.jumpToBacktrack(jump: jumpIfNoAvailableInput(countToCheck), masm: this);
813 state.checkedTotal += countToCheck;
814 }
815
816 for (state.resetTerm(); state.termValid(); state.nextTerm())
817 generateTerm(state);
818
819 state.checkedTotal -= countToCheck;
820 } else {
821 JumpList successes;
822
823 for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
824
825 PatternAlternative* alternative = state.alternative();
826 optimizeAlternative(alternative);
827
828 ASSERT(alternative->m_minimumSize >= preCheckedCount);
829 int countToCheck = alternative->m_minimumSize - preCheckedCount;
830 if (countToCheck) {
831 state.addBacktrackJump(jump: jumpIfNoAvailableInput(countToCheck));
832 state.checkedTotal += countToCheck;
833 }
834
835 for (state.resetTerm(); state.termValid(); state.nextTerm())
836 generateTerm(state);
837
838 // Matched an alternative.
839 DataLabelPtr dataLabel = storeToFrameWithPatch(frameLocation: alternativeFrameLocation);
840 successes.append(jump: jump());
841
842 // Alternative did not match.
843 Label backtrackLocation(this);
844
845 // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
846 state.plantJumpToBacktrackIfExists(masm: this);
847
848 state.linkAlternativeBacktracks(masm: this);
849
850 if (countToCheck) {
851 sub32(imm: Imm32(countToCheck), dest: index);
852 state.checkedTotal -= countToCheck;
853 }
854
855 m_backtrackRecords.append(val: AlternativeBacktrackRecord(dataLabel, backtrackLocation));
856 }
857 // We fall through to here when the last alternative fails.
858 // Add a backtrack out of here for the parenthese handling code to link up.
859 state.addBacktrackJump(jump: jump());
860
861 // Generate a trampoline for the parens code to backtrack to, to retry the
862 // next alternative.
863 state.setBacktrackGenerated(label());
864 loadFromFrameAndJump(frameLocation: alternativeFrameLocation);
865
866 // FIXME: both of the above hooks are a little inefficient, in that you
867 // may end up trampolining here, just to trampoline back out to the
868 // parentheses code, or vice versa. We can probably eliminate a jump
869 // by restructuring, but coding this way for now for simplicity during
870 // development.
871
872 successes.link(masm: this);
873 }
874 }
875
876 void generateParenthesesSingle(TermGenerationState& state)
877 {
878 const RegisterID indexTemporary = regT0;
879 PatternTerm& term = state.term();
880 PatternDisjunction* disjunction = term.parentheses.disjunction;
881 ASSERT(term.quantityCount == 1);
882
883 unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
884
885 unsigned parenthesesFrameLocation = term.frameLocation;
886 unsigned alternativeFrameLocation = parenthesesFrameLocation;
887 if (term.quantityType != QuantifierFixedCount)
888 alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
889
890 // optimized case - no capture & no quantifier can be handled in a light-weight manner.
891 if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
892 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
893 generateParenthesesDisjunction(parenthesesTerm&: state.term(), state&: parenthesesState, alternativeFrameLocation);
894 // this expects that any backtracks back out of the parentheses will be in the
895 // parenthesesState's backTrackJumps vector, and that if they need backtracking
896 // they will have set an entry point on the parenthesesState's backtrackLabel.
897 state.propagateBacktrackingFrom(nestedParenthesesState&: parenthesesState, masm: this);
898 } else {
899 Jump nonGreedySkipParentheses;
900 Label nonGreedyTryParentheses;
901 if (term.quantityType == QuantifierGreedy)
902 storeToFrame(imm: Imm32(1), frameLocation: parenthesesFrameLocation);
903 else if (term.quantityType == QuantifierNonGreedy) {
904 storeToFrame(imm: Imm32(0), frameLocation: parenthesesFrameLocation);
905 nonGreedySkipParentheses = jump();
906 nonGreedyTryParentheses = label();
907 storeToFrame(imm: Imm32(1), frameLocation: parenthesesFrameLocation);
908 }
909
910 // store the match start index
911 if (term.invertOrCapture) {
912 int inputOffset = state.inputOffset() - preCheckedCount;
913 if (inputOffset) {
914 move(src: index, dest: indexTemporary);
915 add32(imm: Imm32(inputOffset), dest: indexTemporary);
916 store32(src: indexTemporary, address: Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
917 } else
918 store32(src: index, address: Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
919 }
920
921 // generate the body of the parentheses
922 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
923 generateParenthesesDisjunction(parenthesesTerm&: state.term(), state&: parenthesesState, alternativeFrameLocation);
924
925 // store the match end index
926 if (term.invertOrCapture) {
927 int inputOffset = state.inputOffset();
928 if (inputOffset) {
929 move(src: index, dest: indexTemporary);
930 add32(imm: Imm32(state.inputOffset()), dest: indexTemporary);
931 store32(src: indexTemporary, address: Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
932 } else
933 store32(src: index, address: Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
934 }
935 Jump success = jump();
936
937 // A failure AFTER the parens jumps here
938 Label backtrackFromAfterParens(this);
939
940 if (term.quantityType == QuantifierGreedy) {
941 // If this is zero we have now tested with both with and without the parens.
942 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: indexTemporary);
943 state.jumpToBacktrack(jump: branchTest32(cond: Zero, reg: indexTemporary), masm: this);
944 } else if (term.quantityType == QuantifierNonGreedy) {
945 // If this is zero we have now tested with both with and without the parens.
946 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: indexTemporary);
947 branchTest32(cond: Zero, reg: indexTemporary).linkTo(label: nonGreedyTryParentheses, masm: this);
948 }
949
950 parenthesesState.plantJumpToBacktrackIfExists(masm: this);
951 // A failure WITHIN the parens jumps here
952 parenthesesState.linkAlternativeBacktracks(masm: this);
953 if (term.invertOrCapture) {
954 store32(imm: Imm32(-1), address: Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
955 store32(imm: Imm32(-1), address: Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
956 }
957
958 if (term.quantityType == QuantifierGreedy)
959 storeToFrame(imm: Imm32(0), frameLocation: parenthesesFrameLocation);
960 else
961 state.jumpToBacktrack(jump: jump(), masm: this);
962
963 state.setBacktrackGenerated(backtrackFromAfterParens);
964 if (term.quantityType == QuantifierNonGreedy)
965 nonGreedySkipParentheses.link(masm: this);
966 success.link(masm: this);
967 }
968 }
969
970 void generateParentheticalAssertion(TermGenerationState& state)
971 {
972 PatternTerm& term = state.term();
973 PatternDisjunction* disjunction = term.parentheses.disjunction;
974 ASSERT(term.quantityCount == 1);
975 ASSERT(term.quantityType == QuantifierFixedCount);
976
977 unsigned parenthesesFrameLocation = term.frameLocation;
978 unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
979
980 int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
981
982 if (term.invertOrCapture) {
983 // Inverted case
984 storeToFrame(reg: index, frameLocation: parenthesesFrameLocation);
985
986 state.checkedTotal -= countCheckedAfterAssertion;
987 if (countCheckedAfterAssertion)
988 sub32(imm: Imm32(countCheckedAfterAssertion), dest: index);
989
990 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
991 generateParenthesesDisjunction(parenthesesTerm&: state.term(), state&: parenthesesState, alternativeFrameLocation);
992 // Success! - which means - Fail!
993 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: index);
994 state.jumpToBacktrack(jump: jump(), masm: this);
995
996 // And fail means success.
997 parenthesesState.linkAlternativeBacktracks(masm: this);
998 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: index);
999
1000 state.checkedTotal += countCheckedAfterAssertion;
1001 } else {
1002 // Normal case
1003 storeToFrame(reg: index, frameLocation: parenthesesFrameLocation);
1004
1005 state.checkedTotal -= countCheckedAfterAssertion;
1006 if (countCheckedAfterAssertion)
1007 sub32(imm: Imm32(countCheckedAfterAssertion), dest: index);
1008
1009 TermGenerationState parenthesesState(disjunction, state.checkedTotal);
1010 generateParenthesesDisjunction(parenthesesTerm&: state.term(), state&: parenthesesState, alternativeFrameLocation);
1011 // Success! - which means - Success!
1012 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: index);
1013 Jump success = jump();
1014
1015 parenthesesState.linkAlternativeBacktracks(masm: this);
1016 loadFromFrame(frameLocation: parenthesesFrameLocation, reg: index);
1017 state.jumpToBacktrack(jump: jump(), masm: this);
1018
1019 success.link(masm: this);
1020
1021 state.checkedTotal += countCheckedAfterAssertion;
1022 }
1023 }
1024
1025 void generateTerm(TermGenerationState& state)
1026 {
1027 PatternTerm& term = state.term();
1028
1029 switch (term.type) {
1030 case PatternTerm::TypeAssertionBOL:
1031 generateAssertionBOL(state);
1032 break;
1033
1034 case PatternTerm::TypeAssertionEOL:
1035 generateAssertionEOL(state);
1036 break;
1037
1038 case PatternTerm::TypeAssertionWordBoundary:
1039 generateAssertionWordBoundary(state);
1040 break;
1041
1042 case PatternTerm::TypePatternCharacter:
1043 switch (term.quantityType) {
1044 case QuantifierFixedCount:
1045 if (term.quantityCount == 1) {
1046 if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
1047 generatePatternCharacterPair(state);
1048 state.nextTerm();
1049 } else
1050 generatePatternCharacterSingle(state);
1051 } else
1052 generatePatternCharacterFixed(state);
1053 break;
1054 case QuantifierGreedy:
1055 generatePatternCharacterGreedy(state);
1056 break;
1057 case QuantifierNonGreedy:
1058 generatePatternCharacterNonGreedy(state);
1059 break;
1060 }
1061 break;
1062
1063 case PatternTerm::TypeCharacterClass:
1064 switch (term.quantityType) {
1065 case QuantifierFixedCount:
1066 if (term.quantityCount == 1)
1067 generateCharacterClassSingle(state);
1068 else
1069 generateCharacterClassFixed(state);
1070 break;
1071 case QuantifierGreedy:
1072 generateCharacterClassGreedy(state);
1073 break;
1074 case QuantifierNonGreedy:
1075 generateCharacterClassNonGreedy(state);
1076 break;
1077 }
1078 break;
1079
1080 case PatternTerm::TypeBackReference:
1081 m_generationFailed = true;
1082 break;
1083
1084 case PatternTerm::TypeForwardReference:
1085 break;
1086
1087 case PatternTerm::TypeParenthesesSubpattern:
1088 if ((term.quantityCount == 1) && !term.parentheses.isCopy)
1089 generateParenthesesSingle(state);
1090 else
1091 m_generationFailed = true;
1092 break;
1093
1094 case PatternTerm::TypeParentheticalAssertion:
1095 generateParentheticalAssertion(state);
1096 break;
1097 }
1098 }
1099
1100 void generateDisjunction(PatternDisjunction* disjunction)
1101 {
1102 TermGenerationState state(disjunction, 0);
1103 state.resetAlternative();
1104
1105 // Plant a check to see if there is sufficient input available to run the first alternative.
1106 // Jumping back to the label 'firstAlternative' will get to this check, jumping to
1107 // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
1108 // skipped this check.
1109
1110 Label firstAlternative(this);
1111
1112 // check availability for the next alternative
1113 int countCheckedForCurrentAlternative = 0;
1114 int countToCheckForFirstAlternative = 0;
1115 bool hasShorterAlternatives = false;
1116 JumpList notEnoughInputForPreviousAlternative;
1117
1118 if (state.alternativeValid()) {
1119 PatternAlternative* alternative = state.alternative();
1120 countToCheckForFirstAlternative = alternative->m_minimumSize;
1121 state.checkedTotal += countToCheckForFirstAlternative;
1122 if (countToCheckForFirstAlternative)
1123 notEnoughInputForPreviousAlternative.append(jump: jumpIfNoAvailableInput(countToCheck: countToCheckForFirstAlternative));
1124 countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
1125 }
1126
1127 Label firstAlternativeInputChecked(this);
1128
1129 while (state.alternativeValid()) {
1130 // Track whether any alternatives are shorter than the first one.
1131 hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
1132
1133 PatternAlternative* alternative = state.alternative();
1134 optimizeAlternative(alternative);
1135
1136 for (state.resetTerm(); state.termValid(); state.nextTerm())
1137 generateTerm(state);
1138
1139 // If we get here, the alternative matched.
1140 if (m_pattern.m_body->m_callFrameSize)
1141 addPtr(imm: Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), srcDest: stackPointerRegister);
1142
1143 ASSERT(index != returnRegister);
1144 if (m_pattern.m_body->m_hasFixedSize) {
1145 move(src: index, dest: returnRegister);
1146 if (alternative->m_minimumSize)
1147 sub32(imm: Imm32(alternative->m_minimumSize), dest: returnRegister);
1148 } else
1149 pop(dest: returnRegister);
1150 store32(src: index, address: Address(output, 4));
1151 store32(src: returnRegister, address: output);
1152
1153 generateReturn();
1154
1155 state.nextAlternative();
1156
1157 // if there are any more alternatives, plant the check for input before looping.
1158 if (state.alternativeValid()) {
1159 PatternAlternative* nextAlternative = state.alternative();
1160 int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
1161
1162 if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
1163 // If we get here, there the last input checked failed.
1164 notEnoughInputForPreviousAlternative.link(masm: this);
1165
1166 // Check if sufficent input available to run the next alternative
1167 notEnoughInputForPreviousAlternative.append(jump: jumpIfNoAvailableInput(countToCheck: countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1168 // We are now in the correct state to enter the next alternative; this add is only required
1169 // to mirror and revert operation of the sub32, just below.
1170 add32(imm: Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), dest: index);
1171
1172 // If we get here, there the last input checked passed.
1173 state.linkAlternativeBacktracks(masm: this);
1174 // No need to check if we can run the next alternative, since it is shorter -
1175 // just update index.
1176 sub32(imm: Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), dest: index);
1177 } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
1178 // If we get here, there the last input checked failed.
1179 // If there is insufficient input to run the current alternative, and the next alternative is longer,
1180 // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
1181 // we had checked.
1182 notEnoughInputForPreviousAlternative.link(masm: this);
1183 add32(imm: Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), dest: index);
1184 notEnoughInputForPreviousAlternative.append(jump: jump());
1185
1186 // The next alternative is longer than the current one; check the difference.
1187 state.linkAlternativeBacktracks(masm: this);
1188 notEnoughInputForPreviousAlternative.append(jump: jumpIfNoAvailableInput(countToCheck: countToCheckForNextAlternative - countCheckedForCurrentAlternative));
1189 } else { // CASE 3: Both alternatives are the same length.
1190 ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
1191
1192 // If the next alterative is the same length as this one, then no need to check the input -
1193 // if there was sufficent input to run the current alternative then there is sufficient
1194 // input to run the next one; if not, there isn't.
1195 state.linkAlternativeBacktracks(masm: this);
1196 }
1197
1198 state.checkedTotal -= countCheckedForCurrentAlternative;
1199 countCheckedForCurrentAlternative = countToCheckForNextAlternative;
1200 state.checkedTotal += countCheckedForCurrentAlternative;
1201 }
1202 }
1203
1204 // If we get here, all Alternatives failed...
1205
1206 state.checkedTotal -= countCheckedForCurrentAlternative;
1207
1208 // How much more input need there be to be able to retry from the first alternative?
1209 // examples:
1210 // /yarr_jit/ or /wrec|pcre/
1211 // In these examples we need check for one more input before looping.
1212 // /yarr_jit|pcre/
1213 // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
1214 // being four longer than the last alternative checked, and another +1 to effectively move
1215 // the start position along by one).
1216 // /yarr|rules/ or /wrec|notsomuch/
1217 // In these examples, provided that there was sufficient input to have just been matching for
1218 // the second alternative we can loop without checking for available input (since the second
1219 // alternative is longer than the first). In the latter example we need to decrement index
1220 // (by 4) so the start position is only progressed by 1 from the last iteration.
1221 int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
1222
1223 // First, deal with the cases where there was sufficient input to try the last alternative.
1224 if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
1225 state.linkAlternativeBacktracks(masm: this);
1226 else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
1227 state.linkAlternativeBacktracksTo(label: firstAlternativeInputChecked, masm: this);
1228 else { // no need to check the input, but we do have some bookkeeping to do first.
1229 state.linkAlternativeBacktracks(masm: this);
1230
1231 // Where necessary update our preserved start position.
1232 if (!m_pattern.m_body->m_hasFixedSize) {
1233 move(src: index, dest: regT0);
1234 sub32(imm: Imm32(countCheckedForCurrentAlternative - 1), dest: regT0);
1235 poke(src: regT0, index: m_pattern.m_body->m_callFrameSize);
1236 }
1237
1238 // Update index if necessary, and loop (without checking).
1239 if (incrementForNextIter)
1240 add32(imm: Imm32(incrementForNextIter), dest: index);
1241 jump().linkTo(label: firstAlternativeInputChecked, masm: this);
1242 }
1243
1244 notEnoughInputForPreviousAlternative.link(masm: this);
1245 // Update our idea of the start position, if we're tracking this.
1246 if (!m_pattern.m_body->m_hasFixedSize) {
1247 if (countCheckedForCurrentAlternative - 1) {
1248 move(src: index, dest: regT0);
1249 sub32(imm: Imm32(countCheckedForCurrentAlternative - 1), dest: regT0);
1250 poke(src: regT0, index: m_pattern.m_body->m_callFrameSize);
1251 } else
1252 poke(src: index, index: m_pattern.m_body->m_callFrameSize);
1253 }
1254 // Check if there is sufficent input to run the first alternative again.
1255 jumpIfAvailableInput(countToCheck: incrementForNextIter).linkTo(label: firstAlternativeInputChecked, masm: this);
1256 // No - insufficent input to run the first alteranative, are there any other alternatives we
1257 // might need to check? If so, the last check will have left the index incremented by
1258 // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
1259 // LESS input is available, to have the effect of just progressing the start position by 1
1260 // from the last iteration. If this check passes we can just jump up to the check associated
1261 // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
1262 // first alternative again, and this check will fail (otherwise the check planted just above
1263 // here would have passed). This is a bit sad, however it saves trying to do something more
1264 // complex here in compilation, and in the common case we should end up coallescing the checks.
1265 //
1266 // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
1267 // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
1268 // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
1269 // is sufficient input to run either alternative (constantly failing). If there had been only
1270 // one alternative, or if the shorter alternative had come first, we would have terminated
1271 // immediately. :-/
1272 if (hasShorterAlternatives)
1273 jumpIfAvailableInput(countToCheck: -countToCheckForFirstAlternative).linkTo(label: firstAlternative, masm: this);
1274 // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
1275 // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
1276 // but since we're about to return a failure this doesn't really matter!)
1277
1278 unsigned frameSize = m_pattern.m_body->m_callFrameSize;
1279 if (!m_pattern.m_body->m_hasFixedSize)
1280 ++frameSize;
1281 if (frameSize)
1282 addPtr(imm: Imm32(frameSize * sizeof(void*)), srcDest: stackPointerRegister);
1283
1284 move(imm: Imm32(-1), dest: returnRegister);
1285
1286 generateReturn();
1287 }
1288
1289 void generateEnter()
1290 {
1291#if CPU(X86_64)
1292 push(src: X86Registers::ebp);
1293 move(src: stackPointerRegister, dest: X86Registers::ebp);
1294 push(src: X86Registers::ebx);
1295#elif CPU(X86)
1296 push(X86Registers::ebp);
1297 move(stackPointerRegister, X86Registers::ebp);
1298 // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
1299 push(X86Registers::ebx);
1300 push(X86Registers::edi);
1301 push(X86Registers::esi);
1302 // load output into edi (2 = saved ebp + return address).
1303 #if COMPILER(MSVC)
1304 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
1305 loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
1306 loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
1307 loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
1308 #else
1309 loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
1310 #endif
1311#elif CPU(ARM)
1312 push(ARMRegisters::r4);
1313 push(ARMRegisters::r5);
1314 push(ARMRegisters::r6);
1315#if CPU(ARM_TRADITIONAL)
1316 push(ARMRegisters::r8); // scratch register
1317#endif
1318 move(ARMRegisters::r3, output);
1319#endif
1320 }
1321
1322 void generateReturn()
1323 {
1324#if CPU(X86_64)
1325 pop(dest: X86Registers::ebx);
1326 pop(dest: X86Registers::ebp);
1327#elif CPU(X86)
1328 pop(X86Registers::esi);
1329 pop(X86Registers::edi);
1330 pop(X86Registers::ebx);
1331 pop(X86Registers::ebp);
1332#elif CPU(ARM)
1333#if CPU(ARM_TRADITIONAL)
1334 pop(ARMRegisters::r8); // scratch register
1335#endif
1336 pop(ARMRegisters::r6);
1337 pop(ARMRegisters::r5);
1338 pop(ARMRegisters::r4);
1339#endif
1340 ret();
1341 }
1342
1343public:
1344 RegexGenerator(RegexPattern& pattern)
1345 : m_pattern(pattern)
1346 , m_generationFailed(false)
1347 {
1348 }
1349
1350 void generate()
1351 {
1352 generateEnter();
1353
1354 // TODO: do I really want this on the stack?
1355 if (!m_pattern.m_body->m_hasFixedSize)
1356 push(src: index);
1357
1358 if (m_pattern.m_body->m_callFrameSize)
1359 subPtr(imm: Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), dest: stackPointerRegister);
1360
1361 generateDisjunction(disjunction: m_pattern.m_body);
1362 }
1363
1364 void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
1365 {
1366 generate();
1367
1368 LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(n: size()));
1369
1370 for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
1371 patchBuffer.patch(label: m_backtrackRecords[i].dataLabel, value: patchBuffer.locationOf(label: m_backtrackRecords[i].backtrackLocation));
1372
1373 jitObject.set(patchBuffer.finalizeCode());
1374 }
1375
1376 bool generationFailed()
1377 {
1378 return m_generationFailed;
1379 }
1380
1381private:
1382 RegexPattern& m_pattern;
1383 Vector<AlternativeBacktrackRecord> m_backtrackRecords;
1384 bool m_generationFailed;
1385};
1386
1387void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
1388{
1389 RegexPattern pattern(ignoreCase, multiline);
1390
1391 if ((error = compileRegex(patternString, pattern)))
1392 return;
1393
1394 numSubpatterns = pattern.m_numSubpatterns;
1395
1396 RegexGenerator generator(pattern);
1397 generator.compile(globalData, jitObject);
1398
1399 if (generator.generationFailed()) {
1400 JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
1401 JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
1402 jitObject.setFallback(jsRegExpCompile(pattern: reinterpret_cast<const UChar*>(patternString.data()), patternLength: patternString.size(), ignoreCaseOption, multilineOption, numSubpatterns: &numSubpatterns, errorMessage: &error));
1403 }
1404}
1405
1406}}
1407
1408#endif
1409
1410
1411
1412
1413
1414

source code of qtscript/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp