/*
 * Copyright (C) 2009-2018 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "YarrJIT.h"

#include <wtf/ASCIICType.h>
#include "LinkBuffer.h"
#include "Options.h"
#include "VM.h"
#include "Yarr.h"
#include "YarrCanonicalize.h"

#include <private/qv4functiontable_p.h>

#if ENABLE(YARR_JIT)

namespace JSC { namespace Yarr {

template<YarrJITCompileMode compileMode>
class YarrGenerator : private DefaultMacroAssembler {

#if CPU(ARM_THUMB2)
    static const RegisterID input = ARMRegisters::r0;
    static const RegisterID index = ARMRegisters::r1;
    static const RegisterID length = ARMRegisters::r2;
    static const RegisterID output = ARMRegisters::r3;

    static const RegisterID regT0 = ARMRegisters::r4;
    static const RegisterID regT1 = ARMRegisters::r5;
    static const RegisterID initialStart = ARMRegisters::r8;

    static const RegisterID returnRegister = ARMRegisters::r0;
    static const RegisterID returnRegister2 = ARMRegisters::r1;

#define HAVE_INITIAL_START_REG
#elif CPU(ARM64)
    // Argument registers
    static const RegisterID input = ARM64Registers::x0;
    static const RegisterID index = ARM64Registers::x1;
    static const RegisterID length = ARM64Registers::x2;
    static const RegisterID output = ARM64Registers::x3;
    static const RegisterID freelistRegister = ARM64Registers::x4;
    static const RegisterID freelistSizeRegister = ARM64Registers::x5;

    // Scratch registers
    static const RegisterID regT0 = ARM64Registers::x6;
    static const RegisterID regT1 = ARM64Registers::x7;
    static const RegisterID regT2 = ARM64Registers::x8;
    static const RegisterID remainingMatchCount = ARM64Registers::x9;
    static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
    static const RegisterID initialStart = ARM64Registers::x11;
    static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
    static const RegisterID surrogateTagMask = ARM64Registers::x13;
    static const RegisterID leadingSurrogateTag = ARM64Registers::x14;
    static const RegisterID trailingSurrogateTag = ARM64Registers::x15;

    static const RegisterID returnRegister = ARM64Registers::x0;
    static const RegisterID returnRegister2 = ARM64Registers::x1;

#define HAVE_INITIAL_START_REG
#define JIT_UNICODE_EXPRESSIONS
#elif CPU(MIPS)
    static const RegisterID input = MIPSRegisters::a0;
    static const RegisterID index = MIPSRegisters::a1;
    static const RegisterID length = MIPSRegisters::a2;
    static const RegisterID output = MIPSRegisters::a3;

    static const RegisterID regT0 = MIPSRegisters::t4;
    static const RegisterID regT1 = MIPSRegisters::t5;
    static const RegisterID initialStart = MIPSRegisters::t6;

    static const RegisterID returnRegister = MIPSRegisters::v0;
    static const RegisterID returnRegister2 = MIPSRegisters::v1;

#define HAVE_INITIAL_START_REG
#elif CPU(X86)
    static const RegisterID input = X86Registers::eax;
    static const RegisterID index = X86Registers::edx;
    static const RegisterID length = X86Registers::ecx;
    static const RegisterID output = X86Registers::edi;

    static const RegisterID regT0 = X86Registers::ebx;
    static const RegisterID regT1 = X86Registers::esi;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
#if !OS(WINDOWS)
    // Argument registers
    static const RegisterID input = X86Registers::edi;
    static const RegisterID index = X86Registers::esi;
    static const RegisterID length = X86Registers::edx;
    static const RegisterID output = X86Registers::ecx;
    static const RegisterID freelistRegister = X86Registers::r8;
    static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
#else
    // If the return value doesn't fit in 64 bits, its destination is pointed to by rcx and the parameters are shifted.
    // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
    COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
    static const RegisterID input = X86Registers::edx;
    static const RegisterID index = X86Registers::r8;
    static const RegisterID length = X86Registers::r9;
    static const RegisterID output = X86Registers::r10;
#endif

    // Scratch registers
    static const RegisterID regT0 = X86Registers::eax;
#if !OS(WINDOWS)
    static const RegisterID regT1 = X86Registers::r9;
    static const RegisterID regT2 = X86Registers::r10;
#else
    static const RegisterID regT1 = X86Registers::ecx;
    static const RegisterID regT2 = X86Registers::edi;
#endif

    static const RegisterID initialStart = X86Registers::ebx;
#if !OS(WINDOWS)
    static const RegisterID remainingMatchCount = X86Registers::r12;
#else
    static const RegisterID remainingMatchCount = X86Registers::esi;
#endif
    static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
    static const RegisterID leadingSurrogateTag = X86Registers::r14;
    static const RegisterID trailingSurrogateTag = X86Registers::r15;

    static const RegisterID returnRegister = X86Registers::eax;
    static const RegisterID returnRegister2 = X86Registers::edx;

    const TrustedImm32 supplementaryPlanesBase = TrustedImm32(0x10000);
    const TrustedImm32 surrogateTagMask = TrustedImm32(0xfffffc00);
#define HAVE_INITIAL_START_REG
#define JIT_UNICODE_EXPRESSIONS
#endif

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
    struct ParenContextSizes {
        size_t m_numSubpatterns;
        size_t m_frameSlots;

        ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
            : m_numSubpatterns(numSubpatterns)
            , m_frameSlots(frameSlots)
        {
        }

        size_t numSubpatterns() { return m_numSubpatterns; }

        size_t frameSlots() { return m_frameSlots; }
    };

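    // A ParenContext saves the per-iteration state of a parenthesized subpattern
    // so it can be restored on backtracking. The fixed header below is followed
    // by a variable-length tail: one Subpatterns entry per capture group, then
    // the saved frame slots, giving a total of
    //     sizeof(ParenContext) + numSubpatterns * sizeof(Subpatterns) + frameSlots * sizeof(uintptr_t)
    // bytes, as computed by sizeFor().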
    struct ParenContext {
        struct ParenContext* next;
        uint32_t begin;
        uint32_t matchAmount;
        uintptr_t returnAddress;
#if OS(INTEGRITY)
        union {
            struct Subpatterns {
                unsigned start;
                unsigned end;
            } subpatterns[1];
            uintptr_t frameSlots[1];
        };
#else
        struct Subpatterns {
            unsigned start;
            unsigned end;
        } subpatterns[0];
        uintptr_t frameSlots[0];
#endif

        static size_t sizeFor(ParenContextSizes& parenContextSizes)
        {
            return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
        }

        static ptrdiff_t nextOffset()
        {
            return offsetof(ParenContext, next);
        }

        static ptrdiff_t beginOffset()
        {
            return offsetof(ParenContext, begin);
        }

        static ptrdiff_t matchAmountOffset()
        {
            return offsetof(ParenContext, matchAmount);
        }

        static ptrdiff_t returnAddressOffset()
        {
            return offsetof(ParenContext, returnAddress);
        }

        static ptrdiff_t subpatternOffset(size_t subpattern)
        {
            return offsetof(ParenContext, subpatterns) + (subpattern - 1) * sizeof(Subpatterns);
        }

        static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
        {
            return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
        }
    };

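    // Carves the caller-provided buffer (freelistRegister / freelistSizeRegister)
    // into a singly linked free list of ParenContext records. Once the list is
    // built, freelistSizeRegister has been repurposed to hold the last address
    // at which a whole record still fits within the buffer.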
    void initParenContextFreeList()
    {
        RegisterID parenContextPointer = regT0;
        RegisterID nextParenContextPointer = regT2;

        size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);

        parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(parenContextSize);

        // Check that the paren context is a reasonable size.
        if (parenContextSize > INT16_MAX)
            m_abortExecution.append(jump());

        Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
        move(freelistRegister, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), freelistRegister, nextParenContextPointer);
        addPtr(freelistRegister, freelistSizeRegister);
        subPtr(TrustedImm32(parenContextSize), freelistSizeRegister);

        Label loopTop(this);
        Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
        storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
        move(nextParenContextPointer, parenContextPointer);
        addPtr(TrustedImm32(parenContextSize), parenContextPointer, nextParenContextPointer);
        jump(loopTop);

        initDone.link(this);
        storePtr(TrustedImmPtr(nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
        emptyFreeList.link(this);
    }

    void allocateParenContext(RegisterID result)
    {
        m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
        sub32(TrustedImm32(1), remainingMatchCount);
        m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
        move(freelistRegister, result);
        loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
    }

    void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
    {
        loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
        storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
        move(headPtrRegister, freelistRegister);
    }

    void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        store32(index, Address(parenContextReg, ParenContext::beginOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), tempReg);
        store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
        loadFromFrame(subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), tempReg);
        storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                loadPtr(Address(output, (subpattern << 1) * sizeof(unsigned)), tempReg);
                storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
                clearSubpatternStart(subpattern);
            }
        }
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadFromFrame(frameLocation, tempReg);
            storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
        }
    }

    void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
    {
        load32(Address(parenContextReg, ParenContext::beginOffset()), index);
        storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
        load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
        loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
        storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
        if (compileMode == IncludeSubpatterns) {
            for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
                loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
                storePtr(tempReg, Address(output, (subpattern << 1) * sizeof(unsigned)));
            }
        }
        subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
        for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
            loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
            storeToFrame(tempReg, frameLocation);
        }
    }
#endif

    void optimizeAlternative(PatternAlternative* alternative)
    {
        if (!alternative->m_terms.size())
            return;

        for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
            PatternTerm& term = alternative->m_terms[i];
            PatternTerm& nextTerm = alternative->m_terms[i + 1];

            // We can move BMP only character classes after fixed character terms.
            if ((term.type == PatternTerm::TypeCharacterClass)
                && (term.quantityType == QuantifierFixedCount)
                && (!m_decodeSurrogatePairs || (!term.characterClass->m_hasNonBMPCharacters && !term.m_invert))
                && (nextTerm.type == PatternTerm::TypePatternCharacter)
                && (nextTerm.quantityType == QuantifierFixedCount)) {
                PatternTerm termCopy = term;
                alternative->m_terms[i] = nextTerm;
                alternative->m_terms[i + 1] = termCopy;
            }
        }
    }

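    // Emits a binary search over the (sorted, non-overlapping) ranges of a
    // character class, interleaving the class's sorted singleton matches at the
    // points where they fall between ranges. Branches to matchDest on a match
    // and to failures otherwise; *matchIndex tracks how many singletons have
    // been consumed so far across the recursion.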
    void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
    {
        do {
            // pick which range we're going to generate
            int which = count >> 1;
            char lo = ranges[which].begin;
            char hi = ranges[which].end;

            // check if there are any ranges or matches below lo. If not, just jl to failure -
            // if there is anything else to check, check that first, if it falls through jmp to failure.
            if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                // generate code for all ranges before this one
                if (which)
                    matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);

                while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
                    matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
                    ++*matchIndex;
                }
                failures.append(jump());

                loOrAbove.link(this);
            } else if (which) {
                Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));

                matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
                failures.append(jump());

                loOrAbove.link(this);
            } else
                failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));

            while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
                ++*matchIndex;

            matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
            // fall through to here, the value is above hi.

            // shuffle along & loop around if there are any more matches to handle.
            unsigned next = which + 1;
            ranges += next;
            count -= next;
        } while (count);
    }

    void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
    {
        if (charClass->m_table && !m_decodeSurrogatePairs) {
            ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
            matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
            return;
        }
        JumpList unicodeFail;
        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
            JumpList isAscii;
            if (charClass->m_matches.size() || charClass->m_ranges.size())
                isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32(0x7f)));

            if (charClass->m_matchesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
                    UChar32 ch = charClass->m_matchesUnicode[i];
                    matchDest.append(branch32(Equal, character, Imm32(ch)));
                }
            }

            if (charClass->m_rangesUnicode.size()) {
                for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
                    UChar32 lo = charClass->m_rangesUnicode[i].begin;
                    UChar32 hi = charClass->m_rangesUnicode[i].end;

                    Jump below = branch32(LessThan, character, Imm32(lo));
                    matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
                    below.link(this);
                }
            }

            if (charClass->m_matches.size() || charClass->m_ranges.size())
                unicodeFail = jump();
            isAscii.link(this);
        }

        if (charClass->m_ranges.size()) {
            unsigned matchIndex = 0;
            JumpList failures;
            matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.data(), charClass->m_ranges.size(),
                &matchIndex, charClass->m_matches.data(), charClass->m_matches.size());
            while (matchIndex < charClass->m_matches.size())
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));

            failures.link(this);
        } else if (charClass->m_matches.size()) {
            // optimization: gather 'a','A' etc back together, can mask & test once.
            Vector<char> matchesAZaz;

            for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
                char ch = charClass->m_matches[i];
                if (m_pattern.ignoreCase()) {
                    if (isASCIILower(ch)) {
                        matchesAZaz.append(ch);
                        continue;
                    }
                    if (isASCIIUpper(ch))
                        continue;
                }
                matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
            }

            if (unsigned countAZaz = matchesAZaz.size()) {
                or32(TrustedImm32(32), character);
                for (unsigned i = 0; i < countAZaz; ++i)
                    matchDest.append(branch32(Equal, character, TrustedImm32(matchesAZaz[i])));
            }
        }

        if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
            unicodeFail.link(this);
    }

    // Jumps if input not available; will have (incorrectly) incremented already!
    Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
    {
        if (countToCheck)
            add32(Imm32(countToCheck), index);
        return branch32(Above, index, length);
    }

    Jump jumpIfAvailableInput(unsigned countToCheck)
    {
        add32(Imm32(countToCheck), index);
        return branch32(BelowOrEqual, index, length);
    }

    Jump checkNotEnoughInput(RegisterID additionalAmount)
    {
        add32(index, additionalAmount);
        return branch32(Above, additionalAmount, length);
    }

    Jump checkInput()
    {
        return branch32(BelowOrEqual, index, length);
    }

    Jump atEndOfInput()
    {
        return branch32(Equal, index, length);
    }

    Jump notAtEndOfInput()
    {
        return branch32(NotEqual, index, length);
    }

    BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
    {
        RegisterID base = input;

        // BaseIndex() addressing can take an int32_t offset. Given that we can have a regular
        // expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
        // for addressing in extreme cases where we might underflow. Therefore we check to see if
        // negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
        // If so, we do our own address calculation by adjusting the base, using the result register
        // as a temp address register.
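        // For example, with 16-bit characters the offset is doubled for byte
        // addressing, so anything above 0x3fffffff is peeled off below in
        // 0x40000000-character steps (0x80000000 bytes, subtracted in two halves).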
        unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? 0x7fffffff : 0x3fffffff;
        unsigned offsetAdjustAmount = 0x40000000;
        if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
            base = tempReg;
            move(input, base);
            while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
                subPtr(TrustedImm32(offsetAdjustAmount), base);
                if (m_charSize != Char8)
                    subPtr(TrustedImm32(offsetAdjustAmount), base);
                negativeCharacterOffset -= offsetAdjustAmount;
            }
        }

        Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));

        if (m_charSize == Char8)
            return BaseIndex(base, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());

        return BaseIndex(base, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
    }

#ifdef JIT_UNICODE_EXPRESSIONS
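    // Reads a UTF-16 code unit from the address in regUnicodeInputAndTrail and,
    // if it is a leading surrogate with an in-bounds trailing surrogate after it,
    // combines the pair. In effect, for a valid pair this computes the usual
    // UTF-16 decode:
    //     codePoint = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00)
    // Otherwise resultReg is left holding the code unit that was read.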
    void tryReadUnicodeCharImpl(RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        JumpList notUnicode;
        load16Unaligned(regUnicodeInputAndTrail, resultReg);
        and32(surrogateTagMask, resultReg, regT2);
        notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
        addPtr(TrustedImm32(2), regUnicodeInputAndTrail);
        getEffectiveAddress(BaseIndex(input, length, TimesTwo), regT2);
        notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2));
        load16Unaligned(Address(regUnicodeInputAndTrail), regUnicodeInputAndTrail);
        and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
        notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
        sub32(leadingSurrogateTag, resultReg);
        sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
        lshift32(TrustedImm32(10), resultReg);
        or32(regUnicodeInputAndTrail, resultReg);
        add32(supplementaryPlanesBase, resultReg);
        notUnicode.link(this);
    }

    void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
    {
        ASSERT(m_charSize == Char16);

        getEffectiveAddress(address, regUnicodeInputAndTrail);

        if (resultReg == regT0)
            m_tryReadUnicodeCharacterCalls.append(nearCall());
        else
            tryReadUnicodeCharImpl(resultReg);
    }
#endif

    void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
    {
        BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);

        if (m_charSize == Char8)
            load8(address, resultReg);
        else
            load16Unaligned(address, resultReg);
    }

    void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
    {
        BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, resultReg, indexReg);

        if (m_charSize == Char8)
            load8(address, resultReg);
#ifdef JIT_UNICODE_EXPRESSIONS
        else if (m_decodeSurrogatePairs)
            tryReadUnicodeChar(address, resultReg);
#endif
        else
            load16Unaligned(address, resultReg);
    }

    Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
    {
        readCharacter(negativeCharacterOffset, character);

        // For case-insensitive compares, non-ascii characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
            or32(TrustedImm32(0x20), character);
            ch |= 0x20;
        }

        return branch32(NotEqual, character, Imm32(ch));
    }

    void storeToFrame(RegisterID reg, unsigned frameLocation)
    {
        poke(reg, frameLocation);
    }

    void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }

#if CPU(ARM64) || CPU(X86_64)
    void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
    {
        poke(imm, frameLocation);
    }
#endif

    DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
    {
        return storePtrWithPatch(TrustedImmPtr(nullptr), Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    void loadFromFrame(unsigned frameLocation, RegisterID reg)
    {
        peek(reg, frameLocation);
    }

    void loadFromFrameAndJump(unsigned frameLocation)
    {
        jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
    }

    unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
    {
        if (!callFrameSize)
            return 0;

        callFrameSize *= sizeof(void*);
        if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
            CRASH();
        // Round up to a multiple of 64 bytes.
        callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
        return callFrameSize;
    }
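    // Reserves the backtracking frame on the stack. When Options::zeroStackFrame()
    // is set, the freshly reserved memory is also cleared: small frames via an
    // unrolled run of stores, larger ones via a loop that writes two pointers per
    // iteration (a single storePair64 on ARM64).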
    void initCallFrame()
    {
        unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
        if (callFrameSizeInBytes) {
#if CPU(X86_64) || CPU(ARM64)
            if (Options::zeroStackFrame()) {
                // We need to start from the stack pointer, because we could have spilled callee saves
                move(stackPointerRegister, regT0);
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
                if (callFrameSizeInBytes <= 128) {
                    for (unsigned offset = 0; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
                        storePtr(TrustedImmPtr(0), Address(regT0, -8 - int(offset)));
                } else {
                    Label zeroLoop = label();
                    subPtr(TrustedImm32(sizeof(intptr_t) * 2), regT0);
#if CPU(ARM64)
                    storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
#else
                    storePtr(TrustedImmPtr(0), Address(regT0));
                    storePtr(TrustedImmPtr(0), Address(regT0, sizeof(intptr_t)));
#endif
                    branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
                }
            } else
#endif
                subPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
        }
    }
    void removeCallFrame()
    {
        unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(m_pattern.m_body->m_callFrameSize);
        if (callFrameSizeInBytes)
            addPtr(Imm32(callFrameSizeInBytes), stackPointerRegister);
    }

    void generateFailReturn()
    {
        move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
    }

    void generateJITFailReturn()
    {
        if (m_abortExecution.empty() && m_hitMatchLimit.empty())
            return;

        JumpList finishExiting;
        if (!m_abortExecution.empty()) {
            m_abortExecution.link(this);
            move(TrustedImmPtr((void*)static_cast<size_t>(-2)), returnRegister);
            finishExiting.append(jump());
        }

        if (!m_hitMatchLimit.empty()) {
            m_hitMatchLimit.link(this);
            move(TrustedImmPtr((void*)static_cast<size_t>(-1)), returnRegister);
        }

        finishExiting.link(this);
        removeCallFrame();
        move(TrustedImm32(0), returnRegister2);
        generateReturn();
    }

    // Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
    void setSubpatternStart(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
    }
    void setSubpatternEnd(RegisterID reg, unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
    }
    void clearSubpatternStart(unsigned subpattern)
    {
        ASSERT(subpattern);
        // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
        store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
    }

    void clearMatches(unsigned subpattern, unsigned lastSubpattern)
    {
        for (; subpattern <= lastSubpattern; subpattern++)
            clearSubpatternStart(subpattern);
    }

    // We use one of three different strategies to track the start of the current match,
    // while matching.
    // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
    //    at the end of matching. This is irrespective of compileMode, and in this case
    //    these methods should never be called.
    // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
    //    vector, store the match start in the output vector.
    // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
    //    in this register.
    void setMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        if (compileMode == IncludeSubpatterns)
            store32(reg, output);
        else
            move(reg, output);
    }
    void getMatchStart(RegisterID reg)
    {
        ASSERT(!m_pattern.m_body->m_hasFixedSize);
        if (compileMode == IncludeSubpatterns)
            load32(output, reg);
        else
            move(output, reg);
    }

    enum YarrOpCode {
        // These nodes wrap body alternatives - those in the main disjunction,
        // rather than subpatterns or assertions. These are chained together in
        // a doubly linked list, with a 'begin' node for the first alternative,
        // a 'next' node for each subsequent alternative, and an 'end' node at
        // the end. In the case of repeating alternatives, the 'end' node also
        // has a reference back to 'begin'.
        OpBodyAlternativeBegin,
        OpBodyAlternativeNext,
        OpBodyAlternativeEnd,
        // Similar to the body alternatives, but used for subpatterns with two
        // or more alternatives.
        OpNestedAlternativeBegin,
        OpNestedAlternativeNext,
        OpNestedAlternativeEnd,
        // Used for alternatives in subpatterns where there is only a single
        // alternative (backtracking is easier in these cases), or for alternatives
        // which never need to be backtracked (those in parenthetical assertions,
        // terminal subpatterns).
        OpSimpleNestedAlternativeBegin,
        OpSimpleNestedAlternativeNext,
        OpSimpleNestedAlternativeEnd,
        // Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
        OpParenthesesSubpatternOnceBegin,
        OpParenthesesSubpatternOnceEnd,
        // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
        OpParenthesesSubpatternTerminalBegin,
        OpParenthesesSubpatternTerminalEnd,
        // Used to wrap generic captured matches
        OpParenthesesSubpatternBegin,
        OpParenthesesSubpatternEnd,
        // Used to wrap parenthetical assertions.
        OpParentheticalAssertionBegin,
        OpParentheticalAssertionEnd,
        // Wraps all simple terms (pattern characters, character classes).
        OpTerm,
        // Where an expression contains only 'once through' body alternatives
        // and no repeating ones, this op is used to return match failure.
        OpMatchFailed
    };

    // This structure is used to hold the compiled opcode information,
    // including reference back to the original PatternTerm/PatternAlternatives,
    // and JIT compilation data structures.
    struct YarrOp {
        explicit YarrOp(PatternTerm* term)
            : m_op(OpTerm)
            , m_term(term)
            , m_isDeadCode(false)
        {
        }

        explicit YarrOp(YarrOpCode op)
            : m_op(op)
            , m_isDeadCode(false)
        {
        }

        // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
        YarrOpCode m_op;
        PatternTerm* m_term = nullptr;

        // For alternatives, this holds the PatternAlternative and doubly linked
        // references to this alternative's siblings. In the case of the
        // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
        // m_nextOp will reference the OpBodyAlternativeBegin node of the first
        // repeating alternative.
        PatternAlternative* m_alternative = nullptr;
        size_t m_previousOp = 0;
        size_t m_nextOp = 0;

        // Used to record a set of Jumps out of the generated code, typically
        // used for jumps out to backtracking code, and a single reentry back
        // into the code for a node (likely where a backtrack will trigger
        // rematching).
        Label m_reentry;
        JumpList m_jumps;

        // Used for backtracking when the prior alternative did not consume any
        // characters but matched.
        Jump m_zeroLengthMatch;

        // This flag is used to null out the second pattern character, when
        // two are fused to match a pair together.
        bool m_isDeadCode;

        // Currently used in the case of some of the more complex management of
        // 'm_checkedOffset', to cache the offset used in this alternative, to avoid
        // recalculating it.
        Checked<unsigned> m_checkAdjust;

        // Used by OpNestedAlternativeNext/End to hold the pointer to the
        // value that will be pushed into the pattern's frame to return to,
        // upon backtracking back into the disjunction.
        DataLabelPtr m_returnAddress;
    };

    // BacktrackingState
    // This class encapsulates information about the state of code generation
    // whilst generating the code for backtracking, when a term fails to match.
    // Upon entry to code generation of the backtracking code for a given node,
    // the Backtracking state will hold references to all control flow sources
    // that are outputs in need of further backtracking from the prior node
    // generated (which is the subsequent operation in the regular expression,
    // and in the m_ops Vector, since we generated backtracking backwards).
    // These references to control flow take the form of:
    //  - A jump list of jumps, to be linked to code that will backtrack them
    //    further.
    //  - A set of DataLabelPtr values, to be populated with values to be
    //    treated effectively as return addresses backtracking into complex
    //    subpatterns.
    //  - A flag indicating that the current sequence of generated code up to
    //    this point requires backtracking.
    class BacktrackingState {
    public:
        BacktrackingState()
            : m_pendingFallthrough(false)
        {
        }

        // Add a jump or jumps, a return address, or set the flag indicating
        // that the current 'fallthrough' control flow requires backtracking.
        void append(const Jump& jump)
        {
            m_laterFailures.append(jump);
        }
        void append(JumpList& jumpList)
        {
            m_laterFailures.append(jumpList);
        }
        void append(const DataLabelPtr& returnAddress)
        {
            m_pendingReturns.append(returnAddress);
        }
        void fallthrough()
        {
            ASSERT(!m_pendingFallthrough);
            m_pendingFallthrough = true;
        }

        // These methods clear the backtracking state, either linking to the
        // current location, a provided label, or copying the backtracking out
        // to a JumpList. All actions may require code generation to take place,
        // and as such are passed a pointer to the assembler.
        void link(MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
            }
            m_laterFailures.link(assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void linkTo(Label label, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
                m_pendingReturns.clear();
            }
            if (m_pendingFallthrough)
                assembler->jump(label);
            m_laterFailures.linkTo(label, assembler);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }
        void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
        {
            if (m_pendingReturns.size()) {
                Label here(assembler);
                for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
                    m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
                m_pendingReturns.clear();
                m_pendingFallthrough = true;
            }
            if (m_pendingFallthrough)
                jumpList.append(assembler->jump());
            jumpList.append(m_laterFailures);
            m_laterFailures.clear();
            m_pendingFallthrough = false;
        }

        bool isEmpty()
        {
            return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
        }

        // Called at the end of code generation to link all return addresses.
        void linkDataLabels(DefaultLinkBuffer& linkBuffer)
        {
            ASSERT(isEmpty());
            for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
                linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
        }

    private:
        struct ReturnAddressRecord {
            ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
                : m_dataLabel(dataLabel)
                , m_backtrackLocation(backtrackLocation)
            {
            }

            DataLabelPtr m_dataLabel;
            Label m_backtrackLocation;
        };

        JumpList m_laterFailures;
        bool m_pendingFallthrough;
        Vector<DataLabelPtr, 4> m_pendingReturns;
        Vector<ReturnAddressRecord, 4> m_backtrackRecords;
    };

    // Generation methods:
    // ===================

    // This method provides a default implementation of backtracking common
    // to many terms; terms commonly jump out of the forwards matching path
    // on any failed conditions, and add these jumps to the m_jumps list. If
    // no special handling is required we can often just backtrack to m_jumps.
    void backtrackTermDefault(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        m_backtrackingState.append(op.m_jumps);
    }

    void generateAssertionBOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (!term->inputPosition)
                matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));

            readCharacter(m_checkedOffset - term->inputPosition + 1, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            // Erk, really should poison out these alternatives early. :-/
            if (term->inputPosition)
                op.m_jumps.append(jump());
            else
                op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
        }
    }
    void backtrackAssertionBOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    void generateAssertionEOL(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.multiline()) {
            const RegisterID character = regT0;

            JumpList matchDest;
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                matchDest.append(atEndOfInput());

            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
            op.m_jumps.append(jump());

            matchDest.link(this);
        } else {
            if (term->inputPosition == m_checkedOffset.unsafeGet())
                op.m_jumps.append(notAtEndOfInput());
            // Erk, really should poison out these alternatives early. :-/
            else
                op.m_jumps.append(jump());
        }
    }
    void backtrackAssertionEOL(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

    // Also falls through on nextIsNotWordChar.
    void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        if (term->inputPosition == m_checkedOffset.unsafeGet())
            nextIsNotWordChar.append(atEndOfInput());

        readCharacter(m_checkedOffset - term->inputPosition, character);

        CharacterClass* wordcharCharacterClass;

        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, nextIsWordChar, wordcharCharacterClass);
    }

    void generateAssertionWordBoundary(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        const RegisterID character = regT0;

        Jump atBegin;
        JumpList matchDest;
        if (!term->inputPosition)
            atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
        readCharacter(m_checkedOffset - term->inputPosition + 1, character);

        CharacterClass* wordcharCharacterClass;

        if (m_unicodeIgnoreCase)
            wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
        else
            wordcharCharacterClass = m_pattern.wordcharCharacterClass();

        matchCharacterClass(character, matchDest, wordcharCharacterClass);
        if (!term->inputPosition)
            atBegin.link(this);

        // We fall through to here if the last character was not a wordchar.
        JumpList nonWordCharThenWordChar;
        JumpList nonWordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
            nonWordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
            nonWordCharThenNonWordChar.append(jump());
        }
        op.m_jumps.append(nonWordCharThenNonWordChar);

        // We jump here if the last character was a wordchar.
        matchDest.link(this);
        JumpList wordCharThenWordChar;
        JumpList wordCharThenNonWordChar;
        if (term->invert()) {
            matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
            wordCharThenWordChar.append(jump());
        } else {
            matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
            // This can fall-through!
        }

        op.m_jumps.append(wordCharThenWordChar);

        nonWordCharThenWordChar.link(this);
        wordCharThenNonWordChar.link(this);
    }
    void backtrackAssertionWordBoundary(size_t opIndex)
    {
        backtrackTermDefault(opIndex);
    }

#if ENABLE(YARR_JIT_BACKREFERENCES)
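    // Compares the already-matched text of a capture group against the input at
    // the current index, one character per loop iteration. Case-insensitive
    // comparison maps both characters through canonicalTableLChar, so it is only
    // generated for 8-bit subject strings (generateBackReference() below bails
    // out of the JIT for ignore-case 16-bit patterns).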
    void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;
        unsigned subpatternId = term->backReferenceSubpatternId;

        Label loop(this);

        readCharacterDontDecodeSurrogates(0, patternCharacter, patternIndex);
        readCharacterDontDecodeSurrogates(m_checkedOffset - term->inputPosition, character);

        if (!m_pattern.ignoreCase())
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
        else {
            Jump charactersMatch = branch32(Equal, character, patternCharacter);
            ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(characterTableEntry, character);
            ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
            load16(patternTableEntry, patternCharacter);
            characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
            charactersMatch.link(this);
        }

        add32(TrustedImm32(1), index);
        add32(TrustedImm32(1), patternIndex);

        branch32(NotEqual, patternIndex, Address(output, ((subpatternId << 1) + 1) * sizeof(int))).linkTo(loop, this);
    }

    void generateBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        if (m_pattern.ignoreCase() && m_charSize != Char8) {
            m_failureReason = JITFailureReason::BackReference;
            return;
        }

        unsigned subpatternId = term->backReferenceSubpatternId;
        unsigned parenthesesFrameLocation = term->frameLocation;

        const RegisterID characterOrTemp = regT0;
        const RegisterID patternIndex = regT1;
        const RegisterID patternTemp = regT2;

        storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
        if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1)
            storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());

        JumpList matches;

        if (term->quantityType != QuantifierNonGreedy) {
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            if (term->quantityType != QuantifierFixedCount || term->quantityMaxCount != 1) {
                matches.append(branch32(Equal, TrustedImm32(-1), patternIndex));
                matches.append(branch32(Equal, patternIndex, patternTemp));
            } else {
                Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
                Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
                zeroLengthMatch.link(this);
                storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(jump());
                tryNonZeroMatch.link(this);
            }
        }

        switch (term->quantityType) {
        case QuantifierFixedCount: {
            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            if (m_checkedOffset - term->inputPosition)
                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
            op.m_jumps.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, op.m_jumps, characterOrTemp, patternIndex, patternTemp);

            if (term->quantityMaxCount != 1) {
                loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
                add32(TrustedImm32(1), characterOrTemp);
                storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
                load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
                load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);
                jump(outerLoop);
            }
            matches.link(this);
            break;
        }

        case QuantifierGreedy: {
            JumpList incompleteMatches;

            Label outerLoop(this);

            // PatternTemp should contain pattern end index at this point
            sub32(patternIndex, patternTemp);
            if (m_checkedOffset - term->inputPosition)
                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
            matches.append(checkNotEnoughInput(patternTemp));

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), characterOrTemp);
            add32(TrustedImm32(1), characterOrTemp);
            storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            if (term->quantityMaxCount != quantifyInfinite)
                matches.append(branch32(Equal, Imm32(term->quantityMaxCount.unsafeGet()), characterOrTemp));
            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // Store current index in frame for restoring after a partial match
            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
            jump(outerLoop);

            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            op.m_reentry = label();
            break;
        }

        case QuantifierNonGreedy: {
            JumpList incompleteMatches;

            matches.append(jump());

            op.m_reentry = label();

            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternTemp);

            // An empty match is successful without consuming characters
            Jump zeroLengthMatch = branch32(Equal, TrustedImm32(-1), patternIndex);
            Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
            zeroLengthMatch.link(this);
            storeToFrame(TrustedImm32(1), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            matches.append(jump());
            tryNonZeroMatch.link(this);

            // Check if we have input remaining to match
            sub32(patternIndex, patternTemp);
            if (m_checkedOffset - term->inputPosition)
                sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
            matches.append(checkNotEnoughInput(patternTemp));

            storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());

            matchBackreference(opIndex, incompleteMatches, characterOrTemp, patternIndex, patternTemp);

            matches.append(jump());

            incompleteMatches.link(this);
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);

            matches.link(this);
            break;
        }
        }
    }
    void backtrackBackReference(size_t opIndex)
    {
        YarrOp& op = m_ops[opIndex];
        PatternTerm* term = op.m_term;

        unsigned subpatternId = term->backReferenceSubpatternId;

        m_backtrackingState.link(this);
        op.m_jumps.link(this);

        JumpList failures;

        unsigned parenthesesFrameLocation = term->frameLocation;
        switch (term->quantityType) {
        case QuantifierFixedCount:
            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), index);
            break;

        case QuantifierGreedy: {
            const RegisterID matchAmount = regT0;
            const RegisterID patternStartIndex = regT1;
            const RegisterID patternEndIndexOrLen = regT2;

            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            failures.append(branchTest32(Zero, matchAmount));

            load32(Address(output, (subpatternId << 1) * sizeof(int)), patternStartIndex);
            load32(Address(output, ((subpatternId << 1) + 1) * sizeof(int)), patternEndIndexOrLen);
            sub32(patternStartIndex, patternEndIndexOrLen);
            sub32(patternEndIndexOrLen, index);

            sub32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }

        case QuantifierNonGreedy: {
            const RegisterID matchAmount = regT0;

            loadFromFrame(parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), matchAmount);
            if (term->quantityMaxCount != quantifyInfinite)
                failures.append(branch32(AboveOrEqual, Imm32(term->quantityMaxCount.unsafeGet()), matchAmount));
            add32(TrustedImm32(1), matchAmount);
            storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
            jump(op.m_reentry);
            break;
        }
        }
        failures.link(this);
        m_backtrackingState.fallthrough();
    }
#endif

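    // Matches a run of fixed, adjacent pattern characters with as few loads as
    // possible: subsequent one-character terms are fused into this one (and
    // marked dead) so that up to 4 or 8 characters are compared with a single
    // 16/32/64-bit load. For example, on a little-endian 8-bit string, /ab/
    // becomes one 16-bit compare against 0x6261, with an OR mask of 0x2020 when
    // both letters are matched case-insensitively.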
1357 void generatePatternCharacterOnce(size_t opIndex)
1358 {
1359 YarrOp& op = m_ops[opIndex];
1360
1361 if (op.m_isDeadCode)
1362 return;
1363
1364 // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1365 // node, so there must always be at least one more node.
1366 ASSERT(opIndex + 1 < m_ops.size());
1367 YarrOp* nextOp = &m_ops[opIndex + 1];
1368
1369 PatternTerm* term = op.m_term;
1370 UChar32 ch = term->patternCharacter;
1371
1372 if ((ch > 0xff) && (m_charSize == Char8)) {
1373 // Have a 16 bit pattern character and an 8 bit string - short circuit
1374 op.m_jumps.append(jump());
1375 return;
1376 }
1377
1378 const RegisterID character = regT0;
1379#if CPU(X86_64) || CPU(ARM64)
1380 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 8 : 4;
1381#else
1382 unsigned maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
1383#endif
1384 uint64_t ignoreCaseMask = 0;
1385#if CPU(BIG_ENDIAN)
1386 uint64_t allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
1387#else
1388 uint64_t allCharacters = ch;
1389#endif
1390 unsigned numberCharacters;
1391 unsigned startTermPosition = term->inputPosition;
1392
1393 // For case-insesitive compares, non-ascii characters that have different
1394 // upper & lower case representations are converted to a character class.
1395 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
1396
1397 if (m_pattern.ignoreCase() && isASCIIAlpha(c: ch)) {
1398#if CPU(BIG_ENDIAN)
1399 ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
1400#else
1401 ignoreCaseMask |= 32;
1402#endif
1403 }
1404
1405 for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1406 PatternTerm* nextTerm = nextOp->m_term;
1407
1408 // YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1409 // Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1410 if (nextTerm->type != PatternTerm::TypePatternCharacter
1411 || nextTerm->quantityType != QuantifierFixedCount
1412 || nextTerm->quantityMaxCount != 1
1413 || nextTerm->inputPosition != (startTermPosition + numberCharacters)
1414 || (U16_LENGTH(nextTerm->patternCharacter) != 1 && m_decodeSurrogatePairs))
1415 break;
1416
1417 nextOp->m_isDeadCode = true;
1418
1419#if CPU(BIG_ENDIAN)
1420 int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
1421#else
1422 int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
1423#endif
1424
1425 UChar32 currentCharacter = nextTerm->patternCharacter;
1426
1427 if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
1428 // Have a 16 bit pattern character and an 8 bit string - short circuit
1429 op.m_jumps.append(jump());
1430 return;
1431 }
1432
1433 // For case-insesitive compares, non-ascii characters that have different
1434 // upper & lower case representations are converted to a character class.
1435 ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter, m_canonicalMode));
1436
1437 allCharacters |= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1438
1439 if ((m_pattern.ignoreCase()) && (isASCIIAlpha(c: currentCharacter)))
1440 ignoreCaseMask |= 32ULL << shiftAmount;
1441 }
1442
1443 if (m_decodeSurrogatePairs)
1444 op.m_jumps.append(jumpIfNoAvailableInput());
1445
1446 if (m_charSize == Char8) {
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };
1450
            auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
                load16Unaligned(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or32(Imm32(mask), character);
                op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
            };

            auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };
1468
1469#if CPU(X86_64) || CPU(ARM64)
            auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
1476#endif
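            // Each checkN helper above compares N pattern characters at once:
            // check1 emits a single-character compare, while check2/check4/check8
            // perform one 16/32/64-bit (possibly unaligned) load, OR in the
            // optional case-insensitivity mask, and compare against the masked
            // constant; the maskless check4 path compares against memory directly.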
1477
1478 switch (numberCharacters) {
1479 case 1:
                // Use the 32-bit width of allCharacters, since Yarr counts a surrogate pair as one character when the unicode flag is set.
1481 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1482 return;
1483 case 2: {
1484 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1485 return;
1486 }
1487 case 3: {
1488 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
1489 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
1490 return;
1491 }
1492 case 4: {
1493 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1494 return;
1495 }
1496#if CPU(X86_64) || CPU(ARM64)
1497 case 5: {
1498 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1499 check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
1500 return;
1501 }
1502 case 6: {
1503 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1504 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1505 return;
1506 }
1507 case 7: {
1508 check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1509 check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
1510 check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
1511 return;
1512 }
1513 case 8: {
1514 check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1515 return;
1516 }
1517#endif
1518 }
1519 } else {
            auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
                op.m_jumps.append(jumpIfCharNotEquals(characters, offset, character));
            };
1523
            auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
                if (mask) {
                    load32WithUnalignedHalfWords(negativeOffsetIndexedAddress(offset, character), character);
                    or32(Imm32(mask), character);
                    op.m_jumps.append(branch32(NotEqual, character, Imm32(characters | mask)));
                    return;
                }
                op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, negativeOffsetIndexedAddress(offset, character), TrustedImm32(characters)));
            };
1534
1535#if CPU(X86_64) || CPU(ARM64)
            auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
                load64(negativeOffsetIndexedAddress(offset, character), character);
                if (mask)
                    or64(TrustedImm64(mask), character);
                op.m_jumps.append(branch64(NotEqual, character, TrustedImm64(characters | mask)));
            };
1542#endif
1543
1544 switch (numberCharacters) {
1545 case 1:
                // Use the 32-bit width of allCharacters, since Yarr counts a surrogate pair as one character when the unicode flag is set.
1547 check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
1548 return;
1549 case 2:
1550 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1551 return;
1552#if CPU(X86_64) || CPU(ARM64)
1553 case 3:
1554 check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
1555 check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
1556 return;
1557 case 4:
1558 check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1559 return;
1560#endif
1561 }
1562 }
1563 }
1564 void backtrackPatternCharacterOnce(size_t opIndex)
1565 {
1566 backtrackTermDefault(opIndex);
1567 }
1568
1569 void generatePatternCharacterFixed(size_t opIndex)
1570 {
1571 YarrOp& op = m_ops[opIndex];
1572 PatternTerm* term = op.m_term;
1573 UChar32 ch = term->patternCharacter;
1574
1575 const RegisterID character = regT0;
1576 const RegisterID countRegister = regT1;
1577
1578 if (m_decodeSurrogatePairs)
1579 op.m_jumps.append(jumpIfNoAvailableInput());
1580
1581 move(index, countRegister);
1582 Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1583 scaledMaxCount *= U_IS_BMP(ch) ? 1 : 2;
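        // A non-BMP character occupies two 16-bit code units in the subject
        // string, so the maximum count is scaled by two here and the loop below
        // steps the index accordingly for such characters.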
1584 sub32(Imm32(scaledMaxCount.unsafeGet()), countRegister);
1585
1586 Label loop(this);
        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
        // For case-insensitive compares, non-ASCII characters that have different
        // upper & lower case representations are converted to a character class.
        ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
        if (m_pattern.ignoreCase() && isASCIIAlpha(ch)) {
1592 or32(TrustedImm32(0x20), character);
1593 ch |= 0x20;
1594 }
1595
1596 op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
1597#ifdef JIT_UNICODE_EXPRESSIONS
1598 if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1599 add32(TrustedImm32(2), countRegister);
1600 else
1601#endif
1602 add32(TrustedImm32(1), countRegister);
1603 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1604 }
1605 void backtrackPatternCharacterFixed(size_t opIndex)
1606 {
1607 backtrackTermDefault(opIndex);
1608 }
1609
1610 void generatePatternCharacterGreedy(size_t opIndex)
1611 {
1612 YarrOp& op = m_ops[opIndex];
1613 PatternTerm* term = op.m_term;
1614 UChar32 ch = term->patternCharacter;
1615
1616 const RegisterID character = regT0;
1617 const RegisterID countRegister = regT1;
1618
1619 move(TrustedImm32(0), countRegister);
1620
        // Unless we have a 16-bit pattern character and an 8-bit string - in that case, short circuit.
1622 if (!((ch > 0xff) && (m_charSize == Char8))) {
1623 JumpList failures;
1624 Label loop(this);
1625 failures.append(atEndOfInput());
            failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1627
1628 add32(TrustedImm32(1), index);
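            // With surrogate pair decoding enabled, a non-BMP match consumes two
            // code units: bump index a second time, but back out the first
            // increment and fail if that would run past the end of the input.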
1629#ifdef JIT_UNICODE_EXPRESSIONS
1630 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1631 Jump surrogatePairOk = notAtEndOfInput();
1632 sub32(TrustedImm32(1), index);
1633 failures.append(jump());
                surrogatePairOk.link(this);
1635 add32(TrustedImm32(1), index);
1636 }
1637#endif
1638 add32(TrustedImm32(1), countRegister);
1639
1640 if (term->quantityMaxCount == quantifyInfinite)
1641 jump(loop);
1642 else
1643 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1644
            failures.link(this);
1646 }
1647 op.m_reentry = label();
1648
1649 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1650 }
1651 void backtrackPatternCharacterGreedy(size_t opIndex)
1652 {
1653 YarrOp& op = m_ops[opIndex];
1654 PatternTerm* term = op.m_term;
1655
1656 const RegisterID countRegister = regT1;
1657
1658 m_backtrackingState.link(this);
1659
        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);
1661 m_backtrackingState.append(branchTest32(Zero, countRegister));
1662 sub32(TrustedImm32(1), countRegister);
1663 if (!m_decodeSurrogatePairs || U_IS_BMP(term->patternCharacter))
1664 sub32(TrustedImm32(1), index);
1665 else
1666 sub32(TrustedImm32(2), index);
1667 jump(op.m_reentry);
1668 }
1669
1670 void generatePatternCharacterNonGreedy(size_t opIndex)
1671 {
1672 YarrOp& op = m_ops[opIndex];
1673 PatternTerm* term = op.m_term;
1674
1675 const RegisterID countRegister = regT1;
1676
1677 move(TrustedImm32(0), countRegister);
1678 op.m_reentry = label();
1679 storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1680 }
1681 void backtrackPatternCharacterNonGreedy(size_t opIndex)
1682 {
1683 YarrOp& op = m_ops[opIndex];
1684 PatternTerm* term = op.m_term;
1685 UChar32 ch = term->patternCharacter;
1686
1687 const RegisterID character = regT0;
1688 const RegisterID countRegister = regT1;
1689
1690 m_backtrackingState.link(this);
1691
        loadFromFrame(term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), countRegister);

        // Unless we have a 16-bit pattern character and an 8-bit string - in that case, short circuit.
        if (!((ch > 0xff) && (m_charSize == Char8))) {
            JumpList nonGreedyFailures;
            nonGreedyFailures.append(atEndOfInput());
            if (term->quantityMaxCount != quantifyInfinite)
                nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
            nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
1701
1702 add32(TrustedImm32(1), index);
1703#ifdef JIT_UNICODE_EXPRESSIONS
1704 if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1705 Jump surrogatePairOk = notAtEndOfInput();
1706 sub32(TrustedImm32(1), index);
1707 nonGreedyFailures.append(jump());
                surrogatePairOk.link(this);
1709 add32(TrustedImm32(1), index);
1710 }
1711#endif
1712 add32(TrustedImm32(1), countRegister);
1713
1714 jump(op.m_reentry);
            nonGreedyFailures.link(this);
1716 }
1717
        if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
            // A non-BMP character consumed two code units per match, so double
            // the count before subtracting it from index.
            lshift32(TrustedImm32(1), countRegister);
        }
1722
1723 sub32(countRegister, index);
1724 m_backtrackingState.fallthrough();
1725 }
1726
1727 void generateCharacterClassOnce(size_t opIndex)
1728 {
1729 YarrOp& op = m_ops[opIndex];
1730 PatternTerm* term = op.m_term;
1731
1732 const RegisterID character = regT0;
1733
1734 if (m_decodeSurrogatePairs) {
1735 op.m_jumps.append(jumpIfNoAvailableInput());
1736 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1737 }
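        // The starting index saved above lets backtracking restore the input
        // position exactly; with surrogate pair decoding the number of code
        // units consumed varies per character, so it cannot be recomputed.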
1738
1739 JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }
1753#ifdef JIT_UNICODE_EXPRESSIONS
1754 if (m_decodeSurrogatePairs) {
1755 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1756 add32(TrustedImm32(1), index);
            isBMPChar.link(this);
1758 }
1759#endif
1760 }
1761 void backtrackCharacterClassOnce(size_t opIndex)
1762 {
1763#ifdef JIT_UNICODE_EXPRESSIONS
1764 if (m_decodeSurrogatePairs) {
1765 YarrOp& op = m_ops[opIndex];
1766 PatternTerm* term = op.m_term;
1767
1768 m_backtrackingState.link(this);
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
1770 m_backtrackingState.fallthrough();
1771 }
1772#endif
1773 backtrackTermDefault(opIndex);
1774 }
1775
1776 void generateCharacterClassFixed(size_t opIndex)
1777 {
1778 YarrOp& op = m_ops[opIndex];
1779 PatternTerm* term = op.m_term;
1780
1781 const RegisterID character = regT0;
1782 const RegisterID countRegister = regT1;
1783
1784 if (m_decodeSurrogatePairs)
1785 op.m_jumps.append(jumpIfNoAvailableInput());
1786
1787 move(index, countRegister);
1788 sub32(Imm32(term->quantityMaxCount.unsafeGet()), countRegister);
1789
1790 Label loop(this);
1791 JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition - term->quantityMaxCount, character, countRegister);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                op.m_jumps.append(matchDest);
            else {
                op.m_jumps.append(jump());
                matchDest.link(this);
            }
        }
1805
1806 add32(TrustedImm32(1), countRegister);
1807#ifdef JIT_UNICODE_EXPRESSIONS
1808 if (m_decodeSurrogatePairs) {
1809 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1810 op.m_jumps.append(atEndOfInput());
1811 add32(TrustedImm32(1), countRegister);
1812 add32(TrustedImm32(1), index);
            isBMPChar.link(this);
1814 }
1815#endif
1816 branch32(NotEqual, countRegister, index).linkTo(loop, this);
1817 }
1818 void backtrackCharacterClassFixed(size_t opIndex)
1819 {
1820 backtrackTermDefault(opIndex);
1821 }
1822
1823 void generateCharacterClassGreedy(size_t opIndex)
1824 {
1825 YarrOp& op = m_ops[opIndex];
1826 PatternTerm* term = op.m_term;
1827
1828 const RegisterID character = regT0;
1829 const RegisterID countRegister = regT1;
1830
1831 if (m_decodeSurrogatePairs)
1832 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1833 move(TrustedImm32(0), countRegister);
1834
1835 JumpList failures;
1836 Label loop(this);
1837 failures.append(atEndOfInput());
1838
        if (term->invert()) {
            readCharacter(m_checkedOffset - term->inputPosition, character);
            matchCharacterClass(character, failures, term->characterClass);
        } else {
            JumpList matchDest;
            readCharacter(m_checkedOffset - term->inputPosition, character);
            // If we are matching the "any character" builtin class we only need to read the
            // character and don't need to match as it will always succeed.
            if (!term->characterClass->m_anyCharacter) {
                matchCharacterClass(character, matchDest, term->characterClass);
                failures.append(jump());
            }
            matchDest.link(this);
        }
1853
1854 add32(TrustedImm32(1), index);
1855#ifdef JIT_UNICODE_EXPRESSIONS
1856 if (m_decodeSurrogatePairs) {
1857 failures.append(atEndOfInput());
1858 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1859 add32(TrustedImm32(1), index);
            isBMPChar.link(this);
1861 }
1862#endif
1863 add32(TrustedImm32(1), countRegister);
1864
1865 if (term->quantityMaxCount != quantifyInfinite) {
1866 branch32(NotEqual, countRegister, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1867 failures.append(jump());
1868 } else
1869 jump(loop);
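        // An unbounded greedy class (* or +) loops until a character fails to
        // match or input is exhausted; a bounded one also exits once the match
        // count reaches quantityMaxCount.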
1870
        failures.link(this);
1872 op.m_reentry = label();
1873
1874 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1875 }
1876 void backtrackCharacterClassGreedy(size_t opIndex)
1877 {
1878 YarrOp& op = m_ops[opIndex];
1879 PatternTerm* term = op.m_term;
1880
1881 const RegisterID countRegister = regT1;
1882
1883 m_backtrackingState.link(this);
1884
        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1886 m_backtrackingState.append(branchTest32(Zero, countRegister));
1887 sub32(TrustedImm32(1), countRegister);
1888 if (!m_decodeSurrogatePairs)
1889 sub32(TrustedImm32(1), index);
1890 else {
1891 const RegisterID character = regT0;
1892
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
            // Rematch one fewer character than we had matched before.
            storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1896
1897 Label rematchLoop(this);
            readCharacter(m_checkedOffset - term->inputPosition, character);
1899
1900 sub32(TrustedImm32(1), countRegister);
1901 add32(TrustedImm32(1), index);
1902
1903#ifdef JIT_UNICODE_EXPRESSIONS
1904 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1905 add32(TrustedImm32(1), index);
            isBMPChar.link(this);
1907#endif
1908
            branchTest32(NonZero, countRegister).linkTo(rematchLoop, this);
1910
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1912 }
1913 jump(op.m_reentry);
1914 }
1915
1916 void generateCharacterClassNonGreedy(size_t opIndex)
1917 {
1918 YarrOp& op = m_ops[opIndex];
1919 PatternTerm* term = op.m_term;
1920
1921 const RegisterID countRegister = regT1;
1922
1923 move(TrustedImm32(0), countRegister);
1924 op.m_reentry = label();
1925 if (m_decodeSurrogatePairs)
1926 storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1927 storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1928 }
1929
1930 void backtrackCharacterClassNonGreedy(size_t opIndex)
1931 {
1932 YarrOp& op = m_ops[opIndex];
1933 PatternTerm* term = op.m_term;
1934
1935 const RegisterID character = regT0;
1936 const RegisterID countRegister = regT1;
1937
1938 JumpList nonGreedyFailures;
1939
1940 m_backtrackingState.link(this);
1941
        if (m_decodeSurrogatePairs)
            loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
        loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
1945
1946 nonGreedyFailures.append(atEndOfInput());
1947 nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
1948
1949 JumpList matchDest;
        readCharacter(m_checkedOffset - term->inputPosition, character);
        // If we are matching the "any character" builtin class we only need to read the
        // character and don't need to match as it will always succeed.
        if (term->invert() || !term->characterClass->m_anyCharacter) {
            matchCharacterClass(character, matchDest, term->characterClass);

            if (term->invert())
                nonGreedyFailures.append(matchDest);
            else {
                nonGreedyFailures.append(jump());
                matchDest.link(this);
            }
        }
1963
1964 add32(TrustedImm32(1), index);
1965#ifdef JIT_UNICODE_EXPRESSIONS
1966 if (m_decodeSurrogatePairs) {
1967 nonGreedyFailures.append(atEndOfInput());
1968 Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1969 add32(TrustedImm32(1), index);
            isBMPChar.link(this);
1971 }
1972#endif
1973 add32(TrustedImm32(1), countRegister);
1974
1975 jump(op.m_reentry);
1976
        nonGreedyFailures.link(this);
1978 sub32(countRegister, index);
1979 m_backtrackingState.fallthrough();
1980 }
1981
1982 void generateDotStarEnclosure(size_t opIndex)
1983 {
1984 YarrOp& op = m_ops[opIndex];
1985 PatternTerm* term = op.m_term;
1986
1987 const RegisterID character = regT0;
1988 const RegisterID matchPos = regT1;
1989#ifndef HAVE_INITIAL_START_REG
1990 const RegisterID initialStart = character;
1991#endif
1992
1993 JumpList foundBeginningNewLine;
1994 JumpList saveStartIndex;
1995 JumpList foundEndingNewLine;
1996
1997 if (m_pattern.dotAll()) {
1998 move(TrustedImm32(0), matchPos);
1999 setMatchStart(matchPos);
2000 move(length, index);
2001 return;
2002 }
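        // Otherwise, scan backwards from the match start to the previous newline
        // to find where the enclosing .* really begins, then forwards from the
        // match end to the next newline (or the end of input) to find its end.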
2003
2004 ASSERT(!m_pattern.m_body->m_hasFixedSize);
        getMatchStart(matchPos);
2006
2007#ifndef HAVE_INITIAL_START_REG
2008 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2009#endif
2010 saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2011 Label findBOLLoop(this);
2012 sub32(TrustedImm32(1), matchPos);
2013 if (m_charSize == Char8)
2014 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2015 else
2016 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
2018
2019#ifndef HAVE_INITIAL_START_REG
2020 loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2021#endif
2022 branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2023 saveStartIndex.append(jump());
2024
        foundBeginningNewLine.link(this);
        add32(TrustedImm32(1), matchPos); // Advance past newline
        saveStartIndex.link(this);
2028
2029 if (!m_pattern.multiline() && term->anchors.bolAnchor)
2030 op.m_jumps.append(branchTest32(NonZero, matchPos));
2031
2032 ASSERT(!m_pattern.m_body->m_hasFixedSize);
2033 setMatchStart(matchPos);
2034
2035 move(index, matchPos);
2036
2037 Label findEOLLoop(this);
2038 foundEndingNewLine.append(branch32(Equal, matchPos, length));
2039 if (m_charSize == Char8)
2040 load8(BaseIndex(input, matchPos, TimesOne, 0), character);
2041 else
2042 load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
        matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
2044 add32(TrustedImm32(1), matchPos);
2045 jump(findEOLLoop);
2046
        foundEndingNewLine.link(this);
2048
2049 if (!m_pattern.multiline() && term->anchors.eolAnchor)
2050 op.m_jumps.append(branch32(NotEqual, matchPos, length));
2051
2052 move(matchPos, index);
2053 }
2054
2055 void backtrackDotStarEnclosure(size_t opIndex)
2056 {
2057 backtrackTermDefault(opIndex);
2058 }
2059
2060 // Code generation/backtracking for simple terms
2061 // (pattern characters, character classes, and assertions).
2062 // These methods farm out work to the set of functions above.
2063 void generateTerm(size_t opIndex)
2064 {
2065 YarrOp& op = m_ops[opIndex];
2066 PatternTerm* term = op.m_term;
2067
2068 switch (term->type) {
2069 case PatternTerm::TypePatternCharacter:
2070 switch (term->quantityType) {
2071 case QuantifierFixedCount:
2072 if (term->quantityMaxCount == 1)
2073 generatePatternCharacterOnce(opIndex);
2074 else
2075 generatePatternCharacterFixed(opIndex);
2076 break;
2077 case QuantifierGreedy:
2078 generatePatternCharacterGreedy(opIndex);
2079 break;
2080 case QuantifierNonGreedy:
2081 generatePatternCharacterNonGreedy(opIndex);
2082 break;
2083 }
2084 break;
2085
2086 case PatternTerm::TypeCharacterClass:
2087 switch (term->quantityType) {
2088 case QuantifierFixedCount:
2089 if (term->quantityMaxCount == 1)
2090 generateCharacterClassOnce(opIndex);
2091 else
2092 generateCharacterClassFixed(opIndex);
2093 break;
2094 case QuantifierGreedy:
2095 generateCharacterClassGreedy(opIndex);
2096 break;
2097 case QuantifierNonGreedy:
2098 generateCharacterClassNonGreedy(opIndex);
2099 break;
2100 }
2101 break;
2102
2103 case PatternTerm::TypeAssertionBOL:
2104 generateAssertionBOL(opIndex);
2105 break;
2106
2107 case PatternTerm::TypeAssertionEOL:
2108 generateAssertionEOL(opIndex);
2109 break;
2110
2111 case PatternTerm::TypeAssertionWordBoundary:
2112 generateAssertionWordBoundary(opIndex);
2113 break;
2114
2115 case PatternTerm::TypeForwardReference:
2116 m_failureReason = JITFailureReason::ForwardReference;
2117 break;
2118
2119 case PatternTerm::TypeParenthesesSubpattern:
2120 case PatternTerm::TypeParentheticalAssertion:
2121 RELEASE_ASSERT_NOT_REACHED();
2122
2123 case PatternTerm::TypeBackReference:
2124#if ENABLE(YARR_JIT_BACKREFERENCES)
2125 generateBackReference(opIndex);
2126#else
2127 m_failureReason = JITFailureReason::BackReference;
2128#endif
2129 break;
2130 case PatternTerm::TypeDotStarEnclosure:
2131 generateDotStarEnclosure(opIndex);
2132 break;
2133 }
2134 }
2135 void backtrackTerm(size_t opIndex)
2136 {
2137 YarrOp& op = m_ops[opIndex];
2138 PatternTerm* term = op.m_term;
2139
2140 switch (term->type) {
2141 case PatternTerm::TypePatternCharacter:
2142 switch (term->quantityType) {
2143 case QuantifierFixedCount:
2144 if (term->quantityMaxCount == 1)
2145 backtrackPatternCharacterOnce(opIndex);
2146 else
2147 backtrackPatternCharacterFixed(opIndex);
2148 break;
2149 case QuantifierGreedy:
2150 backtrackPatternCharacterGreedy(opIndex);
2151 break;
2152 case QuantifierNonGreedy:
2153 backtrackPatternCharacterNonGreedy(opIndex);
2154 break;
2155 }
2156 break;
2157
2158 case PatternTerm::TypeCharacterClass:
2159 switch (term->quantityType) {
2160 case QuantifierFixedCount:
2161 if (term->quantityMaxCount == 1)
2162 backtrackCharacterClassOnce(opIndex);
2163 else
2164 backtrackCharacterClassFixed(opIndex);
2165 break;
2166 case QuantifierGreedy:
2167 backtrackCharacterClassGreedy(opIndex);
2168 break;
2169 case QuantifierNonGreedy:
2170 backtrackCharacterClassNonGreedy(opIndex);
2171 break;
2172 }
2173 break;
2174
2175 case PatternTerm::TypeAssertionBOL:
2176 backtrackAssertionBOL(opIndex);
2177 break;
2178
2179 case PatternTerm::TypeAssertionEOL:
2180 backtrackAssertionEOL(opIndex);
2181 break;
2182
2183 case PatternTerm::TypeAssertionWordBoundary:
2184 backtrackAssertionWordBoundary(opIndex);
2185 break;
2186
2187 case PatternTerm::TypeForwardReference:
2188 m_failureReason = JITFailureReason::ForwardReference;
2189 break;
2190
2191 case PatternTerm::TypeParenthesesSubpattern:
2192 case PatternTerm::TypeParentheticalAssertion:
2193 RELEASE_ASSERT_NOT_REACHED();
2194
2195 case PatternTerm::TypeBackReference:
2196#if ENABLE(YARR_JIT_BACKREFERENCES)
2197 backtrackBackReference(opIndex);
2198#else
2199 m_failureReason = JITFailureReason::BackReference;
2200#endif
2201 break;
2202
2203 case PatternTerm::TypeDotStarEnclosure:
2204 backtrackDotStarEnclosure(opIndex);
2205 break;
2206 }
2207 }
2208
2209 void generate()
2210 {
2211 // Forwards generate the matching code.
2212 ASSERT(m_ops.size());
2213 size_t opIndex = 0;
2214
2215 do {
2216 YarrOp& op = m_ops[opIndex];
2217 switch (op.m_op) {
2218
2219 case OpTerm:
2220 generateTerm(opIndex);
2221 break;
2222
2223 // OpBodyAlternativeBegin/Next/End
2224 //
2225 // These nodes wrap the set of alternatives in the body of the regular expression.
2226 // There may be either one or two chains of OpBodyAlternative nodes, one representing
2227 // the 'once through' sequence of alternatives (if any exist), and one representing
2228 // the repeating alternatives (again, if any exist).
2229 //
2230 // Upon normal entry to the Begin alternative, we will check that input is available.
2231 // Reentry to the Begin alternative will take place after the check has taken place,
2232 // and will assume that the input position has already been progressed as appropriate.
2233 //
2234 // Entry to subsequent Next/End alternatives occurs when the prior alternative has
2235 // successfully completed a match - return a success state from JIT code.
2236 //
2237 // Next alternatives allow for reentry optimized to suit backtracking from its
2238 // preceding alternative. It expects the input position to still be set to a position
2239 // appropriate to its predecessor, and it will only perform an input check if the
2240 // predecessor had a minimum size less than its own.
2241 //
            // In the case of 'once through' expressions, the End node will also have a reentry
            // point to jump to when the last alternative fails. Again, this expects the input
            // position to still reflect that expected by the prior alternative.
2245 case OpBodyAlternativeBegin: {
2246 PatternAlternative* alternative = op.m_alternative;
2247
2248 // Upon entry at the head of the set of alternatives, check if input is available
2249 // to run the first alternative. (This progresses the input position).
                op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
2251 // We will reenter after the check, and assume the input position to have been
2252 // set as appropriate to this alternative.
2253 op.m_reentry = label();
2254
2255 m_checkedOffset += alternative->m_minimumSize;
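                // m_checkedOffset tracks how far beyond the current position the
                // input has been verified to exist; each alternative's minimum
                // size is added on entry and subtracted again when it is left.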
2256 break;
2257 }
2258 case OpBodyAlternativeNext:
2259 case OpBodyAlternativeEnd: {
2260 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2261 PatternAlternative* alternative = op.m_alternative;
2262
2263 // If we get here, the prior alternative matched - return success.
2264
2265 // Adjust the stack pointer to remove the pattern's frame.
2266 removeCallFrame();
2267
                // Load appropriate values into the return register and the first output
                // slot, and return. In the case of a pattern with a fixed size, we will
                // not yet have set the value in the first output slot.
                ASSERT(index != returnRegister);
2272 if (m_pattern.m_body->m_hasFixedSize) {
2273 move(index, returnRegister);
2274 if (priorAlternative->m_minimumSize)
2275 sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
2276 if (compileMode == IncludeSubpatterns)
2277 store32(returnRegister, output);
2278 } else
                    getMatchStart(returnRegister);
2280 if (compileMode == IncludeSubpatterns)
2281 store32(index, Address(output, 4));
2282 move(index, returnRegister2);
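                // The result is returned as a (start, end) pair: the match start
                // in returnRegister, the current index (the match end) in
                // returnRegister2.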
2283
2284 generateReturn();
2285
2286 // This is the divide between the tail of the prior alternative, above, and
2287 // the head of the subsequent alternative, below.
2288
2289 if (op.m_op == OpBodyAlternativeNext) {
                    // This is the reentry point for the Next alternative. We expect any code
                    // that jumps here to do so with the input position matching that of the
                    // PRIOR alternative, and we will only check input availability if we
                    // need to progress it forwards.
2294 op.m_reentry = label();
2295 if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2296 add32(Imm32(alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2297 op.m_jumps.append(jumpIfNoAvailableInput());
2298 } else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2299 sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2300 } else if (op.m_nextOp == notFound) {
2301 // This is the reentry point for the End of 'once through' alternatives,
2302 // jumped to when the last alternative fails to match.
2303 op.m_reentry = label();
2304 sub32(Imm32(priorAlternative->m_minimumSize), index);
2305 }
2306
2307 if (op.m_op == OpBodyAlternativeNext)
2308 m_checkedOffset += alternative->m_minimumSize;
2309 m_checkedOffset -= priorAlternative->m_minimumSize;
2310 break;
2311 }
2312
2313 // OpSimpleNestedAlternativeBegin/Next/End
2314 // OpNestedAlternativeBegin/Next/End
2315 //
2316 // These nodes are used to handle sets of alternatives that are nested within
2317 // subpatterns and parenthetical assertions. The 'simple' forms are used where
2318 // we do not need to be able to backtrack back into any alternative other than
2319 // the last, the normal forms allow backtracking into any alternative.
2320 //
2321 // Each Begin/Next node is responsible for planting an input check to ensure
2322 // sufficient input is available on entry. Next nodes additionally need to
2323 // jump to the end - Next nodes use the End node's m_jumps list to hold this
2324 // set of jumps.
2325 //
2326 // In the non-simple forms, successful alternative matches must store a
2327 // 'return address' using a DataLabelPtr, used to store the address to jump
2328 // to when backtracking, to get to the code for the appropriate alternative.
2329 case OpSimpleNestedAlternativeBegin:
2330 case OpNestedAlternativeBegin: {
2331 PatternTerm* term = op.m_term;
2332 PatternAlternative* alternative = op.m_alternative;
2333 PatternDisjunction* disjunction = term->parentheses.disjunction;
2334
2335 // Calculate how much input we need to check for, and if non-zero check.
2336 op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2337 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2338 op.m_checkAdjust -= disjunction->m_minimumSize;
2339 if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2341
2342 m_checkedOffset += op.m_checkAdjust;
2343 break;
2344 }
2345 case OpSimpleNestedAlternativeNext:
2346 case OpNestedAlternativeNext: {
2347 PatternTerm* term = op.m_term;
2348 PatternAlternative* alternative = op.m_alternative;
2349 PatternDisjunction* disjunction = term->parentheses.disjunction;
2350
2351 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2352 if (op.m_op == OpNestedAlternativeNext) {
2353 unsigned parenthesesFrameLocation = term->frameLocation;
                    op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2355 }
2356
2357 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2361 }
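                // The zero-length match check above compares the current index
                // against the input position saved in the term's frame slot;
                // equality means this alternative consumed no input, and such a
                // match must not be accepted when the term can loop.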
2362
2363 // If we reach here then the last alternative has matched - jump to the
2364 // End node, to skip over any further alternatives.
2365 //
                // FIXME: this is logically O(N^2) (though N can be expected to be very
                // small). We could avoid this either by adding an extra jump to the JIT
                // data structures, or by making the backtracking code that jumps to Next
                // alternatives responsible for checking that input is available (if
                // we didn't need to plant the input checks, then m_jumps would be free).
2371 YarrOp* endOp = &m_ops[op.m_nextOp];
2372 while (endOp->m_nextOp != notFound) {
2373 ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
2374 endOp = &m_ops[endOp->m_nextOp];
2375 }
2376 ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
2377 endOp->m_jumps.append(jump());
2378
2379 // This is the entry point for the next alternative.
2380 op.m_reentry = label();
2381
2382 // Calculate how much input we need to check for, and if non-zero check.
2383 op.m_checkAdjust = alternative->m_minimumSize;
2384 if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2385 op.m_checkAdjust -= disjunction->m_minimumSize;
2386 if (op.m_checkAdjust)
                    op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust.unsafeGet()));
2388
2389 YarrOp& lastOp = m_ops[op.m_previousOp];
2390 m_checkedOffset -= lastOp.m_checkAdjust;
2391 m_checkedOffset += op.m_checkAdjust;
2392 break;
2393 }
2394 case OpSimpleNestedAlternativeEnd:
2395 case OpNestedAlternativeEnd: {
2396 PatternTerm* term = op.m_term;
2397
2398 // In the non-simple case, store a 'return address' so we can backtrack correctly.
2399 if (op.m_op == OpNestedAlternativeEnd) {
2400 unsigned parenthesesFrameLocation = term->frameLocation;
                    op.m_returnAddress = storeToFrameWithPatch(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2402 }
2403
2404 if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
                    // If the previous alternative matched without consuming characters then
                    // backtrack to try to match while consuming some input.
                    op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2408 }
2409
2410 // If this set of alternatives contains more than one alternative,
2411 // then the Next nodes will have planted jumps to the End, and added
2412 // them to this node's m_jumps list.
2413 op.m_jumps.link(this);
2414 op.m_jumps.clear();
2415
2416 YarrOp& lastOp = m_ops[op.m_previousOp];
2417 m_checkedOffset -= lastOp.m_checkAdjust;
2418 break;
2419 }
2420
2421 // OpParenthesesSubpatternOnceBegin/End
2422 //
2423 // These nodes support (optionally) capturing subpatterns, that have a
2424 // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
2425 case OpParenthesesSubpatternOnceBegin: {
2426 PatternTerm* term = op.m_term;
2427 unsigned parenthesesFrameLocation = term->frameLocation;
2428 const RegisterID indexTemporary = regT0;
2429 ASSERT(term->quantityMaxCount == 1);
2430
                // Upon entry to a Greedy quantified set of parentheses store the index.
                // We'll use this for two purposes:
                //  - To indicate which iteration we are on of matching the remainder of
                //    the expression after the parentheses - the first, including the
                //    match within the parentheses, or the second having skipped over them.
                //  - To check for empty matches, which must be rejected.
2437 //
2438 // At the head of a NonGreedy set of parentheses we'll immediately set the
2439 // value on the stack to -1 (indicating a match skipping the subpattern),
2440 // and plant a jump to the end. We'll also plant a label to backtrack to
2441 // to reenter the subpattern later, with a store to set up index on the
2442 // second iteration.
2443 //
2444 // FIXME: for capturing parens, could use the index in the capture array?
2445 if (term->quantityType == QuantifierGreedy)
2446 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2447 else if (term->quantityType == QuantifierNonGreedy) {
2448 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2449 op.m_jumps.append(jump());
2450 op.m_reentry = label();
2451 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2452 }
2453
                // If the parentheses are capturing, store the starting index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
2460 if (term->capture() && compileMode == IncludeSubpatterns) {
2461 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2462 if (term->quantityType == QuantifierFixedCount)
2463 inputOffset += term->parentheses.disjunction->m_minimumSize;
2464 if (inputOffset) {
2465 move(index, indexTemporary);
2466 sub32(Imm32(inputOffset), indexTemporary);
                        setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternStart(index, term->parentheses.subpatternId);
2470 }
2471 break;
2472 }
2473 case OpParenthesesSubpatternOnceEnd: {
2474 PatternTerm* term = op.m_term;
2475 const RegisterID indexTemporary = regT0;
2476 ASSERT(term->quantityMaxCount == 1);
2477
2478 // Runtime ASSERT to make sure that the nested alternative handled the
2479 // "no input consumed" check.
2480 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2481 Jump pastBreakpoint;
2482 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2483 // ### abortWithReason(YARRNoInputConsumed);
                    pastBreakpoint.link(this);
2485 }
2486
                // If the parentheses are capturing, store the ending index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
2493 if (term->capture() && compileMode == IncludeSubpatterns) {
2494 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2495 if (inputOffset) {
2496 move(index, indexTemporary);
2497 sub32(Imm32(inputOffset), indexTemporary);
                        setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
                    } else
                        setSubpatternEnd(index, term->parentheses.subpatternId);
2501 }
2502
2503 // If the parentheses are quantified Greedy then add a label to jump back
2504 // to if we get a failed match from after the parentheses. For NonGreedy
2505 // parentheses, link the jump from before the subpattern to here.
2506 if (term->quantityType == QuantifierGreedy)
2507 op.m_reentry = label();
2508 else if (term->quantityType == QuantifierNonGreedy) {
2509 YarrOp& beginOp = m_ops[op.m_previousOp];
2510 beginOp.m_jumps.link(this);
2511 }
2512 break;
2513 }
2514
2515 // OpParenthesesSubpatternTerminalBegin/End
2516 case OpParenthesesSubpatternTerminalBegin: {
2517 PatternTerm* term = op.m_term;
2518 ASSERT(term->quantityType == QuantifierGreedy);
2519 ASSERT(term->quantityMaxCount == quantifyInfinite);
2520 ASSERT(!term->capture());
2521
2522 // Upon entry set a label to loop back to.
2523 op.m_reentry = label();
2524
2525 // Store the start index of the current match; we need to reject zero
2526 // length matches.
2527 storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2528 break;
2529 }
2530 case OpParenthesesSubpatternTerminalEnd: {
2531 YarrOp& beginOp = m_ops[op.m_previousOp];
2532 if (!ASSERT_DISABLED) {
2533 PatternTerm* term = op.m_term;
2534
2535 // Runtime ASSERT to make sure that the nested alternative handled the
2536 // "no input consumed" check.
2537 Jump pastBreakpoint;
2538 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
2539 // ### abortWithReason(YARRNoInputConsumed);
                    pastBreakpoint.link(this);
2541 }
2542
2543 // We know that the match is non-zero, we can accept it and
2544 // loop back up to the head of the subpattern.
2545 jump(beginOp.m_reentry);
2546
2547 // This is the entry point to jump to when we stop matching - we will
2548 // do so once the subpattern cannot match any more.
2549 op.m_reentry = label();
2550 break;
2551 }
2552
2553 // OpParenthesesSubpatternBegin/End
2554 //
2555 // These nodes support generic subpatterns.
2556 case OpParenthesesSubpatternBegin: {
2557#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2558 PatternTerm* term = op.m_term;
2559 unsigned parenthesesFrameLocation = term->frameLocation;
2560
                // Upon entry to a Greedy quantified set of parentheses store the index.
                // We'll use this for two purposes:
                //  - To indicate which iteration we are on of matching the remainder of
                //    the expression after the parentheses - the first, including the
                //    match within the parentheses, or the second having skipped over them.
                //  - To check for empty matches, which must be rejected.
2567 //
2568 // At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2569 // in the backtrack info to -1 (indicating a match skipping the subpattern),
2570 // and plant a jump to the end. We'll also plant a label to backtrack to
2571 // to reenter the subpattern later, with a store to set 'begin' to current index
2572 // on the second iteration.
2573 //
2574 // FIXME: for capturing parens, could use the index in the capture array?
2575 if (term->quantityType == QuantifierGreedy || term->quantityType == QuantifierNonGreedy) {
2576 storeToFrame(TrustedImm32(0), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2577 storeToFrame(TrustedImmPtr(nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2578
2579 if (term->quantityType == QuantifierNonGreedy) {
2580 storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2581 op.m_jumps.append(jump());
2582 }
2583
2584 op.m_reentry = label();
2585 RegisterID currParenContextReg = regT0;
2586 RegisterID newParenContextReg = regT1;
2587
                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);
                    allocateParenContext(newParenContextReg);
                    storePtr(currParenContextReg, newParenContextReg);
                    storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
                    saveParenContext(newParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);
                    storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2594 }
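                // Each iteration of the quantified group links a freshly
                // allocated ParenContext onto the head of a per-frame list;
                // saveParenContext snapshots the group's capture state into it so
                // that backtracking can unwind one iteration at a time.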
2595
                // If the parentheses are capturing, store the starting index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
2602 if (term->capture() && compileMode == IncludeSubpatterns) {
2603 const RegisterID indexTemporary = regT0;
2604 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2605 if (term->quantityType == QuantifierFixedCount)
2606 inputOffset += term->parentheses.disjunction->m_minimumSize;
2607 if (inputOffset) {
2608 move(index, indexTemporary);
2609 sub32(Imm32(inputOffset), indexTemporary);
                    setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
                } else
                    setSubpatternStart(index, term->parentheses.subpatternId);
2613 }
2614#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2615 RELEASE_ASSERT_NOT_REACHED();
2616#endif
2617 break;
2618 }
2619 case OpParenthesesSubpatternEnd: {
2620#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2621 PatternTerm* term = op.m_term;
2622 unsigned parenthesesFrameLocation = term->frameLocation;
2623
2624 // Runtime ASSERT to make sure that the nested alternative handled the
2625 // "no input consumed" check.
2626 if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2627 Jump pastBreakpoint;
2628 pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2629 // ### abortWithReason(YARRNoInputConsumed);
                    pastBreakpoint.link(this);
2631 }
2632
2633 const RegisterID countTemporary = regT1;
2634
2635 YarrOp& beginOp = m_ops[op.m_previousOp];
                loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
2637 add32(TrustedImm32(1), countTemporary);
2638 storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2639
                // If the parentheses are capturing, store the ending index value to the
                // captures array, offsetting as necessary.
                //
                // FIXME: could avoid offsetting this value in JIT code, apply
                // offsets only afterwards, at the point the results array is
                // being accessed.
2646 if (term->capture() && compileMode == IncludeSubpatterns) {
2647 const RegisterID indexTemporary = regT0;
2648
2649 unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2650 if (inputOffset) {
2651 move(index, indexTemporary);
2652 sub32(Imm32(inputOffset), indexTemporary);
                    setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
                } else
                    setSubpatternEnd(index, term->parentheses.subpatternId);
2656 }
2657
2658 // If the parentheses are quantified Greedy then add a label to jump back
2659 // to if we get a failed match from after the parentheses. For NonGreedy
2660 // parentheses, link the jump from before the subpattern to here.
2661 if (term->quantityType == QuantifierGreedy) {
2662 if (term->quantityMaxCount != quantifyInfinite)
2663 branch32(Below, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2664 else
2665 jump(beginOp.m_reentry);
2666
2667 op.m_reentry = label();
2668 } else if (term->quantityType == QuantifierNonGreedy) {
2669 YarrOp& beginOp = m_ops[op.m_previousOp];
2670 beginOp.m_jumps.link(this);
2671 op.m_reentry = label();
2672 }
2673#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2674 RELEASE_ASSERT_NOT_REACHED();
2675#endif
2676 break;
2677 }
2678
2679 // OpParentheticalAssertionBegin/End
2680 case OpParentheticalAssertionBegin: {
2681 PatternTerm* term = op.m_term;
2682
2683 // Store the current index - assertions should not update index, so
2684 // we will need to restore it upon a successful match.
2685 unsigned parenthesesFrameLocation = term->frameLocation;
2686 storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2687
2688 // Check
2689 op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2690 if (op.m_checkAdjust)
2691 sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2692
2693 m_checkedOffset -= op.m_checkAdjust;
2694 break;
2695 }
2696 case OpParentheticalAssertionEnd: {
2697 PatternTerm* term = op.m_term;
2698
2699 // Restore the input index value.
2700 unsigned parenthesesFrameLocation = term->frameLocation;
                loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), index);
2702
2703 // If inverted, a successful match of the assertion must be treated
2704 // as a failure, so jump to backtracking.
2705 if (term->invert()) {
2706 op.m_jumps.append(jump());
2707 op.m_reentry = label();
2708 }
2709
2710 YarrOp& lastOp = m_ops[op.m_previousOp];
2711 m_checkedOffset += lastOp.m_checkAdjust;
2712 break;
2713 }
2714
2715 case OpMatchFailed:
2716 removeCallFrame();
2717 generateFailReturn();
2718 break;
2719 }
2720
2721 ++opIndex;
2722 } while (opIndex < m_ops.size());
2723 }
2724
2725 void backtrack()
2726 {
2727 // Backwards generate the backtracking code.
2728 size_t opIndex = m_ops.size();
2729 ASSERT(opIndex);
2730
2731 do {
2732 --opIndex;
2733
2734 YarrOp& op = m_ops[opIndex];
2735 switch (op.m_op) {
2736
2737 case OpTerm:
2738 backtrackTerm(opIndex);
2739 break;
2740
2741 // OpBodyAlternativeBegin/Next/End
2742 //
2743 // For each Begin/Next node representing an alternative, we need to decide what to do
2744 // in two circumstances:
2745 // - If we backtrack back into this node, from within the alternative.
2746 // - If the input check at the head of the alternative fails (if this exists).
2747 //
2748 // We treat these two cases differently since in the former case we have slightly
2749 // more information - since we are backtracking out of a prior alternative we know
2750 // that at least enough input was available to run it. For example, given the regular
2751 // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
2752 // character match of 'a'), then we need not perform an additional input availability
2753 // check before running the second alternative.
2754 //
2755 // Backtracking required differs for the last alternative, which in the case of the
2756 // repeating set of alternatives must loop. The code generated for the last alternative
2757 // will also be used to handle all input check failures from any prior alternatives -
2758 // these require similar functionality, in seeking the next available alternative for
2759 // which there is sufficient input.
2760 //
2761 // Since backtracking of all other alternatives simply requires us to link backtracks
2762 // to the reentry point for the subsequent alternative, we will only be generating any
2763 // code when backtracking the last alternative.
2764 case OpBodyAlternativeBegin:
2765 case OpBodyAlternativeNext: {
2766 PatternAlternative* alternative = op.m_alternative;
2767
2768 if (op.m_op == OpBodyAlternativeNext) {
2769 PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2770 m_checkedOffset += priorAlternative->m_minimumSize;
2771 }
2772 m_checkedOffset -= alternative->m_minimumSize;
2773
2774 // Is this the last alternative? If not, then if we backtrack to this point we just
2775 // need to jump to try to match the next alternative.
2776 if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2777 m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2778 break;
2779 }
2780 YarrOp& endOp = m_ops[op.m_nextOp];
2781
2782 YarrOp* beginOp = &op;
2783 while (beginOp->m_op != OpBodyAlternativeBegin) {
2784 ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2785 beginOp = &m_ops[beginOp->m_previousOp];
2786 }
2787
2788 bool onceThrough = endOp.m_nextOp == notFound;
2789
2790 JumpList lastStickyAlternativeFailures;
2791
2792 // First, generate code to handle cases where we backtrack out of an attempted match
2793 // of the last alternative. If this is a 'once through' set of alternatives then we
2794 // have nothing to do - link this straight through to the End.
2795 if (onceThrough)
2796 m_backtrackingState.linkTo(endOp.m_reentry, this);
2797 else {
                    // If we don't need to move the input position, and the pattern has a fixed size
                    // (in which case we omit the store of the start index until the pattern has matched)
                    // then we can just link the backtrack out of the last alternative straight to the
                    // head of the first alternative.
2802 if (m_pattern.m_body->m_hasFixedSize
2803 && (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2804 && (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == 1))
2805 m_backtrackingState.linkTo(beginOp->m_reentry, this);
2806 else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2807 // It is a sticky pattern and the last alternative failed, jump to the end.
2808 m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2809 } else {
2810 // We need to generate a trampoline of code to execute before looping back
2811 // around to the first alternative.
2812 m_backtrackingState.link(this);
2813
2814 // No need to advance and retry for a sticky pattern.
2815 if (!m_pattern.sticky()) {
2816 // If the pattern size is not fixed, then store the start index for use if we match.
2817 if (!m_pattern.m_body->m_hasFixedSize) {
2818 if (alternative->m_minimumSize == 1)
2819 setMatchStart(index);
2820 else {
2821 move(index, regT0);
2822 if (alternative->m_minimumSize)
2823 sub32(Imm32(alternative->m_minimumSize - 1), regT0);
2824 else
2825 add32(TrustedImm32(1), regT0);
2826 setMatchStart(regT0);
2827 }
2828 }
2829
2830 // Generate code to loop. Check whether the last alternative is longer than the
2831 // first (e.g. /a|xy/ or /a|xyz/).
2832 if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2833 // We want to loop, and increment input position. If the delta is 1, it is
2834 // already correctly incremented, if more than one then decrement as appropriate.
2835 unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2836 ASSERT(delta);
2837 if (delta != 1)
2838 sub32(Imm32(delta - 1), index);
2839 jump(beginOp->m_reentry);
2840 } else {
                                // If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
                                // be sufficient input available to handle this, so just fall through.
2843 unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2844 if (delta != 0xFFFFFFFFu) {
2845 // We need to check input because we are incrementing the input.
2846 add32(Imm32(delta + 1), index);
2847 checkInput().linkTo(beginOp->m_reentry, this);
2848 }
2849 }
2850 }
2851 }
2852 }
2853
                // We can reach this point in the code in two ways:
                //  - Fallthrough from the code above (a repeating alternative backtracked out of its
                //    last alternative, and did not have sufficient input to run the first).
                //  - We will loop back up to the following label when a repeating alternative loops,
                //    following a failed input check.
                //
                // Either way, we have just failed the input check for the first alternative.
2861 Label firstInputCheckFailed(this);

                // Generate code to handle input check failures from alternatives except the last.
                // prevOp is the alternative we're handling a bail out from (initially Begin), and
                // nextOp is the alternative we will be attempting to reenter into.
                //
                // We will link input check failures from the forwards matching path back to the code
                // that can handle them.
                YarrOp* prevOp = beginOp;
                YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
                while (nextOp->m_op != OpBodyAlternativeEnd) {
                    prevOp->m_jumps.link(this);

                    // We only get here if an input check fails; it is only worth checking again
                    // if the next alternative has a minimum size less than the last.
                    if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
                        // FIXME: if we added an extra label to YarrOp, we could avoid needing to
                        // subtract delta back out, and reduce this code. Should performance test
                        // the benefit of this.
                        unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
                        sub32(Imm32(delta), index);
                        Jump fail = jumpIfNoAvailableInput();
                        add32(Imm32(delta), index);
                        jump(nextOp->m_reentry);
                        fail.link(this);
                    } else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
                        add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
                    prevOp = nextOp;
                    nextOp = &m_ops[nextOp->m_nextOp];
                }

                // We fall through to here if there is insufficient input to run the last alternative.

                // If there is insufficient input to run the last alternative, then for 'once through'
                // alternatives we are done - just jump back up into the forwards matching path at the End.
                if (onceThrough) {
                    op.m_jumps.linkTo(endOp.m_reentry, this);
                    jump(endOp.m_reentry);
                    break;
                }

                // For repeating alternatives, link any input check failure from the last alternative to
                // this point.
                op.m_jumps.link(this);

                bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;

                // Check for cases where input position is already incremented by 1 for the last
                // alternative (this is particularly useful where the minimum size of the body
                // disjunction is 0, e.g. /a*|b/).
                if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
                    // index is already incremented by 1, so just store it now!
                    setMatchStart(index);
                    needsToUpdateMatchStart = false;
                }

                if (!m_pattern.sticky()) {
                    // Check whether there is sufficient input to loop. Increment the input position by
                    // one, and check. Also add in the minimum disjunction size before checking - there
                    // is no point in looping if we're just going to fail all the input checks around
                    // the next iteration.
                    ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
                    if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
                        // If the last alternative had the same minimum size as the disjunction,
                        // just simply increment input pos by 1, no adjustment based on minimum size.
                        add32(TrustedImm32(1), index);
                    } else {
                        // If the minimum for the last alternative was one greater than that
                        // for the disjunction, we're already progressed by 1, nothing to do!
                        unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - 1;
                        if (delta)
                            sub32(Imm32(delta), index);
                    }
                    Jump matchFailed = jumpIfNoAvailableInput();

                    if (needsToUpdateMatchStart) {
                        if (!m_pattern.m_body->m_minimumSize)
                            setMatchStart(index);
                        else {
                            move(index, regT0);
                            sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
                            setMatchStart(regT0);
                        }
                    }

                    // Calculate how much more input the first alternative requires than the minimum
                    // for the body as a whole. If no more is needed then we don't need an additional
                    // input check here - jump straight back up to the start of the first alternative.
                    if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
                        jump(beginOp->m_reentry);
                    else {
                        if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
                            add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
                        else
                            sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
                        checkInput().linkTo(beginOp->m_reentry, this);
                        jump(firstInputCheckFailed);
                    }

                    // We jump to here if we iterate to the point that there is insufficient input to
                    // run any matches, and need to return a failure state from JIT code.
                    matchFailed.link(this);
                }

                lastStickyAlternativeFailures.link(this);
                removeCallFrame();
                generateFailReturn();
                break;
            }
            case OpBodyAlternativeEnd: {
                // We should never backtrack back into a body disjunction.
                ASSERT(m_backtrackingState.isEmpty());

                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
                m_checkedOffset += priorAlternative->m_minimumSize;
                break;
            }

            // OpSimpleNestedAlternativeBegin/Next/End
            // OpNestedAlternativeBegin/Next/End
            //
            // Generate code for when we backtrack back out of an alternative into
            // a Begin or Next node, or when the entry input count check fails. If
            // there are more alternatives we need to jump to the next alternative,
            // if not we backtrack back out of the current set of parentheses.
            //
            // In the case of non-simple nested assertions we need to also link the
            // 'return address' appropriately to backtrack back out into the correct
            // alternative.
            case OpSimpleNestedAlternativeBegin:
            case OpSimpleNestedAlternativeNext:
            case OpNestedAlternativeBegin:
            case OpNestedAlternativeNext: {
                YarrOp& nextOp = m_ops[op.m_nextOp];
                bool isBegin = op.m_previousOp == notFound;
                bool isLastAlternative = nextOp.m_nextOp == notFound;
                ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
                ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));

                // Treat an input check failure the same as a failed match.
                m_backtrackingState.append(op.m_jumps);

                // Set the backtracks to jump to the appropriate place. We may need
                // to link the backtracks in one of three different ways depending on
                // the type of alternative we are dealing with:
                //  - A single alternative, with no siblings.
                //  - The last alternative of a set of two or more.
                //  - An alternative other than the last of a set of two or more.
                //
                // In the case of a single alternative on its own, we don't need to
                // jump anywhere - if the alternative fails to match we can just
                // continue to backtrack out of the parentheses without jumping.
                //
                // In the case of the last alternative in a set of more than one, we
                // need to jump to return back out to the beginning. We'll do so by
                // adding a jump to the End node's m_jumps list, and linking this
                // when we come to generate the Begin node. For alternatives other
                // than the last, we need to jump to the next alternative.
                //
                // If the alternative had adjusted the input position we must link
                // backtracking to here, correct, and then jump on. If not we can
                // link the backtracks directly to their destination.
                if (op.m_checkAdjust) {
                    // Handle the cases where we need to link the backtracks here.
                    m_backtrackingState.link(this);
                    sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
                    if (!isLastAlternative) {
                        // An alternative that is not the last should jump to its successor.
                        jump(nextOp.m_reentry);
                    } else if (!isBegin) {
                        // The last of more than one alternatives must jump back to the beginning.
                        nextOp.m_jumps.append(jump());
                    } else {
                        // A single alternative on its own can fall through.
                        m_backtrackingState.fallthrough();
                    }
                } else {
                    // Handle the cases where we can link the backtracks directly to their destinations.
                    if (!isLastAlternative) {
                        // An alternative that is not the last should jump to its successor.
                        m_backtrackingState.linkTo(nextOp.m_reentry, this);
                    } else if (!isBegin) {
                        // The last of more than one alternatives must jump back to the beginning.
                        m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
                    }
                    // In the case of a single alternative on its own do nothing - it can fall through.
                }

                // If there is a backtrack jump from a zero length match link it here.
                if (op.m_zeroLengthMatch.isSet())
                    m_backtrackingState.append(op.m_zeroLengthMatch);

                // At this point we've handled the backtracking back into this node.
                // Now link any backtracks that need to jump to here.

                // For non-simple alternatives, link the alternative's 'return address'
                // so that we backtrack back out into the previous alternative.
                if (op.m_op == OpNestedAlternativeNext)
                    m_backtrackingState.append(op.m_returnAddress);

                // If there is more than one alternative, then the last alternative will
                // have planted a jump to be linked to the end. This jump was added to the
                // End node's m_jumps list. If we are back at the beginning, link it here.
                if (isBegin) {
                    YarrOp* endOp = &m_ops[op.m_nextOp];
                    while (endOp->m_nextOp != notFound) {
                        ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
                        endOp = &m_ops[endOp->m_nextOp];
                    }
                    ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
                    m_backtrackingState.append(endOp->m_jumps);
                }

                if (!isBegin) {
                    YarrOp& lastOp = m_ops[op.m_previousOp];
                    m_checkedOffset += lastOp.m_checkAdjust;
                }
                m_checkedOffset -= op.m_checkAdjust;
                break;
            }
            case OpSimpleNestedAlternativeEnd:
            case OpNestedAlternativeEnd: {
                PatternTerm* term = op.m_term;

                // If there is a backtrack jump from a zero length match link it here.
                if (op.m_zeroLengthMatch.isSet())
                    m_backtrackingState.append(op.m_zeroLengthMatch);

                // If we backtrack into the end of a simple subpattern do nothing;
                // just continue through into the last alternative. If we backtrack
                // into the end of a non-simple set of alternatives we need to jump
                // to the backtracking return address set up during generation.
                if (op.m_op == OpNestedAlternativeEnd) {
                    m_backtrackingState.link(this);

                    // Plant a jump to the return address.
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    loadFromFrameAndJump(parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());

                    // Link the DataLabelPtr associated with the end of the last
                    // alternative to this point.
                    m_backtrackingState.append(op.m_returnAddress);
                }

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checkedOffset += lastOp.m_checkAdjust;
                break;
            }

            // OpParenthesesSubpatternOnceBegin/End
            //
            // When we are backtracking back out of a capturing subpattern we need
            // to clear the start index in the matches output array, to record that
            // this subpattern has not been captured.
            //
            // When backtracking back out of a Greedy quantified subpattern we need
            // to catch this, and try running the remainder of the alternative after
            // the subpattern again, skipping the parentheses.
            //
            // Upon backtracking back into a quantified set of parentheses we need to
            // check whether we were currently skipping the subpattern. If not, we
            // can backtrack into them, if we were we need to either backtrack back
            // out of the start of the parentheses, or jump back to the forwards
            // matching start, depending on whether the match is Greedy or NonGreedy.
            case OpParenthesesSubpatternOnceBegin: {
                PatternTerm* term = op.m_term;
                ASSERT(term->quantityMaxCount == 1);

                // We only need to backtrack to this point if capturing or greedy.
                if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
                    m_backtrackingState.link(this);

                    // If capturing, clear the capture (we only need to reset start).
                    if (term->capture() && compileMode == IncludeSubpatterns)
                        clearSubpatternStart(term->parentheses.subpatternId);

                    // If Greedy, jump to the end.
                    if (term->quantityType == QuantifierGreedy) {
                        // Clear the flag in the stackframe indicating we ran through the subpattern.
                        unsigned parenthesesFrameLocation = term->frameLocation;
                        storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
                        // Jump to after the parentheses, skipping the subpattern.
                        jump(m_ops[op.m_nextOp].m_reentry);
                        // A backtrack from after the parentheses, when skipping the subpattern,
                        // will jump back to here.
                        op.m_jumps.link(this);
                    }

                    m_backtrackingState.fallthrough();
                }
                break;
            }
            case OpParenthesesSubpatternOnceEnd: {
                PatternTerm* term = op.m_term;

                if (term->quantityType != QuantifierFixedCount) {
                    m_backtrackingState.link(this);

                    // Check whether we should backtrack back into the parentheses, or if we
                    // are currently in a state where we had skipped over the subpattern
                    // (in which case the flag value on the stack will be -1).
                    unsigned parenthesesFrameLocation = term->frameLocation;
                    Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32(-1));

                    if (term->quantityType == QuantifierGreedy) {
                        // For Greedy parentheses, we skip after having already tried going
                        // through the subpattern, so if we get here we're done.
                        YarrOp& beginOp = m_ops[op.m_previousOp];
                        beginOp.m_jumps.append(hadSkipped);
                    } else {
                        // For NonGreedy parentheses, we try skipping the subpattern first,
                        // so if we get here we need to try running through the subpattern
                        // next. Jump back to the start of the parentheses in the forwards
                        // matching path.
                        ASSERT(term->quantityType == QuantifierNonGreedy);
                        YarrOp& beginOp = m_ops[op.m_previousOp];
                        hadSkipped.linkTo(beginOp.m_reentry, this);
                    }

                    m_backtrackingState.fallthrough();
                }

                m_backtrackingState.append(op.m_jumps);
                break;
            }

            // OpParenthesesSubpatternTerminalBegin/End
            //
            // Terminal subpatterns will always match - there is nothing after them to
            // force a backtrack, and they have a minimum count of 0, and as such will
            // always produce an acceptable result.
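            //
            // For example, a trailing non-capturing /(?:x)*/ at the end of a
            // pattern such as /y(?:x)*/ can be marked terminal by the pattern
            // analysis, since nothing follows it that could force a backtrack.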
            case OpParenthesesSubpatternTerminalBegin: {
                // We will backtrack to this point once the subpattern cannot match any
                // more. Since no match is accepted as a successful match (we are Greedy
                // quantified with a minimum of zero) jump back to the forwards matching
                // path at the end.
                YarrOp& endOp = m_ops[op.m_nextOp];
                m_backtrackingState.linkTo(endOp.m_reentry, this);
                break;
            }
            case OpParenthesesSubpatternTerminalEnd:
                // We should never be backtracking to here (hence the 'terminal' in the name).
                ASSERT(m_backtrackingState.isEmpty());
                m_backtrackingState.append(op.m_jumps);
                break;

            // OpParenthesesSubpatternBegin/End
            //
            // When we are backtracking back out of a capturing subpattern we need
            // to clear the start index in the matches output array, to record that
            // this subpattern has not been captured.
            //
            // When backtracking back out of a Greedy quantified subpattern we need
            // to catch this, and try running the remainder of the alternative after
            // the subpattern again, skipping the parentheses.
            //
            // Upon backtracking back into a quantified set of parentheses we need to
            // check whether we were currently skipping the subpattern. If not, we
            // can backtrack into them, if we were we need to either backtrack back
            // out of the start of the parentheses, or jump back to the forwards
            // matching start, depending on whether the match is Greedy or NonGreedy.
            case OpParenthesesSubpatternBegin: {
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
                PatternTerm* term = op.m_term;
                unsigned parenthesesFrameLocation = term->frameLocation;

                if (term->quantityType != QuantifierFixedCount) {
                    m_backtrackingState.link(this);

                    RegisterID currParenContextReg = regT0;
                    RegisterID newParenContextReg = regT1;

                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), currParenContextReg);

                    restoreParenContext(currParenContextReg, regT2, term->parentheses.subpatternId, term->parentheses.lastSubpatternId, parenthesesFrameLocation);

                    freeParenContext(currParenContextReg, newParenContextReg);
                    storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());

                    const RegisterID countTemporary = regT0;
                    loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
                    Jump zeroLengthMatch = branchTest32(Zero, countTemporary);

                    sub32(TrustedImm32(1), countTemporary);
                    storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());

                    jump(m_ops[op.m_nextOp].m_reentry);

                    zeroLengthMatch.link(this);

                    // Clear the flag in the stackframe indicating we didn't run through the subpattern.
                    storeToFrame(TrustedImm32(-1), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());

                    // If Greedy, jump to the end, skipping the subpattern. A backtrack
                    // from after the parentheses, when skipping the subpattern, will
                    // jump back to here.
                    if (term->quantityType == QuantifierGreedy) {
                        jump(m_ops[op.m_nextOp].m_reentry);
                        op.m_jumps.link(this);
                    }

                    m_backtrackingState.fallthrough();
                }
#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
                RELEASE_ASSERT_NOT_REACHED();
#endif
                break;
            }
            case OpParenthesesSubpatternEnd: {
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
                PatternTerm* term = op.m_term;

                if (term->quantityType != QuantifierFixedCount) {
                    m_backtrackingState.link(this);

                    unsigned parenthesesFrameLocation = term->frameLocation;

                    if (term->quantityType == QuantifierGreedy) {
                        // Check whether we should backtrack back into the parentheses, or if we
                        // are currently in a state where we had skipped over the subpattern
                        // (in which case the flag value on the stack will be -1).
                        Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32(-1));

                        // For Greedy parentheses, we skip after having already tried going
                        // through the subpattern, so if we get here we're done.
                        YarrOp& beginOp = m_ops[op.m_previousOp];
                        beginOp.m_jumps.append(hadSkipped);
                    } else {
                        // For NonGreedy parentheses, we try skipping the subpattern first,
                        // so if we get here we need to try running through the subpattern
                        // next. Jump back to the start of the parentheses in the forwards
                        // matching path.
                        ASSERT(term->quantityType == QuantifierNonGreedy);

                        const RegisterID beginTemporary = regT0;
                        const RegisterID countTemporary = regT1;

                        YarrOp& beginOp = m_ops[op.m_previousOp];

                        loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), beginTemporary);
                        branch32(Equal, beginTemporary, TrustedImm32(-1)).linkTo(beginOp.m_reentry, this);

                        JumpList exceededMatchLimit;

                        if (term->quantityMaxCount != quantifyInfinite) {
                            loadFromFrame(parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), countTemporary);
                            exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32(term->quantityMaxCount.unsafeGet())));
                        }

                        branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);

                        exceededMatchLimit.link(this);
                    }

                    m_backtrackingState.fallthrough();
                }

                m_backtrackingState.append(op.m_jumps);
#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
                RELEASE_ASSERT_NOT_REACHED();
#endif
                break;
            }

            // OpParentheticalAssertionBegin/End
            case OpParentheticalAssertionBegin: {
                PatternTerm* term = op.m_term;
                YarrOp& endOp = m_ops[op.m_nextOp];

                // We need to handle the backtracks upon backtracking back out
                // of a parenthetical assertion if either we need to correct
                // the input index, or the assertion was inverted.
                if (op.m_checkAdjust || term->invert()) {
                    m_backtrackingState.link(this);

                    if (op.m_checkAdjust)
                        add32(Imm32(op.m_checkAdjust.unsafeGet()), index);

                    // In an inverted assertion failure to match the subpattern
                    // is treated as a successful match - jump to the end of the
                    // subpattern. We already have adjusted the input position
                    // back to that before the assertion, which is correct.
                    if (term->invert())
                        jump(endOp.m_reentry);

                    m_backtrackingState.fallthrough();
                }

                // The End node's jump list will contain any backtracks into
                // the end of the assertion. Also, if inverted, we will have
                // added the failure caused by a successful match to this.
                m_backtrackingState.append(endOp.m_jumps);

                m_checkedOffset += op.m_checkAdjust;
                break;
            }
            case OpParentheticalAssertionEnd: {
                // FIXME: We should really be clearing any nested subpattern
                // matches on bailing out from after the pattern. Firefox has
                // this bug too (presumably because they use YARR!)

                // Never backtrack into an assertion; later failures bail to before the begin.
                m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);

                YarrOp& lastOp = m_ops[op.m_previousOp];
                m_checkedOffset -= lastOp.m_checkAdjust;
                break;
            }

            case OpMatchFailed:
                break;
            }

        } while (opIndex);
    }

    // Compilation methods:
    // ====================

    // opCompileParenthesesSubpattern
    // Emits ops for a subpattern (set of parentheses). These consist
    // of a set of alternatives wrapped in an outer set of nodes for
    // the parentheses.
    // Supported types of parentheses are 'Once' (quantityMaxCount == 1),
    // 'Terminal' (non-capturing parentheses quantified as greedy
    // and infinite), and zero-based greedy / non-greedy quantified parentheses.
    // Alternatives will use the 'Simple' set of ops if either the
    // subpattern is terminal (in which case we will never need to
    // backtrack), or if the subpattern only contains one alternative.
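    //
    // For example, a 'Once' subpattern with two alternatives, such as the
    // /(?:a|b)/ in /x(?:a|b)y/, is lowered to the op sequence:
    //     OpParenthesesSubpatternOnceBegin
    //         OpNestedAlternativeBegin   <terms of 'a'>
    //         OpNestedAlternativeNext    <terms of 'b'>
    //         OpNestedAlternativeEnd
    //     OpParenthesesSubpatternOnceEnd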
    void opCompileParenthesesSubpattern(PatternTerm* term)
    {
        YarrOpCode parenthesesBeginOpCode;
        YarrOpCode parenthesesEndOpCode;
        YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
        YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
        YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;

        // We can currently only compile quantity 1 subpatterns that are
        // not copies. We generate a copy in the case of a range quantifier,
        // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
        // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ respectively). The problem
        // comes where the subpattern is capturing, in which case we would
        // need to restore the capture from the first subpattern upon a
        // failure in the second.
        if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
            m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
            return;
        }

        if (term->quantityMaxCount == 1 && !term->parentheses.isCopy) {
            // Select the 'Once' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;

            // If there is more than one alternative we cannot use the 'simple' nodes.
            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
                alternativeBeginOpCode = OpNestedAlternativeBegin;
                alternativeNextOpCode = OpNestedAlternativeNext;
                alternativeEndOpCode = OpNestedAlternativeEnd;
            }
        } else if (term->parentheses.isTerminal) {
            // Select the 'Terminal' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
        } else {
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
            // We only handle generic parentheses with non-fixed counts.
            if (term->quantityType == QuantifierFixedCount) {
                // This subpattern is not supported by the JIT.
                m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
                return;
            }

            m_containsNestedSubpatterns = true;

            // Select the 'Generic' nodes.
            parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
            parenthesesEndOpCode = OpParenthesesSubpatternEnd;

            // If there is more than one alternative we cannot use the 'simple' nodes.
            if (term->parentheses.disjunction->m_alternatives.size() != 1) {
                alternativeBeginOpCode = OpNestedAlternativeBegin;
                alternativeNextOpCode = OpNestedAlternativeNext;
                alternativeEndOpCode = OpNestedAlternativeEnd;
            }
#else
            // This subpattern is not supported by the JIT.
            m_failureReason = JITFailureReason::ParenthesizedSubpattern;
            return;
#endif
        }

        size_t parenBegin = m_ops.size();
        m_ops.append(parenthesesBeginOpCode);

        m_ops.append(alternativeBeginOpCode);
        m_ops.last().m_previousOp = notFound;
        m_ops.last().m_term = term;
        Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
        for (unsigned i = 0; i < alternatives.size(); ++i) {
            size_t lastOpIndex = m_ops.size() - 1;

            PatternAlternative* nestedAlternative = alternatives[i].get();
            opCompileAlternative(nestedAlternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(alternativeNextOpCode));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = nestedAlternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;
            thisOp.m_term = term;
        }
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == alternativeNextOpCode);
        lastOp.m_op = alternativeEndOpCode;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = notFound;

        size_t parenEnd = m_ops.size();
        m_ops.append(parenthesesEndOpCode);

        m_ops[parenBegin].m_term = term;
        m_ops[parenBegin].m_previousOp = notFound;
        m_ops[parenBegin].m_nextOp = parenEnd;
        m_ops[parenEnd].m_term = term;
        m_ops[parenEnd].m_previousOp = parenBegin;
        m_ops[parenEnd].m_nextOp = notFound;
    }

    // opCompileParentheticalAssertion
    // Emits ops for a parenthetical assertion. These consist of an
    // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
    // the alternatives, with these wrapped by an outer pair of
    // OpParentheticalAssertionBegin/End nodes.
    // We can always use the OpSimpleNestedAlternative nodes in the
    // case of parenthetical assertions since these only ever match
    // once, and will never backtrack back into the assertion.
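    //
    // For example, the lookahead assertion /(?=ab)/ (a single alternative)
    // is lowered to:
    //     OpParentheticalAssertionBegin
    //         OpSimpleNestedAlternativeBegin   <terms of 'ab'>
    //         OpSimpleNestedAlternativeEnd
    //     OpParentheticalAssertionEnd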
    void opCompileParentheticalAssertion(PatternTerm* term)
    {
        size_t parenBegin = m_ops.size();
        m_ops.append(OpParentheticalAssertionBegin);

        m_ops.append(OpSimpleNestedAlternativeBegin);
        m_ops.last().m_previousOp = notFound;
        m_ops.last().m_term = term;
        Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
        for (unsigned i = 0; i < alternatives.size(); ++i) {
            size_t lastOpIndex = m_ops.size() - 1;

            PatternAlternative* nestedAlternative = alternatives[i].get();
            opCompileAlternative(nestedAlternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = nestedAlternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;
            thisOp.m_term = term;
        }
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
        lastOp.m_op = OpSimpleNestedAlternativeEnd;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = notFound;

        size_t parenEnd = m_ops.size();
        m_ops.append(OpParentheticalAssertionEnd);

        m_ops[parenBegin].m_term = term;
        m_ops[parenBegin].m_previousOp = notFound;
        m_ops[parenBegin].m_nextOp = parenEnd;
        m_ops[parenEnd].m_term = term;
        m_ops[parenEnd].m_previousOp = parenBegin;
        m_ops[parenEnd].m_nextOp = notFound;
    }

    // opCompileAlternative
    // Called to emit nodes for all terms in an alternative.
    void opCompileAlternative(PatternAlternative* alternative)
    {
        optimizeAlternative(alternative);

        for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
            PatternTerm* term = &alternative->m_terms[i];

            switch (term->type) {
            case PatternTerm::TypeParenthesesSubpattern:
                opCompileParenthesesSubpattern(term);
                break;

            case PatternTerm::TypeParentheticalAssertion:
                opCompileParentheticalAssertion(term);
                break;

            default:
                m_ops.append(term);
            }
        }
    }

    // opCompileBody
    // This method compiles the body disjunction of the regular expression.
    // The body consists of two sets of alternatives - zero or more 'once
    // through' (BOL anchored) alternatives, followed by zero or more
    // repeated alternatives.
    // For each of these two sets of alternatives, if not empty, they will be
    // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
    // 'begin' node referencing the first alternative, and 'next' nodes
    // referencing any further alternatives). The begin/next/end nodes are
    // linked together in a doubly linked list. In the case of repeating
    // alternatives, the end node is also linked back to the beginning.
    // If no repeating alternatives exist, then an OpMatchFailed node exists
    // to return the failing result.
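    //
    // For example, /^a|b/ has one once-through (BOL anchored) alternative
    // ('^a') and one repeated alternative ('b'), and is lowered to:
    //     OpBodyAlternativeBegin   <terms of '^a'>
    //     OpBodyAlternativeEnd     (m_nextOp == notFound)
    //     OpBodyAlternativeBegin   <terms of 'b'>
    //     OpBodyAlternativeEnd     (m_nextOp links back to the second Begin)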
    void opCompileBody(PatternDisjunction* disjunction)
    {
        Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
        size_t currentAlternativeIndex = 0;

        // Emit the 'once through' alternatives.
        if (alternatives.size() && alternatives[0]->onceThrough()) {
            m_ops.append(YarrOp(OpBodyAlternativeBegin));
            m_ops.last().m_previousOp = notFound;

            do {
                size_t lastOpIndex = m_ops.size() - 1;
                PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
                opCompileAlternative(alternative);

                size_t thisOpIndex = m_ops.size();
                m_ops.append(YarrOp(OpBodyAlternativeNext));

                YarrOp& lastOp = m_ops[lastOpIndex];
                YarrOp& thisOp = m_ops[thisOpIndex];

                lastOp.m_alternative = alternative;
                lastOp.m_nextOp = thisOpIndex;
                thisOp.m_previousOp = lastOpIndex;

                ++currentAlternativeIndex;
            } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());

            YarrOp& lastOp = m_ops.last();

            ASSERT(lastOp.m_op == OpBodyAlternativeNext);
            lastOp.m_op = OpBodyAlternativeEnd;
            lastOp.m_alternative = 0;
            lastOp.m_nextOp = notFound;
        }

        if (currentAlternativeIndex == alternatives.size()) {
            m_ops.append(YarrOp(OpMatchFailed));
            return;
        }

        // Emit the repeated alternatives.
        size_t repeatLoop = m_ops.size();
        m_ops.append(YarrOp(OpBodyAlternativeBegin));
        m_ops.last().m_previousOp = notFound;
        do {
            size_t lastOpIndex = m_ops.size() - 1;
            PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
            ASSERT(!alternative->onceThrough());
            opCompileAlternative(alternative);

            size_t thisOpIndex = m_ops.size();
            m_ops.append(YarrOp(OpBodyAlternativeNext));

            YarrOp& lastOp = m_ops[lastOpIndex];
            YarrOp& thisOp = m_ops[thisOpIndex];

            lastOp.m_alternative = alternative;
            lastOp.m_nextOp = thisOpIndex;
            thisOp.m_previousOp = lastOpIndex;

            ++currentAlternativeIndex;
        } while (currentAlternativeIndex < alternatives.size());
        YarrOp& lastOp = m_ops.last();
        ASSERT(lastOp.m_op == OpBodyAlternativeNext);
        lastOp.m_op = OpBodyAlternativeEnd;
        lastOp.m_alternative = 0;
        lastOp.m_nextOp = repeatLoop;
    }

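    // Emits the shared out-of-line helper used to read a unicode character.
    // Call sites are recorded in m_tryReadUnicodeCharacterCalls and are linked
    // to this helper's entry label in compile(), once the LinkBuffer exists.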
    void generateTryReadUnicodeCharacterHelper()
    {
#ifdef JIT_UNICODE_EXPRESSIONS
        if (m_tryReadUnicodeCharacterCalls.isEmpty())
            return;

        ASSERT(m_decodeSurrogatePairs);

        m_tryReadUnicodeCharacterEntry = label();

        tryReadUnicodeCharImpl(regT0);

        ret();
#endif
    }

    void generateEnter()
    {
#if CPU(X86_64)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);

        if (m_pattern.m_saveInitialStartValue)
            push(X86Registers::ebx);

#if OS(WINDOWS)
        push(X86Registers::edi);
#endif
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns) {
#if OS(WINDOWS)
            push(X86Registers::esi);
#endif
            push(X86Registers::r12);
        }
#endif

        if (m_decodeSurrogatePairs) {
            push(X86Registers::r13);
            push(X86Registers::r14);
            push(X86Registers::r15);

            move(TrustedImm32(0xd800), leadingSurrogateTag);
            move(TrustedImm32(0xdc00), trailingSurrogateTag);
        }
        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#if OS(WINDOWS)
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
        // rcx is the pointer to the allocated space for result in x64 Windows.
        push(X86Registers::ecx);
#endif
#elif CPU(X86)
        push(X86Registers::ebp);
        move(stackPointerRegister, X86Registers::ebp);
        // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
        push(X86Registers::ebx);
        push(X86Registers::edi);
        push(X86Registers::esi);
        // load output into edi (2 = saved ebp + return address).
    #if COMPILER(MSVC)
        loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
        loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
        loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
    #else
        if (compileMode == IncludeSubpatterns)
            loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
    #endif
#elif CPU(ARM64)
        if (m_decodeSurrogatePairs) {
            pushPair(framePointerRegister, linkRegister);
            move(TrustedImm32(0x10000), supplementaryPlanesBase);
            move(TrustedImm32(0xfffffc00), surrogateTagMask);
            move(TrustedImm32(0xd800), leadingSurrogateTag);
            move(TrustedImm32(0xdc00), trailingSurrogateTag);
        }

        // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
        zeroExtend32ToPtr(index, index);
        zeroExtend32ToPtr(length, length);
#elif CPU(ARM_THUMB2)
        push(ARMRegisters::r4);
        push(ARMRegisters::r5);
        push(ARMRegisters::r6);
        push(ARMRegisters::r8);
#elif CPU(MIPS)
        // Do nothing.
#endif

        store8(TrustedImm32(1), &m_vm->isExecutingInRegExpJIT);
    }

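    // Restores the callee-saved registers pushed by generateEnter(); the pops
    // here must mirror those pushes in exactly the reverse order, under the
    // same CPU/OS/feature conditions.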
    void generateReturn()
    {
        store8(TrustedImm32(0), &m_vm->isExecutingInRegExpJIT);

#if CPU(X86_64)
#if OS(WINDOWS)
        // Store the return value in the allocated space pointed to by rcx.
        pop(X86Registers::ecx);
        store64(returnRegister, Address(X86Registers::ecx));
        store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
        move(X86Registers::ecx, returnRegister);
#endif
        if (m_decodeSurrogatePairs) {
            pop(X86Registers::r15);
            pop(X86Registers::r14);
            pop(X86Registers::r13);
        }

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns) {
            pop(X86Registers::r12);
#if OS(WINDOWS)
            pop(X86Registers::esi);
#endif
        }
#endif
#if OS(WINDOWS)
        pop(X86Registers::edi);
#endif

        if (m_pattern.m_saveInitialStartValue)
            pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(X86)
        pop(X86Registers::esi);
        pop(X86Registers::edi);
        pop(X86Registers::ebx);
        pop(X86Registers::ebp);
#elif CPU(ARM64)
        if (m_decodeSurrogatePairs)
            popPair(framePointerRegister, linkRegister);
#elif CPU(ARM_THUMB2)
        pop(ARMRegisters::r8);
        pop(ARMRegisters::r6);
        pop(ARMRegisters::r5);
        pop(ARMRegisters::r4);
#elif CPU(MIPS)
        // Do nothing
#endif
        ret();
    }

public:
    YarrGenerator(VM* vm, YarrPattern& pattern, YarrCodeBlock& codeBlock, YarrCharSize charSize)
        : m_vm(vm)
        , m_pattern(pattern)
        , m_codeBlock(codeBlock)
        , m_charSize(charSize)
        , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
        , m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
        , m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        , m_containsNestedSubpatterns(false)
        , m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : 0, m_pattern.m_body->m_callFrameSize)
#endif
    {
    }

    void compile()
    {
        YarrCodeBlock& codeBlock = m_codeBlock;

#ifndef JIT_UNICODE_EXPRESSIONS
        if (m_decodeSurrogatePairs) {
            codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair);
            return;
        }
#endif

        if (m_pattern.m_containsBackreferences
#if ENABLE(YARR_JIT_BACKREFERENCES)
            && (compileMode == MatchOnly || (m_pattern.ignoreCase() && m_charSize != Char8))
#endif
            ) {
            codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
            return;
        }

        // We need to compile before generating code since we set flags based on compilation that
        // are used during generation.
        opCompileBody(m_pattern.m_body);

        if (m_failureReason) {
            codeBlock.setFallBackWithFailureReason(*m_failureReason);
            return;
        }

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns)
            codeBlock.setUsesPatternContextBuffer();
#endif

        generateEnter();

        Jump hasInput = checkInput();
        generateFailReturn();
        hasInput.link(this);

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns)
            move(TrustedImm32(matchLimit), remainingMatchCount);
#endif

        if (compileMode == IncludeSubpatterns) {
            for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
                store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
        }

        if (!m_pattern.m_body->m_hasFixedSize)
            setMatchStart(index);

        initCallFrame();

#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
        if (m_containsNestedSubpatterns)
            initParenContextFreeList();
#endif

        if (m_pattern.m_saveInitialStartValue) {
#ifdef HAVE_INITIAL_START_REG
            move(index, initialStart);
#else
            storeToFrame(index, m_pattern.m_initialStartValueFrameLocation);
#endif
        }

        generate();
        backtrack();

        generateTryReadUnicodeCharacterHelper();

        generateJITFailReturn();

        JSGlobalData data(m_vm->regExpAllocator);
        DefaultLinkBuffer linkBuffer(data, this, REGEXP_CODE_ID, JITCompilationCanFail);
        if (linkBuffer.didFailToAllocate()) {
            codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
            return;
        }

        if (!m_tryReadUnicodeCharacterCalls.isEmpty()) {
            CodeLocationLabel tryReadUnicodeCharacterHelper = linkBuffer.locationOf(m_tryReadUnicodeCharacterEntry);

            for (auto call : m_tryReadUnicodeCharacterCalls)
                linkBuffer.link(call, tryReadUnicodeCharacterHelper);
        }

        m_backtrackingState.linkDataLabels(linkBuffer);

        CodeRef codeRef;
        if (compileMode == MatchOnly) {
            if (m_charSize == Char8) {
                codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT",
                    "Match-only 8-bit regular expression");
                codeBlock.set8BitCodeMatchOnly(codeRef);
            } else {
                codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT",
                    "Match-only 16-bit regular expression");
                codeBlock.set16BitCodeMatchOnly(codeRef);
            }
        } else {
            if (m_charSize == Char8) {
                codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT", "8-bit regular expression");
                codeBlock.set8BitCode(codeRef);
            } else {
                codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT", "16-bit regular expression");
                codeBlock.set16BitCode(codeRef);
            }
        }
        QV4::generateFunctionTable(nullptr, &codeRef);

        if (Q_UNLIKELY(!linkBuffer.makeExecutable()))
            m_failureReason = JITFailureReason::ExecutableMemoryAllocationFailure;

        if (m_failureReason)
            codeBlock.setFallBackWithFailureReason(*m_failureReason);
    }

private:
    VM* m_vm;

    YarrPattern& m_pattern;

    YarrCodeBlock& m_codeBlock;
    YarrCharSize m_charSize;

    // Used to detect regular expression constructs that are not currently
    // supported in the JIT; fall back to the interpreter when this is detected.
    std::optional<JITFailureReason> m_failureReason;

    bool m_decodeSurrogatePairs;
    bool m_unicodeIgnoreCase;
    CanonicalMode m_canonicalMode;
#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
    bool m_containsNestedSubpatterns;
    ParenContextSizes m_parenContextSizes;
#endif
    JumpList m_abortExecution;
    JumpList m_hitMatchLimit;
    Vector<Call> m_tryReadUnicodeCharacterCalls;
    Label m_tryReadUnicodeCharacterEntry;

    // The regular expression expressed as a linear sequence of operations.
    Vector<YarrOp, 128> m_ops;

    // This records the current input offset being applied due to the current
    // set of alternatives we are nested within. E.g. when matching the
    // character 'b' within the regular expression /abc/, we will know that
    // the minimum size for the alternative is 3, checked upon entry to the
    // alternative, and that 'b' is at offset 1 from the start, and as such
    // when matching 'b' we need to apply an offset of -2 to the load.
    //
    // FIXME: This should go away. Rather than tracking this value throughout
    // code generation, we should gather this information up front & store it
    // on the YarrOp structure.
    Checked<unsigned> m_checkedOffset;

    // This class records state whilst generating the backtracking path of code.
    BacktrackingState m_backtrackingState;
};

void YarrCodeBlock::replaceCodeRef(MacroAssemblerCodeRef &target,
    const MacroAssemblerCodeRef &source)
{
    if (!!target && target.code().executableAddress() != source.code().executableAddress())
        QV4::destroyFunctionTable(nullptr, &target);

    target = source;
}

static void dumpCompileFailure(JITFailureReason failure)
{
    switch (failure) {
    case JITFailureReason::DecodeSurrogatePair:
        dataLog("Can't JIT a pattern decoding surrogate pairs\n");
        break;
    case JITFailureReason::BackReference:
        dataLog("Can't JIT some patterns containing back references\n");
        break;
    case JITFailureReason::ForwardReference:
        dataLog("Can't JIT a pattern containing forward references\n");
        break;
    case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
        dataLog("Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
        break;
    case JITFailureReason::ParenthesizedSubpattern:
        dataLog("Can't JIT a pattern containing parenthesized subpatterns\n");
        break;
    case JITFailureReason::FixedCountParenthesizedSubpattern:
        dataLog("Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
        break;
    case JITFailureReason::ExecutableMemoryAllocationFailure:
        dataLog("Can't JIT because of failure of allocation of executable memory\n");
        break;
    }
}

void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
{
    if (mode == MatchOnly)
        YarrGenerator<MatchOnly>(vm, pattern, codeBlock, charSize).compile();
    else
        YarrGenerator<IncludeSubpatterns>(vm, pattern, codeBlock, charSize).compile();

    if (auto failureReason = codeBlock.failureReason()) {
        if (Options::dumpCompiledRegExpPatterns())
            dumpCompileFailure(*failureReason);
    }
}
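
// Illustrative use (a sketch, not code from this file): a caller holding a
// parsed YarrPattern would attempt to JIT it, then consult the code block's
// failure reason to decide whether to fall back to the Yarr interpreter:
//
//     YarrCodeBlock codeBlock;
//     jitCompile(pattern, Char8, vm, codeBlock, IncludeSubpatterns);
//     if (codeBlock.failureReason()) {
//         // Pattern unsupported by the JIT; match with the interpreter instead.
//     }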

}}

#endif

