YarrJIT.cpp source code [qtdeclarative/src/3rdparty/masm/yarr/YarrJIT.cpp]

1	/*
2	* Copyright (C) 2009-2018 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#include "config.h"
27	#include "YarrJIT.h"
28
29	#include <wtf/ASCIICType.h>
30	#include "LinkBuffer.h"
31	#include "Options.h"
32	#include "VM.h"
33	#include "Yarr.h"
34	#include "YarrCanonicalize.h"
35
36	#include <private/qv4functiontable_p.h>
37
38	#if ENABLE(YARR_JIT)
39
40	namespace JSC { namespace Yarr {
41
42	template<YarrJITCompileMode compileMode>
43	class YarrGenerator : private DefaultMacroAssembler {
44
45	#if CPU(ARM_THUMB2)
46	static const RegisterID input = ARMRegisters::r0;
47	static const RegisterID index = ARMRegisters::r1;
48	static const RegisterID length = ARMRegisters::r2;
49	static const RegisterID output = ARMRegisters::r3;
50
51	static const RegisterID regT0 = ARMRegisters::r4;
52	static const RegisterID regT1 = ARMRegisters::r5;
53	static const RegisterID initialStart = ARMRegisters::r8;
54
55	static const RegisterID returnRegister = ARMRegisters::r0;
56	static const RegisterID returnRegister2 = ARMRegisters::r1;
57
58	#define HAVE_INITIAL_START_REG
59	#elif CPU(ARM64)
60	// Argument registers
61	static const RegisterID input = ARM64Registers::x0;
62	static const RegisterID index = ARM64Registers::x1;
63	static const RegisterID length = ARM64Registers::x2;
64	static const RegisterID output = ARM64Registers::x3;
65	static const RegisterID freelistRegister = ARM64Registers::x4;
66	static const RegisterID freelistSizeRegister = ARM64Registers::x5;
67
68	// Scratch registers
69	static const RegisterID regT0 = ARM64Registers::x6;
70	static const RegisterID regT1 = ARM64Registers::x7;
71	static const RegisterID regT2 = ARM64Registers::x8;
72	static const RegisterID remainingMatchCount = ARM64Registers::x9;
73	static const RegisterID regUnicodeInputAndTrail = ARM64Registers::x10;
74	static const RegisterID initialStart = ARM64Registers::x11;
75	static const RegisterID supplementaryPlanesBase = ARM64Registers::x12;
76	static const RegisterID surrogateTagMask = ARM64Registers::x13;
77	static const RegisterID leadingSurrogateTag = ARM64Registers::x14;
78	static const RegisterID trailingSurrogateTag = ARM64Registers::x15;
79
80	static const RegisterID returnRegister = ARM64Registers::x0;
81	static const RegisterID returnRegister2 = ARM64Registers::x1;
82
83	#define HAVE_INITIAL_START_REG
84	#define JIT_UNICODE_EXPRESSIONS
85	#elif CPU(MIPS)
86	static const RegisterID input = MIPSRegisters::a0;
87	static const RegisterID index = MIPSRegisters::a1;
88	static const RegisterID length = MIPSRegisters::a2;
89	static const RegisterID output = MIPSRegisters::a3;
90
91	static const RegisterID regT0 = MIPSRegisters::t4;
92	static const RegisterID regT1 = MIPSRegisters::t5;
93	static const RegisterID initialStart = MIPSRegisters::t6;
94
95	static const RegisterID returnRegister = MIPSRegisters::v0;
96	static const RegisterID returnRegister2 = MIPSRegisters::v1;
97
98	#define HAVE_INITIAL_START_REG
99	#elif CPU(X86)
100	static const RegisterID input = X86Registers::eax;
101	static const RegisterID index = X86Registers::edx;
102	static const RegisterID length = X86Registers::ecx;
103	static const RegisterID output = X86Registers::edi;
104
105	static const RegisterID regT0 = X86Registers::ebx;
106	static const RegisterID regT1 = X86Registers::esi;
107
108	static const RegisterID returnRegister = X86Registers::eax;
109	static const RegisterID returnRegister2 = X86Registers::edx;
110	#elif CPU(X86_64)
111	#if !OS(WINDOWS)
112	// Argument registers
113	static const RegisterID input = X86Registers::edi;
114	static const RegisterID index = X86Registers::esi;
115	static const RegisterID length = X86Registers::edx;
116	static const RegisterID output = X86Registers::ecx;
117	static const RegisterID freelistRegister = X86Registers::r8;
118	static const RegisterID freelistSizeRegister = X86Registers::r9; // Only used during initialization.
119	#else
120	// If the return value doesn't fit in 64bits, its destination is pointed by rcx and the parameters are shifted.
121	// http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
122	COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
123	static const RegisterID input = X86Registers::edx;
124	static const RegisterID index = X86Registers::r8;
125	static const RegisterID length = X86Registers::r9;
126	static const RegisterID output = X86Registers::r10;
127	#endif
128
129	// Scratch registers
130	static const RegisterID regT0 = X86Registers::eax;
131	#if !OS(WINDOWS)
132	static const RegisterID regT1 = X86Registers::r9;
133	static const RegisterID regT2 = X86Registers::r10;
134	#else
135	static const RegisterID regT1 = X86Registers::ecx;
136	static const RegisterID regT2 = X86Registers::edi;
137	#endif
138
139	static const RegisterID initialStart = X86Registers::ebx;
140	#if !OS(WINDOWS)
141	static const RegisterID remainingMatchCount = X86Registers::r12;
142	#else
143	static const RegisterID remainingMatchCount = X86Registers::esi;
144	#endif
145	static const RegisterID regUnicodeInputAndTrail = X86Registers::r13;
146	static const RegisterID leadingSurrogateTag = X86Registers::r14;
147	static const RegisterID trailingSurrogateTag = X86Registers::r15;
148
149	static const RegisterID returnRegister = X86Registers::eax;
150	static const RegisterID returnRegister2 = X86Registers::edx;
151
152	const TrustedImm32 supplementaryPlanesBase = TrustedImm32 (`0x10000`);
153	const TrustedImm32 surrogateTagMask = TrustedImm32 (`0xfffffc00`);
154	#define HAVE_INITIAL_START_REG
155	#define JIT_UNICODE_EXPRESSIONS
156	#endif
157
158	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
159	struct ParenContextSizes {
160	size_t m_numSubpatterns;
161	size_t m_frameSlots;
162
163	ParenContextSizes(size_t numSubpatterns, size_t frameSlots)
164	: m_numSubpatterns(numSubpatterns)
165	, m_frameSlots(frameSlots)
166	{
167	}
168
169	size_t numSubpatterns() { return m_numSubpatterns; }
170
171	size_t frameSlots() { return m_frameSlots; }
172	};
173
174	struct ParenContext {
175	struct ParenContext* next;
176	uint32_t begin;
177	uint32_t matchAmount;
178	uintptr_t returnAddress;
179	#if OS(INTEGRITY)
180	union {
181	struct Subpatterns {
182	unsigned start;
183	unsigned end;
184	} subpatterns[`1`];
185	uintptr_t frameSlots[`1`];
186	};
187	#else
188	struct Subpatterns {
189	unsigned start;
190	unsigned end;
191	} subpatterns[`0`];
192	uintptr_t frameSlots[`0`];
193	#endif
194
195	static size_t sizeFor(ParenContextSizes& parenContextSizes)
196	{
197	return sizeof(ParenContext) + sizeof(Subpatterns) * parenContextSizes.numSubpatterns() + sizeof(uintptr_t) * parenContextSizes.frameSlots();
198	}
199
200	static ptrdiff_t nextOffset()
201	{
202	return offsetof(ParenContext, next);
203	}
204
205	static ptrdiff_t beginOffset()
206	{
207	return offsetof(ParenContext, begin);
208	}
209
210	static ptrdiff_t matchAmountOffset()
211	{
212	return offsetof(ParenContext, matchAmount);
213	}
214
215	static ptrdiff_t returnAddressOffset()
216	{
217	return offsetof(ParenContext, returnAddress);
218	}
219
220	static ptrdiff_t subpatternOffset(size_t subpattern)
221	{
222	return offsetof(ParenContext, subpatterns) + (subpattern - `1`) * sizeof(Subpatterns);
223	}
224
225	static ptrdiff_t savedFrameOffset(ParenContextSizes& parenContextSizes)
226	{
227	return offsetof(ParenContext, subpatterns) + (parenContextSizes.numSubpatterns()) * sizeof(Subpatterns);
228	}
229	};
230
231	void initParenContextFreeList()
232	{
233	RegisterID parenContextPointer = regT0;
234	RegisterID nextParenContextPointer = regT2;
235
236	size_t parenContextSize = ParenContext::sizeFor(m_parenContextSizes);
237
238	parenContextSize = WTF::roundUpToMultipleOf<sizeof(uintptr_t)>(x: parenContextSize);
239
240	// Check that the paren context is a reasonable size.
241	if (parenContextSize > INT16_MAX)
242	m_abortExecution.append(jump());
243
244	Jump emptyFreeList = branchTestPtr(Zero, freelistRegister);
245	move(freelistRegister, parenContextPointer);
246	addPtr(TrustedImm32 (parenContextSize), freelistRegister, nextParenContextPointer);
247	addPtr(freelistRegister, freelistSizeRegister);
248	subPtr(TrustedImm32 (parenContextSize), freelistSizeRegister);
249
250	Label loopTop(this);
251	Jump initDone = branchPtr(Above, nextParenContextPointer, freelistSizeRegister);
252	storePtr(nextParenContextPointer, Address(parenContextPointer, ParenContext::nextOffset()));
253	move(nextParenContextPointer, parenContextPointer);
254	addPtr(TrustedImm32 (parenContextSize), parenContextPointer, nextParenContextPointer);
255	jump(loopTop);
256
257	initDone.link(masm: this);
258	storePtr(TrustedImmPtr (nullptr), Address(parenContextPointer, ParenContext::nextOffset()));
259	emptyFreeList.link(masm: this);
260	}
261
262	void allocateParenContext(RegisterID result)
263	{
264	m_abortExecution.append(branchTestPtr(Zero, freelistRegister));
265	sub32(TrustedImm32 (`1`), remainingMatchCount);
266	m_hitMatchLimit.append(branchTestPtr(Zero, remainingMatchCount));
267	move(freelistRegister, result);
268	loadPtr(Address(freelistRegister, ParenContext::nextOffset()), freelistRegister);
269	}
270
271	void freeParenContext(RegisterID headPtrRegister, RegisterID newHeadPtrRegister)
272	{
273	loadPtr(Address(headPtrRegister, ParenContext::nextOffset()), newHeadPtrRegister);
274	storePtr(freelistRegister, Address(headPtrRegister, ParenContext::nextOffset()));
275	move(headPtrRegister, freelistRegister);
276	}
277
278	void saveParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
279	{
280	store32(index, Address(parenContextReg, ParenContext::beginOffset()));
281	loadFromFrame(frameLocation: subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), reg: tempReg);
282	store32(tempReg, Address(parenContextReg, ParenContext::matchAmountOffset()));
283	loadFromFrame(frameLocation: subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex(), reg: tempReg);
284	storePtr(tempReg, Address(parenContextReg, ParenContext::returnAddressOffset()));
285	if (compileMode == IncludeSubpatterns) {
286	for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
287	loadPtr(Address (output, (subpattern << `1`) * sizeof(unsigned)), tempReg);
288	storePtr(tempReg, Address(parenContextReg, ParenContext::subpatternOffset(subpattern)));
289	clearSubpatternStart(subpattern);
290	}
291	}
292	subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
293	for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
294	loadFromFrame(frameLocation, reg: tempReg);
295	storePtr(tempReg, Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)));
296	}
297	}
298
299	void restoreParenContext(RegisterID parenContextReg, RegisterID tempReg, unsigned firstSubpattern, unsigned lastSubpattern, unsigned subpatternBaseFrameLocation)
300	{
301	load32(Address(parenContextReg, ParenContext::beginOffset()), index);
302	storeToFrame(index, subpatternBaseFrameLocation + BackTrackInfoParentheses::beginIndex());
303	load32(Address(parenContextReg, ParenContext::matchAmountOffset()), tempReg);
304	storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
305	loadPtr(Address(parenContextReg, ParenContext::returnAddressOffset()), tempReg);
306	storeToFrame(tempReg, subpatternBaseFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
307	if (compileMode == IncludeSubpatterns) {
308	for (unsigned subpattern = firstSubpattern; subpattern <= lastSubpattern; subpattern++) {
309	loadPtr(Address(parenContextReg, ParenContext::subpatternOffset(subpattern)), tempReg);
310	storePtr(tempReg, Address (output, (subpattern << `1`) * sizeof(unsigned)));
311	}
312	}
313	subpatternBaseFrameLocation += YarrStackSpaceForBackTrackInfoParentheses;
314	for (unsigned frameLocation = subpatternBaseFrameLocation; frameLocation < m_parenContextSizes.frameSlots(); frameLocation++) {
315	loadPtr(Address(parenContextReg, ParenContext::savedFrameOffset(m_parenContextSizes) + frameLocation * sizeof(uintptr_t)), tempReg);
316	storeToFrame(tempReg, frameLocation);
317	}
318	}
319	#endif
320
321	void optimizeAlternative(PatternAlternative* alternative)
322	{
323	if (!alternative->m_terms.size())
324	return;
325
326	for (unsigned i = `0`; i < alternative->m_terms.size() - `1`; ++i) {
327	PatternTerm& term = alternative->m_terms [i];
328	PatternTerm& nextTerm = alternative->m_terms [i + `1`];
329
330	// We can move BMP only character classes after fixed character terms.
331	if ((term.type == PatternTerm::TypeCharacterClass)
332	&& (term.quantityType == QuantifierFixedCount)
333	&& (!m_decodeSurrogatePairs \|\| (!term.characterClass->m_hasNonBMPCharacters && !term.m_invert))
334	&& (nextTerm.type == PatternTerm::TypePatternCharacter)
335	&& (nextTerm.quantityType == QuantifierFixedCount)) {
336	PatternTerm termCopy = term;
337	alternative->m_terms [i] = nextTerm;
338	alternative->m_terms [i + `1`] = termCopy;
339	}
340	}
341	}
342
343	void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar32* matches, unsigned matchCount)
344	{
345	do {
346	// pick which range we're going to generate
347	int which = count >> `1`;
348	char lo = ranges[which].begin;
349	char hi = ranges[which].end;
350
351	// check if there are any ranges or matches below lo. If not, just jl to failure -
352	// if there is anything else to check, check that first, if it falls through jmp to failure.
353	if ((matchIndex < matchCount) && (matches[matchIndex] < lo)) {
354	Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32 ((unsigned short)lo));
355
356	// generate code for all ranges before this one
357	if (which)
358	matchCharacterClassRange(character, failures, matchDest, ranges, count: which, matchIndex, matches, matchCount);
359
360	while ((matchIndex < matchCount) && (matches[matchIndex] < lo)) {
361	matchDest.append(branch32(Equal, character, Imm32 ((unsigned short)matches[*matchIndex])));
362	++*matchIndex;
363	}
364	failures.append(jump());
365
366	loOrAbove.link(masm: this);
367	} else if (which) {
368	Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32 ((unsigned short)lo));
369
370	matchCharacterClassRange(character, failures, matchDest, ranges, count: which, matchIndex, matches, matchCount);
371	failures.append(jump());
372
373	loOrAbove.link(masm: this);
374	} else
375	failures.append(branch32(LessThan, character, Imm32 ((unsigned short)lo)));
376
377	while ((matchIndex < matchCount) && (matches[matchIndex] <= hi))
378	++*matchIndex;
379
380	matchDest.append(branch32(LessThanOrEqual, character, Imm32 ((unsigned short)hi)));
381	// fall through to here, the value is above hi.
382
383	// shuffle along & loop around if there are any more matches to handle.
384	unsigned next = which + `1`;
385	ranges += next;
386	count -= next;
387	} while (count);
388	}
389
390	void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
391	{
392	if (charClass->m_table && !m_decodeSurrogatePairs) {
393	ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
394	matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
395	return;
396	}
397	JumpList unicodeFail;
398	if (charClass->m_matchesUnicode.size() \|\| charClass->m_rangesUnicode.size()) {
399	JumpList isAscii;
400	if (charClass->m_matches.size() \|\| charClass->m_ranges.size())
401	isAscii.append(branch32(LessThanOrEqual, character, TrustedImm32 (`0x7f`)));
402
403	if (charClass->m_matchesUnicode.size()) {
404	for (unsigned i = `0`; i < charClass->m_matchesUnicode.size(); ++i) {
405	UChar32 ch = charClass->m_matchesUnicode [i];
406	matchDest.append(branch32(Equal, character, Imm32 (ch)));
407	}
408	}
409
410	if (charClass->m_rangesUnicode.size()) {
411	for (unsigned i = `0`; i < charClass->m_rangesUnicode.size(); ++i) {
412	UChar32 lo = charClass->m_rangesUnicode [i].begin;
413	UChar32 hi = charClass->m_rangesUnicode [i].end;
414
415	Jump below = branch32(LessThan, character, Imm32 (lo));
416	matchDest.append(branch32(LessThanOrEqual, character, Imm32 (hi)));
417	below.link(masm: this);
418	}
419	}
420
421	if (charClass->m_matches.size() \|\| charClass->m_ranges.size())
422	unicodeFail = jump();
423	isAscii.link(masm: this);
424	}
425
426	if (charClass->m_ranges.size()) {
427	unsigned matchIndex = `0`;
428	JumpList failures;
429	matchCharacterClassRange(character, failures, matchDest, ranges: charClass->m_ranges.data(), count: charClass->m_ranges.size(),
430	matchIndex: &matchIndex, matches: charClass->m_matches.data(), matchCount: charClass->m_matches.size());
431	while (matchIndex < charClass->m_matches.size())
432	matchDest.append(branch32(Equal, character, Imm32 ((unsigned short)charClass->m_matches [matchIndex++])));
433
434	failures.link(masm: this);
435	} else if (charClass->m_matches.size()) {
436	// optimization: gather 'a','A' etc back together, can mask & test once.
437	Vector<char> matchesAZaz;
438
439	for (unsigned i = `0`; i < charClass->m_matches.size(); ++i) {
440	char ch = charClass->m_matches [i];
441	if (m_pattern.ignoreCase()) {
442	if (isASCIILower(c: ch)) {
443	matchesAZaz.append(value: ch);
444	continue;
445	}
446	if (isASCIIUpper(c: ch))
447	continue;
448	}
449	matchDest.append(branch32(Equal, character, Imm32 ((unsigned short)ch)));
450	}
451
452	if (unsigned countAZaz = matchesAZaz.size()) {
453	or32(TrustedImm32 (`32`), character);
454	for (unsigned i = `0`; i < countAZaz; ++i)
455	matchDest.append(branch32(Equal, character, TrustedImm32 (matchesAZaz [i])));
456	}
457	}
458
459	if (charClass->m_matchesUnicode.size() \|\| charClass->m_rangesUnicode.size())
460	unicodeFail.link(masm: this);
461	}
462
463	// Jumps if input not available; will have (incorrectly) incremented already!
464	Jump jumpIfNoAvailableInput(unsigned countToCheck = `0`)
465	{
466	if (countToCheck)
467	add32(Imm32 (countToCheck), index);
468	return branch32(Above, index, length);
469	}
470
471	Jump jumpIfAvailableInput(unsigned countToCheck)
472	{
473	add32(Imm32 (countToCheck), index);
474	return branch32(BelowOrEqual, index, length);
475	}
476
477	Jump checkNotEnoughInput(RegisterID additionalAmount)
478	{
479	add32(index, additionalAmount);
480	return branch32(Above, additionalAmount, length);
481	}
482
483	Jump checkInput()
484	{
485	return branch32(BelowOrEqual, index, length);
486	}
487
488	Jump atEndOfInput()
489	{
490	return branch32(Equal, index, length);
491	}
492
493	Jump notAtEndOfInput()
494	{
495	return branch32(NotEqual, index, length);
496	}
497
498	BaseIndex negativeOffsetIndexedAddress(Checked<unsigned> negativeCharacterOffset, RegisterID tempReg, RegisterID indexReg = index)
499	{
500	RegisterID base = input;
501
502	// BaseIndex() addressing can take a int32_t offset. Given that we can have a regular
503	// expression that has unsigned character offsets, BaseIndex's signed offset is insufficient
504	// for addressing in extreme cases where we might underflow. Therefore we check to see if
505	// negativeCharacterOffset will underflow directly or after converting for 16 bit characters.
506	// If so, we do our own address calculating by adjusting the base, using the result register
507	// as a temp address register.
508	unsigned maximumNegativeOffsetForCharacterSize = m_charSize == Char8 ? `0x7fffffff` : `0x3fffffff`;
509	unsigned offsetAdjustAmount = `0x40000000`;
510	if (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
511	base = tempReg;
512	move(input, base);
513	while (negativeCharacterOffset.unsafeGet() > maximumNegativeOffsetForCharacterSize) {
514	subPtr(TrustedImm32 (offsetAdjustAmount), base);
515	if (m_charSize != Char8)
516	subPtr(TrustedImm32 (offsetAdjustAmount), base);
517	negativeCharacterOffset -= offsetAdjustAmount;
518	}
519	}
520
521	Checked<int32_t> characterOffset(-static_cast<int32_t>(negativeCharacterOffset.unsafeGet()));
522
523	if (m_charSize == Char8)
524	return BaseIndex (input, indexReg, TimesOne, (characterOffset * static_cast<int32_t>(sizeof(char))).unsafeGet());
525
526	return BaseIndex (input, indexReg, TimesTwo, (characterOffset * static_cast<int32_t>(sizeof(UChar))).unsafeGet());
527	}
528
529	#ifdef JIT_UNICODE_EXPRESSIONS
530	void tryReadUnicodeCharImpl(RegisterID resultReg)
531	{
532	ASSERT(m_charSize == Char16);
533
534	JumpList notUnicode;
535	load16Unaligned(regUnicodeInputAndTrail, resultReg);
536	and32(surrogateTagMask, resultReg, regT2);
537	notUnicode.append(branch32(NotEqual, regT2, leadingSurrogateTag));
538	addPtr(TrustedImm32 (`2`), regUnicodeInputAndTrail);
539	getEffectiveAddress(address: BaseIndex (input, length, TimesTwo), dest: regT2);
540	notUnicode.append(branch32(AboveOrEqual, regUnicodeInputAndTrail, regT2));
541	load16Unaligned(Address (regUnicodeInputAndTrail), regUnicodeInputAndTrail);
542	and32(surrogateTagMask, regUnicodeInputAndTrail, regT2);
543	notUnicode.append(branch32(NotEqual, regT2, trailingSurrogateTag));
544	sub32(leadingSurrogateTag, resultReg);
545	sub32(trailingSurrogateTag, regUnicodeInputAndTrail);
546	lshift32(TrustedImm32 (`10`), resultReg);
547	or32(regUnicodeInputAndTrail, resultReg);
548	add32(supplementaryPlanesBase, resultReg);
549	notUnicode.link(masm: this);
550	}
551
552	void tryReadUnicodeChar(BaseIndex address, RegisterID resultReg)
553	{
554	ASSERT(m_charSize == Char16);
555
556	getEffectiveAddress(address, dest: regUnicodeInputAndTrail);
557
558	if (resultReg == regT0)
559	m_tryReadUnicodeCharacterCalls.append(nearCall());
560	else
561	tryReadUnicodeCharImpl(resultReg);
562	}
563	#endif
564
565	void readCharacterDontDecodeSurrogates(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
566	{
567	BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, tempReg: resultReg, indexReg);
568
569	if (m_charSize == Char8)
570	load8(address, resultReg);
571	else
572	load16Unaligned(address, resultReg);
573	}
574
575	void readCharacter(Checked<unsigned> negativeCharacterOffset, RegisterID resultReg, RegisterID indexReg = index)
576	{
577	BaseIndex address = negativeOffsetIndexedAddress(negativeCharacterOffset, tempReg: resultReg, indexReg);
578
579	if (m_charSize == Char8)
580	load8(address, resultReg);
581	#ifdef JIT_UNICODE_EXPRESSIONS
582	else if (m_decodeSurrogatePairs)
583	tryReadUnicodeChar(address, resultReg);
584	#endif
585	else
586	load16Unaligned(address, resultReg);
587	}
588
589	Jump jumpIfCharNotEquals(UChar32 ch, Checked<unsigned> negativeCharacterOffset, RegisterID character)
590	{
591	readCharacter(negativeCharacterOffset, resultReg: character);
592
593	// For case-insesitive compares, non-ascii characters that have different
594	// upper & lower case representations are converted to a character class.
595	ASSERT(!m_pattern.ignoreCase() \|\| isASCIIAlpha(ch) \|\| isCanonicallyUnique(ch, m_canonicalMode));
596	if (m_pattern.ignoreCase() && isASCIIAlpha(c: ch)) {
597	or32(TrustedImm32 (`0x20`), character);
598	ch \|= `0x20`;
599	}
600
601	return branch32(NotEqual, character, Imm32 (ch));
602	}
603
604	void storeToFrame(RegisterID reg, unsigned frameLocation)
605	{
606	poke(reg, frameLocation);
607	}
608
609	void storeToFrame(TrustedImm32 imm, unsigned frameLocation)
610	{
611	poke(imm, frameLocation);
612	}
613
614	#if CPU(ARM64) \|\| CPU(X86_64)
615	void storeToFrame(TrustedImmPtr imm, unsigned frameLocation)
616	{
617	poke(imm, frameLocation);
618	}
619	#endif
620
621	DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
622	{
623	return storePtrWithPatch(initialValue: TrustedImmPtr (nullptr), address: Address (stackPointerRegister, frameLocation * sizeof(void*)));
624	}
625
626	void loadFromFrame(unsigned frameLocation, RegisterID reg)
627	{
628	peek(dest: reg, index: frameLocation);
629	}
630
631	void loadFromFrameAndJump(unsigned frameLocation)
632	{
633	jump(Address (stackPointerRegister, frameLocation * sizeof(void*)));
634	}
635
636	unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
637	{
638	if (!callFrameSize)
639	return `0`;
640
641	callFrameSize = sizeof(void**);
642	if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
643	CRASH();
644	callFrameSize = (callFrameSize + `0x3f`) & ~`0x3f`;
645	return callFrameSize;
646	}
647	void initCallFrame()
648	{
649	unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(callFrameSize: m_pattern.m_body->m_callFrameSize);
650	if (callFrameSizeInBytes) {
651	#if CPU(X86_64) \|\| CPU(ARM64)
652	if (Options::zeroStackFrame()) {
653	// We need to start from the stack pointer, because we could have spilled callee saves
654	move(stackPointerRegister, regT0);
655	subPtr(Imm32 (callFrameSizeInBytes), stackPointerRegister);
656	if (callFrameSizeInBytes <= `128`) {
657	for (unsigned offset = `0`; offset < callFrameSizeInBytes; offset += sizeof(intptr_t))
658	storePtr(TrustedImmPtr (`0`), Address (regT0, -`8` - int(offset)));
659	} else {
660	Label zeroLoop = label();
661	subPtr(TrustedImm32 (sizeof(intptr_t) * `2`), regT0);
662	#if CPU(ARM64)
663	storePair64(ARM64Registers::zr, ARM64Registers::zr, regT0);
664	#else
665	storePtr(TrustedImmPtr (`0`), Address (regT0));
666	storePtr(TrustedImmPtr (`0`), Address (regT0, sizeof(intptr_t)));
667	#endif
668	branchPtr(NotEqual, regT0, stackPointerRegister).linkTo(zeroLoop, this);
669	}
670	} else
671	#endif
672	subPtr(Imm32 (callFrameSizeInBytes), stackPointerRegister);
673
674	}
675	}
676	void removeCallFrame()
677	{
678	unsigned callFrameSizeInBytes = alignCallFrameSizeInBytes(callFrameSize: m_pattern.m_body->m_callFrameSize);
679	if (callFrameSizeInBytes)
680	addPtr(Imm32 (callFrameSizeInBytes), stackPointerRegister);
681	}
682
683	void generateFailReturn()
684	{
685	move(TrustedImmPtr ((void*)WTF::notFound), returnRegister);
686	move(TrustedImm32 (`0`), returnRegister2);
687	generateReturn();
688	}
689
690	void generateJITFailReturn()
691	{
692	if (m_abortExecution.empty() && m_hitMatchLimit.empty())
693	return;
694
695	JumpList finishExiting;
696	if (!m_abortExecution.empty()) {
697	m_abortExecution.link(masm: this);
698	move(TrustedImmPtr ((void)static_cast*<size_t>(-`2`)), returnRegister);
699	finishExiting.append(jump());
700	}
701
702	if (!m_hitMatchLimit.empty()) {
703	m_hitMatchLimit.link(masm: this);
704	move(TrustedImmPtr ((void)static_cast*<size_t>(-`1`)), returnRegister);
705	}
706
707	finishExiting.link(masm: this);
708	removeCallFrame();
709	move(TrustedImm32 (`0`), returnRegister2);
710	generateReturn();
711	}
712
713	// Used to record subpatterns, should only be called if compileMode is IncludeSubpatterns.
714	void setSubpatternStart(RegisterID reg, unsigned subpattern)
715	{
716	ASSERT(subpattern);
717	// FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
718	store32(reg, Address (output, (subpattern << `1`) * sizeof(int)));
719	}
720	void setSubpatternEnd(RegisterID reg, unsigned subpattern)
721	{
722	ASSERT(subpattern);
723	// FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
724	store32(reg, Address (output, ((subpattern << `1`) + `1`) * sizeof(int)));
725	}
726	void clearSubpatternStart(unsigned subpattern)
727	{
728	ASSERT(subpattern);
729	// FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
730	store32(TrustedImm32 (-`1`), Address (output, (subpattern << `1`) * sizeof(int)));
731	}
732
733	void clearMatches(unsigned subpattern, unsigned lastSubpattern)
734	{
735	for (; subpattern <= lastSubpattern; subpattern++)
736	clearSubpatternStart(subpattern);
737	}
738
739	// We use one of three different strategies to track the start of the current match,
740	// while matching.
741	// 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
742	// at the end of matching. This is irrespective of compileMode, and in this case
743	// these methods should never be called.
744	// 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
745	// vector, store the match start in the output vector.
746	// 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
747	// in this register.
748	void setMatchStart(RegisterID reg)
749	{
750	ASSERT(!m_pattern.m_body->m_hasFixedSize);
751	if (compileMode == IncludeSubpatterns)
752	store32(reg, output);
753	else
754	move(reg, output);
755	}
756	void getMatchStart(RegisterID reg)
757	{
758	ASSERT(!m_pattern.m_body->m_hasFixedSize);
759	if (compileMode == IncludeSubpatterns)
760	load32(output, reg);
761	else
762	move(output, reg);
763	}
764
765	enum YarrOpCode {
766	// These nodes wrap body alternatives - those in the main disjunction,
767	// rather than subpatterns or assertions. These are chained together in
768	// a doubly linked list, with a 'begin' node for the first alternative,
769	// a 'next' node for each subsequent alternative, and an 'end' node at
770	// the end. In the case of repeating alternatives, the 'end' node also
771	// has a reference back to 'begin'.
772	OpBodyAlternativeBegin,
773	OpBodyAlternativeNext,
774	OpBodyAlternativeEnd,
775	// Similar to the body alternatives, but used for subpatterns with two
776	// or more alternatives.
777	OpNestedAlternativeBegin,
778	OpNestedAlternativeNext,
779	OpNestedAlternativeEnd,
780	// Used for alternatives in subpatterns where there is only a single
781	// alternative (backtracking is easier in these cases), or for alternatives
782	// which never need to be backtracked (those in parenthetical assertions,
783	// terminal subpatterns).
784	OpSimpleNestedAlternativeBegin,
785	OpSimpleNestedAlternativeNext,
786	OpSimpleNestedAlternativeEnd,
787	// Used to wrap 'Once' subpattern matches (quantityMaxCount == 1).
788	OpParenthesesSubpatternOnceBegin,
789	OpParenthesesSubpatternOnceEnd,
790	// Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
791	OpParenthesesSubpatternTerminalBegin,
792	OpParenthesesSubpatternTerminalEnd,
793	// Used to wrap generic captured matches
794	OpParenthesesSubpatternBegin,
795	OpParenthesesSubpatternEnd,
796	// Used to wrap parenthetical assertions.
797	OpParentheticalAssertionBegin,
798	OpParentheticalAssertionEnd,
799	// Wraps all simple terms (pattern characters, character classes).
800	OpTerm,
801	// Where an expression contains only 'once through' body alternatives
802	// and no repeating ones, this op is used to return match failure.
803	OpMatchFailed
804	};
805
806	// This structure is used to hold the compiled opcode information,
807	// including reference back to the original PatternTerm/PatternAlternatives,
808	// and JIT compilation data structures.
809	struct YarrOp {
810	explicit YarrOp(PatternTerm* term)
811	: m_op(OpTerm)
812	, m_term(term)
813	, m_isDeadCode(false)
814	{
815	}
816
817	explicit YarrOp(YarrOpCode op)
818	: m_op(op)
819	, m_isDeadCode(false)
820	{
821	}
822
823	// The operation, as a YarrOpCode, and also a reference to the PatternTerm.
824	YarrOpCode m_op;
825	PatternTerm* m_term = nullptr;
826
827	// For alternatives, this holds the PatternAlternative and doubly linked
828	// references to this alternative's siblings. In the case of the
829	// OpBodyAlternativeEnd node at the end of a section of repeating nodes,
830	// m_nextOp will reference the OpBodyAlternativeBegin node of the first
831	// repeating alternative.
832	PatternAlternative* m_alternative = nullptr;
833	size_t m_previousOp = `0`;
834	size_t m_nextOp = `0`;
835
836	// Used to record a set of Jumps out of the generated code, typically
837	// used for jumps out to backtracking code, and a single reentry back
838	// into the code for a node (likely where a backtrack will trigger
839	// rematching).
840	Label m_reentry;
841	JumpList m_jumps;
842
843	// Used for backtracking when the prior alternative did not consume any
844	// characters but matched.
845	Jump m_zeroLengthMatch;
846
847	// This flag is used to null out the second pattern character, when
848	// two are fused to match a pair together.
849	bool m_isDeadCode;
850
851	// Currently used in the case of some of the more complex management of
852	// 'm_checkedOffset', to cache the offset used in this alternative, to avoid
853	// recalculating it.
854	Checked<unsigned> m_checkAdjust;
855
856	// Used by OpNestedAlternativeNext/End to hold the pointer to the
857	// value that will be pushed into the pattern's frame to return to,
858	// upon backtracking back into the disjunction.
859	DataLabelPtr m_returnAddress;
860	};
861
862	// BacktrackingState
863	// This class encapsulates information about the state of code generation
864	// whilst generating the code for backtracking, when a term fails to match.
865	// Upon entry to code generation of the backtracking code for a given node,
866	// the Backtracking state will hold references to all control flow sources
867	// that are outputs in need of further backtracking from the prior node
868	// generated (which is the subsequent operation in the regular expression,
869	// and in the m_ops Vector, since we generated backtracking backwards).
870	// These references to control flow take the form of:
871	// - A jump list of jumps, to be linked to code that will backtrack them
872	// further.
873	// - A set of DataLabelPtr values, to be populated with values to be
874	// treated effectively as return addresses backtracking into complex
875	// subpatterns.
876	// - A flag indicating that the current sequence of generated code up to
877	// this point requires backtracking.
878	class BacktrackingState {
879	public:
880	BacktrackingState()
881	: m_pendingFallthrough(false)
882	{
883	}
884
885	// Add a jump or jumps, a return address, or set the flag indicating
886	// that the current 'fallthrough' control flow requires backtracking.
887	void append(const Jump& jump)
888	{
889	m_laterFailures.append(jump);
890	}
891	void append(JumpList& jumpList)
892	{
893	m_laterFailures.append(other: jumpList);
894	}
895	void append(const DataLabelPtr& returnAddress)
896	{
897	m_pendingReturns.append(value: returnAddress);
898	}
899	void fallthrough()
900	{
901	ASSERT(!m_pendingFallthrough);
902	m_pendingFallthrough = true;
903	}
904
905	// These methods clear the backtracking state, either linking to the
906	// current location, a provided label, or copying the backtracking out
907	// to a JumpList. All actions may require code generation to take place,
908	// and as such are passed a pointer to the assembler.
909	void link(MacroAssembler* assembler)
910	{
911	if (m_pendingReturns.size()) {
912	Label here(assembler);
913	for (unsigned i = `0`; i < m_pendingReturns.size(); ++i)
914	m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns [i], here));
915	m_pendingReturns.clear();
916	}
917	m_laterFailures.link(masm: assembler);
918	m_laterFailures.clear();
919	m_pendingFallthrough = false;
920	}
921	void linkTo(Label label, MacroAssembler* assembler)
922	{
923	if (m_pendingReturns.size()) {
924	for (unsigned i = `0`; i < m_pendingReturns.size(); ++i)
925	m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns [i], label));
926	m_pendingReturns.clear();
927	}
928	if (m_pendingFallthrough)
929	assembler->jump(target: label);
930	m_laterFailures.linkTo(label, masm: assembler);
931	m_laterFailures.clear();
932	m_pendingFallthrough = false;
933	}
934	void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
935	{
936	if (m_pendingReturns.size()) {
937	Label here(assembler);
938	for (unsigned i = `0`; i < m_pendingReturns.size(); ++i)
939	m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns [i], here));
940	m_pendingReturns.clear();
941	m_pendingFallthrough = true;
942	}
943	if (m_pendingFallthrough)
944	jumpList.append(jump: assembler->jump());
945	jumpList.append(other: m_laterFailures);
946	m_laterFailures.clear();
947	m_pendingFallthrough = false;
948	}
949
950	bool isEmpty()
951	{
952	return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
953	}
954
955	// Called at the end of code generation to link all return addresses.
956	void linkDataLabels(DefaultLinkBuffer& linkBuffer)
957	{
958	ASSERT(isEmpty());
959	for (unsigned i = `0`; i < m_backtrackRecords.size(); ++i)
960	linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
961	}
962
963	private:
964	struct ReturnAddressRecord {
965	ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
966	: m_dataLabel (dataLabel)
967	, m_backtrackLocation (backtrackLocation)
968	{
969	}
970
971	DataLabelPtr m_dataLabel;
972	Label m_backtrackLocation;
973	};
974
975	JumpList m_laterFailures;
976	bool m_pendingFallthrough;
977	Vector<DataLabelPtr, `4`> m_pendingReturns;
978	Vector<ReturnAddressRecord, `4`> m_backtrackRecords;
979	};
980
981	// Generation methods:
982	// ===================
983
984	// This method provides a default implementation of backtracking common
985	// to many terms; terms commonly jump out of the forwards matching path
986	// on any failed conditions, and add these jumps to the m_jumps list. If
987	// no special handling is required we can often just backtrack to m_jumps.
988	void backtrackTermDefault(size_t opIndex)
989	{
990	YarrOp& op = m_ops[opIndex];
991	m_backtrackingState.append(op.m_jumps);
992	}
993
994	void generateAssertionBOL(size_t opIndex)
995	{
996	YarrOp& op = m_ops[opIndex];
997	PatternTerm* term = op.m_term;
998
999	if (m_pattern.multiline()) {
1000	const RegisterID character = regT0;
1001
1002	JumpList matchDest;
1003	if (!term->inputPosition)
1004	matchDest.append(branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet())));
1005
1006	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition + `1`, resultReg: character);
1007	matchCharacterClass(character, matchDest, charClass: m_pattern.newlineCharacterClass());
1008	op.m_jumps.append(jump());
1009
1010	matchDest.link(masm: this);
1011	} else {
1012	// Erk, really should poison out these alternatives early. :-/
1013	if (term->inputPosition)
1014	op.m_jumps.append(jump());
1015	else
1016	op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checkedOffset.unsafeGet())));
1017	}
1018	}
1019	void backtrackAssertionBOL(size_t opIndex)
1020	{
1021	backtrackTermDefault(opIndex);
1022	}
1023
1024	void generateAssertionEOL(size_t opIndex)
1025	{
1026	YarrOp& op = m_ops[opIndex];
1027	PatternTerm* term = op.m_term;
1028
1029	if (m_pattern.multiline()) {
1030	const RegisterID character = regT0;
1031
1032	JumpList matchDest;
1033	if (term->inputPosition == m_checkedOffset.unsafeGet())
1034	matchDest.append(atEndOfInput());
1035
1036	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1037	matchCharacterClass(character, matchDest, charClass: m_pattern.newlineCharacterClass());
1038	op.m_jumps.append(jump());
1039
1040	matchDest.link(masm: this);
1041	} else {
1042	if (term->inputPosition == m_checkedOffset.unsafeGet())
1043	op.m_jumps.append(notAtEndOfInput());
1044	// Erk, really should poison out these alternatives early. :-/
1045	else
1046	op.m_jumps.append(jump());
1047	}
1048	}
1049	void backtrackAssertionEOL(size_t opIndex)
1050	{
1051	backtrackTermDefault(opIndex);
1052	}
1053
1054	// Also falls though on nextIsNotWordChar.
1055	void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
1056	{
1057	YarrOp& op = m_ops[opIndex];
1058	PatternTerm* term = op.m_term;
1059
1060	const RegisterID character = regT0;
1061
1062	if (term->inputPosition == m_checkedOffset.unsafeGet())
1063	nextIsNotWordChar.append(atEndOfInput());
1064
1065	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1066
1067	CharacterClass* wordcharCharacterClass;
1068
1069	if (m_unicodeIgnoreCase)
1070	wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1071	else
1072	wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1073
1074	matchCharacterClass(character, matchDest&: nextIsWordChar, charClass: wordcharCharacterClass);
1075	}
1076
1077	void generateAssertionWordBoundary(size_t opIndex)
1078	{
1079	YarrOp& op = m_ops[opIndex];
1080	PatternTerm* term = op.m_term;
1081
1082	const RegisterID character = regT0;
1083
1084	Jump atBegin;
1085	JumpList matchDest;
1086	if (!term->inputPosition)
1087	atBegin = branch32(Equal, index, Imm32(m_checkedOffset.unsafeGet()));
1088	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition + `1`, resultReg: character);
1089
1090	CharacterClass* wordcharCharacterClass;
1091
1092	if (m_unicodeIgnoreCase)
1093	wordcharCharacterClass = m_pattern.wordUnicodeIgnoreCaseCharCharacterClass();
1094	else
1095	wordcharCharacterClass = m_pattern.wordcharCharacterClass();
1096
1097	matchCharacterClass(character, matchDest, charClass: wordcharCharacterClass);
1098	if (!term->inputPosition)
1099	atBegin.link(masm: this);
1100
1101	// We fall through to here if the last character was not a wordchar.
1102	JumpList nonWordCharThenWordChar;
1103	JumpList nonWordCharThenNonWordChar;
1104	if (term->invert()) {
1105	matchAssertionWordchar(opIndex, nextIsWordChar&: nonWordCharThenNonWordChar, nextIsNotWordChar&: nonWordCharThenWordChar);
1106	nonWordCharThenWordChar.append(jump());
1107	} else {
1108	matchAssertionWordchar(opIndex, nextIsWordChar&: nonWordCharThenWordChar, nextIsNotWordChar&: nonWordCharThenNonWordChar);
1109	nonWordCharThenNonWordChar.append(jump());
1110	}
1111	op.m_jumps.append(nonWordCharThenNonWordChar);
1112
1113	// We jump here if the last character was a wordchar.
1114	matchDest.link(masm: this);
1115	JumpList wordCharThenWordChar;
1116	JumpList wordCharThenNonWordChar;
1117	if (term->invert()) {
1118	matchAssertionWordchar(opIndex, nextIsWordChar&: wordCharThenNonWordChar, nextIsNotWordChar&: wordCharThenWordChar);
1119	wordCharThenWordChar.append(jump());
1120	} else {
1121	matchAssertionWordchar(opIndex, nextIsWordChar&: wordCharThenWordChar, nextIsNotWordChar&: wordCharThenNonWordChar);
1122	// This can fall-though!
1123	}
1124
1125	op.m_jumps.append(wordCharThenWordChar);
1126
1127	nonWordCharThenWordChar.link(masm: this);
1128	wordCharThenNonWordChar.link(masm: this);
1129	}
1130	void backtrackAssertionWordBoundary(size_t opIndex)
1131	{
1132	backtrackTermDefault(opIndex);
1133	}
1134
1135	#if ENABLE(YARR_JIT_BACKREFERENCES)
1136	void matchBackreference(size_t opIndex, JumpList& characterMatchFails, RegisterID character, RegisterID patternIndex, RegisterID patternCharacter)
1137	{
1138	YarrOp& op = m_ops[opIndex];
1139	PatternTerm* term = op.m_term;
1140	unsigned subpatternId = term->backReferenceSubpatternId;
1141
1142	Label loop(this);
1143
1144	readCharacterDontDecodeSurrogates(negativeCharacterOffset: `0`, resultReg: patternCharacter, indexReg: patternIndex);
1145	readCharacterDontDecodeSurrogates(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1146
1147	if (!m_pattern.ignoreCase())
1148	characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1149	else {
1150	Jump charactersMatch = branch32(Equal, character, patternCharacter);
1151	ExtendedAddress characterTableEntry(character, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1152	load16(characterTableEntry, character);
1153	ExtendedAddress patternTableEntry(patternCharacter, reinterpret_cast<intptr_t>(&canonicalTableLChar));
1154	load16(patternTableEntry, patternCharacter);
1155	characterMatchFails.append(branch32(NotEqual, character, patternCharacter));
1156	charactersMatch.link(masm: this);
1157	}
1158
1159
1160	add32(TrustedImm32 (`1`), index);
1161	add32(TrustedImm32 (`1`), patternIndex);
1162
1163	branch32(NotEqual, patternIndex, Address (output, ((subpatternId << `1`) + `1`) * sizeof(int))).linkTo(loop, this);
1164	}
1165
1166	void generateBackReference(size_t opIndex)
1167	{
1168	YarrOp& op = m_ops[opIndex];
1169	PatternTerm* term = op.m_term;
1170
1171	if (m_pattern.ignoreCase() && m_charSize != Char8) {
1172	m_failureReason = JITFailureReason::BackReference;
1173	return;
1174	}
1175
1176	unsigned subpatternId = term->backReferenceSubpatternId;
1177	unsigned parenthesesFrameLocation = term->frameLocation;
1178
1179	const RegisterID characterOrTemp = regT0;
1180	const RegisterID patternIndex = regT1;
1181	const RegisterID patternTemp = regT2;
1182
1183	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1184	if (term->quantityType != QuantifierFixedCount \|\| term->quantityMaxCount != `1`)
1185	storeToFrame(TrustedImm32 (`0`), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1186
1187	JumpList matches;
1188
1189	if (term->quantityType != QuantifierNonGreedy) {
1190	load32(Address (output, (subpatternId << `1`) * sizeof(int)), patternIndex);
1191	load32(Address (output, ((subpatternId << `1`) + `1`) * sizeof(int)), patternTemp);
1192
1193	// An empty match is successful without consuming characters
1194	if (term->quantityType != QuantifierFixedCount \|\| term->quantityMaxCount != `1`) {
1195	matches.append(branch32(Equal, TrustedImm32 (-`1`), patternIndex));
1196	matches.append(branch32(Equal, patternIndex, patternTemp));
1197	} else {
1198	Jump zeroLengthMatch = branch32(Equal, TrustedImm32 (-`1`), patternIndex);
1199	Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1200	zeroLengthMatch.link(masm: this);
1201	storeToFrame(TrustedImm32 (`1`), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1202	matches.append(jump());
1203	tryNonZeroMatch.link(masm: this);
1204	}
1205	}
1206
1207	switch (term->quantityType) {
1208	case QuantifierFixedCount: {
1209	Label outerLoop(this);
1210
1211	// PatternTemp should contain pattern end index at this point
1212	sub32(patternIndex, patternTemp);
1213	if (m_checkedOffset - term->inputPosition)
1214	sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
1215	op.m_jumps.append(checkNotEnoughInput(additionalAmount: patternTemp));
1216
1217	matchBackreference(opIndex, characterMatchFails&: op.m_jumps, character: characterOrTemp, patternIndex, patternCharacter: patternTemp);
1218
1219	if (term->quantityMaxCount != `1`) {
1220	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), reg: characterOrTemp);
1221	add32(TrustedImm32 (`1`), characterOrTemp);
1222	storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1223	matches.append(branch32(Equal, Imm32 (term->quantityMaxCount.unsafeGet()), characterOrTemp));
1224	load32(Address (output, (subpatternId << `1`) * sizeof(int)), patternIndex);
1225	load32(Address (output, ((subpatternId << `1`) + `1`) * sizeof(int)), patternTemp);
1226	jump(outerLoop);
1227	}
1228	matches.link(masm: this);
1229	break;
1230	}
1231
1232	case QuantifierGreedy: {
1233	JumpList incompleteMatches;
1234
1235	Label outerLoop(this);
1236
1237	// PatternTemp should contain pattern end index at this point
1238	sub32(patternIndex, patternTemp);
1239	if (m_checkedOffset - term->inputPosition)
1240	sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
1241	matches.append(checkNotEnoughInput(additionalAmount: patternTemp));
1242
1243	matchBackreference(opIndex, characterMatchFails&: incompleteMatches, character: characterOrTemp, patternIndex, patternCharacter: patternTemp);
1244
1245	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), reg: characterOrTemp);
1246	add32(TrustedImm32 (`1`), characterOrTemp);
1247	storeToFrame(characterOrTemp, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1248	if (term->quantityMaxCount != quantifyInfinite)
1249	matches.append(branch32(Equal, Imm32 (term->quantityMaxCount.unsafeGet()), characterOrTemp));
1250	load32(Address (output, (subpatternId << `1`) * sizeof(int)), patternIndex);
1251	load32(Address (output, ((subpatternId << `1`) + `1`) * sizeof(int)), patternTemp);
1252
1253	// Store current index in frame for restoring after a partial match
1254	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1255	jump(outerLoop);
1256
1257	incompleteMatches.link(masm: this);
1258	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), reg: index);
1259
1260	matches.link(masm: this);
1261	op.m_reentry = label();
1262	break;
1263	}
1264
1265	case QuantifierNonGreedy: {
1266	JumpList incompleteMatches;
1267
1268	matches.append(jump());
1269
1270	op.m_reentry = label();
1271
1272	load32(Address (output, (subpatternId << `1`) * sizeof(int)), patternIndex);
1273	load32(Address (output, ((subpatternId << `1`) + `1`) * sizeof(int)), patternTemp);
1274
1275	// An empty match is successful without consuming characters
1276	Jump zeroLengthMatch = branch32(Equal, TrustedImm32 (-`1`), patternIndex);
1277	Jump tryNonZeroMatch = branch32(NotEqual, patternIndex, patternTemp);
1278	zeroLengthMatch.link(masm: this);
1279	storeToFrame(TrustedImm32 (`1`), parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1280	matches.append(jump());
1281	tryNonZeroMatch.link(masm: this);
1282
1283	// Check if we have input remaining to match
1284	sub32(patternIndex, patternTemp);
1285	if (m_checkedOffset - term->inputPosition)
1286	sub32(Imm32((m_checkedOffset - term->inputPosition).unsafeGet()), patternTemp);
1287	matches.append(checkNotEnoughInput(additionalAmount: patternTemp));
1288
1289	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex());
1290
1291	matchBackreference(opIndex, characterMatchFails&: incompleteMatches, character: characterOrTemp, patternIndex, patternCharacter: patternTemp);
1292
1293	matches.append(jump());
1294
1295	incompleteMatches.link(masm: this);
1296	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), reg: index);
1297
1298	matches.link(masm: this);
1299	break;
1300	}
1301	}
1302	}
1303	void backtrackBackReference(size_t opIndex)
1304	{
1305	YarrOp& op = m_ops[opIndex];
1306	PatternTerm* term = op.m_term;
1307
1308	unsigned subpatternId = term->backReferenceSubpatternId;
1309
1310	m_backtrackingState.link(this);
1311	op.m_jumps.link(this);
1312
1313	JumpList failures;
1314
1315	unsigned parenthesesFrameLocation = term->frameLocation;
1316	switch (term->quantityType) {
1317	case QuantifierFixedCount:
1318	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::beginIndex(), reg: index);
1319	break;
1320
1321	case QuantifierGreedy: {
1322	const RegisterID matchAmount = regT0;
1323	const RegisterID patternStartIndex = regT1;
1324	const RegisterID patternEndIndexOrLen = regT2;
1325
1326	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), reg: matchAmount);
1327	failures.append(branchTest32(Zero, matchAmount));
1328
1329	load32(Address (output, (subpatternId << `1`) * sizeof(int)), patternStartIndex);
1330	load32(Address (output, ((subpatternId << `1`) + `1`) * sizeof(int)), patternEndIndexOrLen);
1331	sub32(patternStartIndex, patternEndIndexOrLen);
1332	sub32(patternEndIndexOrLen, index);
1333
1334	sub32(TrustedImm32 (`1`), matchAmount);
1335	storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1336	jump(op.m_reentry);
1337	break;
1338	}
1339
1340	case QuantifierNonGreedy: {
1341	const RegisterID matchAmount = regT0;
1342
1343	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex(), reg: matchAmount);
1344	if (term->quantityMaxCount != quantifyInfinite)
1345	failures.append(branch32(AboveOrEqual, Imm32 (term->quantityMaxCount.unsafeGet()), matchAmount));
1346	add32(TrustedImm32 (`1`), matchAmount);
1347	storeToFrame(matchAmount, parenthesesFrameLocation + BackTrackInfoBackReference::matchAmountIndex());
1348	jump(op.m_reentry);
1349	break;
1350	}
1351	}
1352	failures.link(masm: this);
1353	m_backtrackingState.fallthrough();
1354	}
1355	#endif
1356
1357	void generatePatternCharacterOnce(size_t opIndex)
1358	{
1359	YarrOp& op = m_ops[opIndex];
1360
1361	if (op.m_isDeadCode)
1362	return;
1363
1364	// m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
1365	// node, so there must always be at least one more node.
1366	ASSERT(opIndex + `1` < m_ops.size());
1367	YarrOp* nextOp = &m_ops[opIndex + `1`];
1368
1369	PatternTerm* term = op.m_term;
1370	UChar32 ch = term->patternCharacter;
1371
1372	if ((ch > `0xff`) && (m_charSize == Char8)) {
1373	// Have a 16 bit pattern character and an 8 bit string - short circuit
1374	op.m_jumps.append(jump());
1375	return;
1376	}
1377
1378	const RegisterID character = regT0;
1379	#if CPU(X86_64) \|\| CPU(ARM64)
1380	unsigned maxCharactersAtOnce = m_charSize == Char8 ? `8` : `4`;
1381	#else
1382	unsigned maxCharactersAtOnce = m_charSize == Char8 ? `4` : `2`;
1383	#endif
1384	uint64_t ignoreCaseMask = `0`;
1385	#if CPU(BIG_ENDIAN)
1386	uint64_t allCharacters = ch << (m_charSize == Char8 ? `24` : `16`);
1387	#else
1388	uint64_t allCharacters = ch;
1389	#endif
1390	unsigned numberCharacters;
1391	unsigned startTermPosition = term->inputPosition;
1392
1393	// For case-insesitive compares, non-ascii characters that have different
1394	// upper & lower case representations are converted to a character class.
1395	ASSERT(!m_pattern.ignoreCase() \|\| isASCIIAlpha(ch) \|\| isCanonicallyUnique(ch, m_canonicalMode));
1396
1397	if (m_pattern.ignoreCase() && isASCIIAlpha(c: ch)) {
1398	#if CPU(BIG_ENDIAN)
1399	ignoreCaseMask \|= `32` << (m_charSize == Char8 ? `24` : `16`);
1400	#else
1401	ignoreCaseMask \|= `32`;
1402	#endif
1403	}
1404
1405	for (numberCharacters = `1`; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
1406	PatternTerm* nextTerm = nextOp->m_term;
1407
1408	// YarrJIT handles decoded surrogate pair as one character if unicode flag is enabled.
1409	// Note that the numberCharacters become 1 while the width of the pattern character becomes 32bit in this case.
1410	if (nextTerm->type != PatternTerm::TypePatternCharacter
1411	\|\| nextTerm->quantityType != QuantifierFixedCount
1412	\|\| nextTerm->quantityMaxCount != `1`
1413	\|\| nextTerm->inputPosition != (startTermPosition + numberCharacters)
1414	\|\| (U16_LENGTH(nextTerm->patternCharacter) != `1` && m_decodeSurrogatePairs))
1415	break;
1416
1417	nextOp->m_isDeadCode = true;
1418
1419	#if CPU(BIG_ENDIAN)
1420	int shiftAmount = (m_charSize == Char8 ? `24` : `16`) - ((m_charSize == Char8 ? `8` : `16`) * numberCharacters);
1421	#else
1422	int shiftAmount = (m_charSize == Char8 ? `8` : `16`) * numberCharacters;
1423	#endif
1424
1425	UChar32 currentCharacter = nextTerm->patternCharacter;
1426
1427	if ((currentCharacter > `0xff`) && (m_charSize == Char8)) {
1428	// Have a 16 bit pattern character and an 8 bit string - short circuit
1429	op.m_jumps.append(jump());
1430	return;
1431	}
1432
1433	// For case-insesitive compares, non-ascii characters that have different
1434	// upper & lower case representations are converted to a character class.
1435	ASSERT(!m_pattern.ignoreCase() \|\| isASCIIAlpha(currentCharacter) \|\| isCanonicallyUnique(currentCharacter, m_canonicalMode));
1436
1437	allCharacters \|= (static_cast<uint64_t>(currentCharacter) << shiftAmount);
1438
1439	if ((m_pattern.ignoreCase()) && (isASCIIAlpha(c: currentCharacter)))
1440	ignoreCaseMask \|= `32ULL` << shiftAmount;
1441	}
1442
1443	if (m_decodeSurrogatePairs)
1444	op.m_jumps.append(jumpIfNoAvailableInput());
1445
1446	if (m_charSize == Char8) {
1447	auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1448	op.m_jumps.append(jumpIfCharNotEquals(ch: characters, negativeCharacterOffset: offset, character));
1449	};
1450
1451	auto check2 = [&] (Checked<unsigned> offset, uint16_t characters, uint16_t mask) {
1452	load16Unaligned(negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), character);
1453	if (mask)
1454	or32(Imm32 (mask), character);
1455	op.m_jumps.append(branch32(NotEqual, character, Imm32 (characters \| mask)));
1456	};
1457
1458	auto check4 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1459	if (mask) {
1460	load32WithUnalignedHalfWords(address: negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), dest: character);
1461	if (mask)
1462	or32(Imm32 (mask), character);
1463	op.m_jumps.append(branch32(NotEqual, character, Imm32 (characters \| mask)));
1464	return;
1465	}
1466	op.m_jumps.append(branch32WithUnalignedHalfWords(cond: NotEqual, left: negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), right: TrustedImm32 (characters)));
1467	};
1468
1469	#if CPU(X86_64) \|\| CPU(ARM64)
1470	auto check8 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1471	load64(negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), character);
1472	if (mask)
1473	or64(TrustedImm64 (mask), character);
1474	op.m_jumps.append(branch64(NotEqual, character, TrustedImm64 (characters \| mask)));
1475	};
1476	#endif
1477
1478	switch (numberCharacters) {
1479	case `1`:
1480	// Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1481	check1(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`);
1482	return;
1483	case `2`: {
1484	check2(m_checkedOffset - startTermPosition, allCharacters & `0xffff`, ignoreCaseMask & `0xffff`);
1485	return;
1486	}
1487	case `3`: {
1488	check2(m_checkedOffset - startTermPosition, allCharacters & `0xffff`, ignoreCaseMask & `0xffff`);
1489	check1(m_checkedOffset - startTermPosition - `2`, (allCharacters >> `16`) & `0xff`);
1490	return;
1491	}
1492	case `4`: {
1493	check4(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1494	return;
1495	}
1496	#if CPU(X86_64) \|\| CPU(ARM64)
1497	case `5`: {
1498	check4(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1499	check1(m_checkedOffset - startTermPosition - `4`, (allCharacters >> `32`) & `0xff`);
1500	return;
1501	}
1502	case `6`: {
1503	check4(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1504	check2(m_checkedOffset - startTermPosition - `4`, (allCharacters >> `32`) & `0xffff`, (ignoreCaseMask >> `32`) & `0xffff`);
1505	return;
1506	}
1507	case `7`: {
1508	check4(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1509	check2(m_checkedOffset - startTermPosition - `4`, (allCharacters >> `32`) & `0xffff`, (ignoreCaseMask >> `32`) & `0xffff`);
1510	check1(m_checkedOffset - startTermPosition - `6`, (allCharacters >> `48`) & `0xff`);
1511	return;
1512	}
1513	case `8`: {
1514	check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1515	return;
1516	}
1517	#endif
1518	}
1519	} else {
1520	auto check1 = [&] (Checked<unsigned> offset, UChar32 characters) {
1521	op.m_jumps.append(jumpIfCharNotEquals(ch: characters, negativeCharacterOffset: offset, character));
1522	};
1523
1524	auto check2 = [&] (Checked<unsigned> offset, unsigned characters, unsigned mask) {
1525	if (mask) {
1526	load32WithUnalignedHalfWords(address: negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), dest: character);
1527	if (mask)
1528	or32(Imm32 (mask), character);
1529	op.m_jumps.append(branch32(NotEqual, character, Imm32 (characters \| mask)));
1530	return;
1531	}
1532	op.m_jumps.append(branch32WithUnalignedHalfWords(cond: NotEqual, left: negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), right: TrustedImm32 (characters)));
1533	};
1534
1535	#if CPU(X86_64) \|\| CPU(ARM64)
1536	auto check4 = [&] (Checked<unsigned> offset, uint64_t characters, uint64_t mask) {
1537	load64(negativeOffsetIndexedAddress(negativeCharacterOffset: offset, tempReg: character), character);
1538	if (mask)
1539	or64(TrustedImm64 (mask), character);
1540	op.m_jumps.append(branch64(NotEqual, character, TrustedImm64 (characters \| mask)));
1541	};
1542	#endif
1543
1544	switch (numberCharacters) {
1545	case `1`:
1546	// Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
1547	check1(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`);
1548	return;
1549	case `2`:
1550	check2(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1551	return;
1552	#if CPU(X86_64) \|\| CPU(ARM64)
1553	case `3`:
1554	check2(m_checkedOffset - startTermPosition, allCharacters & `0xffffffff`, ignoreCaseMask & `0xffffffff`);
1555	check1(m_checkedOffset - startTermPosition - `2`, (allCharacters >> `32`) & `0xffff`);
1556	return;
1557	case `4`:
1558	check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
1559	return;
1560	#endif
1561	}
1562	}
1563	}
1564	void backtrackPatternCharacterOnce(size_t opIndex)
1565	{
1566	backtrackTermDefault(opIndex);
1567	}
1568
1569	void generatePatternCharacterFixed(size_t opIndex)
1570	{
1571	YarrOp& op = m_ops[opIndex];
1572	PatternTerm* term = op.m_term;
1573	UChar32 ch = term->patternCharacter;
1574
1575	const RegisterID character = regT0;
1576	const RegisterID countRegister = regT1;
1577
1578	if (m_decodeSurrogatePairs)
1579	op.m_jumps.append(jumpIfNoAvailableInput());
1580
1581	move(index, countRegister);
1582	Checked<unsigned> scaledMaxCount = term->quantityMaxCount;
1583	scaledMaxCount *= U_IS_BMP(ch) ? `1` : `2`;
1584	sub32(Imm32 (scaledMaxCount.unsafeGet()), countRegister);
1585
1586	Label loop(this);
1587	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition - scaledMaxCount, resultReg: character, indexReg: countRegister);
1588	// For case-insesitive compares, non-ascii characters that have different
1589	// upper & lower case representations are converted to a character class.
1590	ASSERT(!m_pattern.ignoreCase() \|\| isASCIIAlpha(ch) \|\| isCanonicallyUnique(ch, m_canonicalMode));
1591	if (m_pattern.ignoreCase() && isASCIIAlpha(c: ch)) {
1592	or32(TrustedImm32 (`0x20`), character);
1593	ch \|= `0x20`;
1594	}
1595
1596	op.m_jumps.append(branch32(NotEqual, character, Imm32 (ch)));
1597	#ifdef JIT_UNICODE_EXPRESSIONS
1598	if (m_decodeSurrogatePairs && !U_IS_BMP(ch))
1599	add32(TrustedImm32 (`2`), countRegister);
1600	else
1601	#endif
1602	add32(TrustedImm32 (`1`), countRegister);
1603	branch32(NotEqual, countRegister, index).linkTo(loop, this);
1604	}
1605	void backtrackPatternCharacterFixed(size_t opIndex)
1606	{
1607	backtrackTermDefault(opIndex);
1608	}
1609
1610	void generatePatternCharacterGreedy(size_t opIndex)
1611	{
1612	YarrOp& op = m_ops[opIndex];
1613	PatternTerm* term = op.m_term;
1614	UChar32 ch = term->patternCharacter;
1615
1616	const RegisterID character = regT0;
1617	const RegisterID countRegister = regT1;
1618
1619	move(TrustedImm32 (`0`), countRegister);
1620
1621	// Unless have a 16 bit pattern character and an 8 bit string - short circuit
1622	if (!((ch > `0xff`) && (m_charSize == Char8))) {
1623	JumpList failures;
1624	Label loop(this);
1625	failures.append(atEndOfInput());
1626	failures.append(jumpIfCharNotEquals(ch, negativeCharacterOffset: m_checkedOffset - term->inputPosition, character));
1627
1628	add32(TrustedImm32 (`1`), index);
1629	#ifdef JIT_UNICODE_EXPRESSIONS
1630	if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1631	Jump surrogatePairOk = notAtEndOfInput();
1632	sub32(TrustedImm32 (`1`), index);
1633	failures.append(jump());
1634	surrogatePairOk.link(masm: this);
1635	add32(TrustedImm32 (`1`), index);
1636	}
1637	#endif
1638	add32(TrustedImm32 (`1`), countRegister);
1639
1640	if (term->quantityMaxCount == quantifyInfinite)
1641	jump(loop);
1642	else
1643	branch32(NotEqual, countRegister, Imm32 (term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1644
1645	failures.link(masm: this);
1646	}
1647	op.m_reentry = label();
1648
1649	storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1650	}
1651	void backtrackPatternCharacterGreedy(size_t opIndex)
1652	{
1653	YarrOp& op = m_ops[opIndex];
1654	PatternTerm* term = op.m_term;
1655
1656	const RegisterID countRegister = regT1;
1657
1658	m_backtrackingState.link(this);
1659
1660	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), reg: countRegister);
1661	m_backtrackingState.append(branchTest32(Zero, countRegister));
1662	sub32(TrustedImm32 (`1`), countRegister);
1663	if (!m_decodeSurrogatePairs \|\| U_IS_BMP(term->patternCharacter))
1664	sub32(TrustedImm32 (`1`), index);
1665	else
1666	sub32(TrustedImm32 (`2`), index);
1667	jump(op.m_reentry);
1668	}
1669
1670	void generatePatternCharacterNonGreedy(size_t opIndex)
1671	{
1672	YarrOp& op = m_ops[opIndex];
1673	PatternTerm* term = op.m_term;
1674
1675	const RegisterID countRegister = regT1;
1676
1677	move(TrustedImm32 (`0`), countRegister);
1678	op.m_reentry = label();
1679	storeToFrame(countRegister, term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex());
1680	}
1681	void backtrackPatternCharacterNonGreedy(size_t opIndex)
1682	{
1683	YarrOp& op = m_ops[opIndex];
1684	PatternTerm* term = op.m_term;
1685	UChar32 ch = term->patternCharacter;
1686
1687	const RegisterID character = regT0;
1688	const RegisterID countRegister = regT1;
1689
1690	m_backtrackingState.link(this);
1691
1692	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoPatternCharacter::matchAmountIndex(), reg: countRegister);
1693
1694	// Unless have a 16 bit pattern character and an 8 bit string - short circuit
1695	if (!((ch > `0xff`) && (m_charSize == Char8))) {
1696	JumpList nonGreedyFailures;
1697	nonGreedyFailures.append(atEndOfInput());
1698	if (term->quantityMaxCount != quantifyInfinite)
1699	nonGreedyFailures.append(branch32(Equal, countRegister, Imm32 (term->quantityMaxCount.unsafeGet())));
1700	nonGreedyFailures.append(jumpIfCharNotEquals(ch, negativeCharacterOffset: m_checkedOffset - term->inputPosition, character));
1701
1702	add32(TrustedImm32 (`1`), index);
1703	#ifdef JIT_UNICODE_EXPRESSIONS
1704	if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1705	Jump surrogatePairOk = notAtEndOfInput();
1706	sub32(TrustedImm32 (`1`), index);
1707	nonGreedyFailures.append(jump());
1708	surrogatePairOk.link(masm: this);
1709	add32(TrustedImm32 (`1`), index);
1710	}
1711	#endif
1712	add32(TrustedImm32 (`1`), countRegister);
1713
1714	jump(op.m_reentry);
1715	nonGreedyFailures.link(masm: this);
1716	}
1717
1718	if (m_decodeSurrogatePairs && !U_IS_BMP(ch)) {
1719	// subtract countRegister2 for non-BMP characters*
1720	lshift32(TrustedImm32 (`1`), countRegister);
1721	}
1722
1723	sub32(countRegister, index);
1724	m_backtrackingState.fallthrough();
1725	}
1726
1727	void generateCharacterClassOnce(size_t opIndex)
1728	{
1729	YarrOp& op = m_ops[opIndex];
1730	PatternTerm* term = op.m_term;
1731
1732	const RegisterID character = regT0;
1733
1734	if (m_decodeSurrogatePairs) {
1735	op.m_jumps.append(jumpIfNoAvailableInput());
1736	storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1737	}
1738
1739	JumpList matchDest;
1740	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1741	// If we are matching the "any character" builtin class we only need to read the
1742	// character and don't need to match as it will always succeed.
1743	if (term->invert() \|\| !term->characterClass->m_anyCharacter) {
1744	matchCharacterClass(character, matchDest, charClass: term->characterClass);
1745
1746	if (term->invert())
1747	op.m_jumps.append(matchDest);
1748	else {
1749	op.m_jumps.append(jump());
1750	matchDest.link(masm: this);
1751	}
1752	}
1753	#ifdef JIT_UNICODE_EXPRESSIONS
1754	if (m_decodeSurrogatePairs) {
1755	Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1756	add32(TrustedImm32 (`1`), index);
1757	isBMPChar.link(masm: this);
1758	}
1759	#endif
1760	}
1761	void backtrackCharacterClassOnce(size_t opIndex)
1762	{
1763	#ifdef JIT_UNICODE_EXPRESSIONS
1764	if (m_decodeSurrogatePairs) {
1765	YarrOp& op = m_ops[opIndex];
1766	PatternTerm* term = op.m_term;
1767
1768	m_backtrackingState.link(this);
1769	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), reg: index);
1770	m_backtrackingState.fallthrough();
1771	}
1772	#endif
1773	backtrackTermDefault(opIndex);
1774	}
1775
1776	void generateCharacterClassFixed(size_t opIndex)
1777	{
1778	YarrOp& op = m_ops[opIndex];
1779	PatternTerm* term = op.m_term;
1780
1781	const RegisterID character = regT0;
1782	const RegisterID countRegister = regT1;
1783
1784	if (m_decodeSurrogatePairs)
1785	op.m_jumps.append(jumpIfNoAvailableInput());
1786
1787	move(index, countRegister);
1788	sub32(Imm32 (term->quantityMaxCount.unsafeGet()), countRegister);
1789
1790	Label loop(this);
1791	JumpList matchDest;
1792	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition - term->quantityMaxCount, resultReg: character, indexReg: countRegister);
1793	// If we are matching the "any character" builtin class we only need to read the
1794	// character and don't need to match as it will always succeed.
1795	if (term->invert() \|\| !term->characterClass->m_anyCharacter) {
1796	matchCharacterClass(character, matchDest, charClass: term->characterClass);
1797
1798	if (term->invert())
1799	op.m_jumps.append(matchDest);
1800	else {
1801	op.m_jumps.append(jump());
1802	matchDest.link(masm: this);
1803	}
1804	}
1805
1806	add32(TrustedImm32 (`1`), countRegister);
1807	#ifdef JIT_UNICODE_EXPRESSIONS
1808	if (m_decodeSurrogatePairs) {
1809	Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1810	op.m_jumps.append(atEndOfInput());
1811	add32(TrustedImm32 (`1`), countRegister);
1812	add32(TrustedImm32 (`1`), index);
1813	isBMPChar.link(masm: this);
1814	}
1815	#endif
1816	branch32(NotEqual, countRegister, index).linkTo(loop, this);
1817	}
1818	void backtrackCharacterClassFixed(size_t opIndex)
1819	{
1820	backtrackTermDefault(opIndex);
1821	}
1822
1823	void generateCharacterClassGreedy(size_t opIndex)
1824	{
1825	YarrOp& op = m_ops[opIndex];
1826	PatternTerm* term = op.m_term;
1827
1828	const RegisterID character = regT0;
1829	const RegisterID countRegister = regT1;
1830
1831	if (m_decodeSurrogatePairs)
1832	storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1833	move(TrustedImm32 (`0`), countRegister);
1834
1835	JumpList failures;
1836	Label loop(this);
1837	failures.append(atEndOfInput());
1838
1839	if (term->invert()) {
1840	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1841	matchCharacterClass(character, matchDest&: failures, charClass: term->characterClass);
1842	} else {
1843	JumpList matchDest;
1844	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1845	// If we are matching the "any character" builtin class we only need to read the
1846	// character and don't need to match as it will always succeed.
1847	if (!term->characterClass->m_anyCharacter) {
1848	matchCharacterClass(character, matchDest, charClass: term->characterClass);
1849	failures.append(jump());
1850	}
1851	matchDest.link(masm: this);
1852	}
1853
1854	add32(TrustedImm32 (`1`), index);
1855	#ifdef JIT_UNICODE_EXPRESSIONS
1856	if (m_decodeSurrogatePairs) {
1857	failures.append(atEndOfInput());
1858	Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1859	add32(TrustedImm32 (`1`), index);
1860	isBMPChar.link(masm: this);
1861	}
1862	#endif
1863	add32(TrustedImm32 (`1`), countRegister);
1864
1865	if (term->quantityMaxCount != quantifyInfinite) {
1866	branch32(NotEqual, countRegister, Imm32 (term->quantityMaxCount.unsafeGet())).linkTo(loop, this);
1867	failures.append(jump());
1868	} else
1869	jump(loop);
1870
1871	failures.link(masm: this);
1872	op.m_reentry = label();
1873
1874	storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1875	}
1876	void backtrackCharacterClassGreedy(size_t opIndex)
1877	{
1878	YarrOp& op = m_ops[opIndex];
1879	PatternTerm* term = op.m_term;
1880
1881	const RegisterID countRegister = regT1;
1882
1883	m_backtrackingState.link(this);
1884
1885	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), reg: countRegister);
1886	m_backtrackingState.append(branchTest32(Zero, countRegister));
1887	sub32(TrustedImm32 (`1`), countRegister);
1888	if (!m_decodeSurrogatePairs)
1889	sub32(TrustedImm32 (`1`), index);
1890	else {
1891	const RegisterID character = regT0;
1892
1893	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), reg: index);
1894	// Rematch one less
1895	storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1896
1897	Label rematchLoop(this);
1898	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1899
1900	sub32(TrustedImm32 (`1`), countRegister);
1901	add32(TrustedImm32 (`1`), index);
1902
1903	#ifdef JIT_UNICODE_EXPRESSIONS
1904	Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1905	add32(TrustedImm32 (`1`), index);
1906	isBMPChar.link(masm: this);
1907	#endif
1908
1909	branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
1910
1911	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), reg: countRegister);
1912	}
1913	jump(op.m_reentry);
1914	}
1915
1916	void generateCharacterClassNonGreedy(size_t opIndex)
1917	{
1918	YarrOp& op = m_ops[opIndex];
1919	PatternTerm* term = op.m_term;
1920
1921	const RegisterID countRegister = regT1;
1922
1923	move(TrustedImm32 (`0`), countRegister);
1924	op.m_reentry = label();
1925	if (m_decodeSurrogatePairs)
1926	storeToFrame(index, term->frameLocation + BackTrackInfoCharacterClass::beginIndex());
1927	storeToFrame(countRegister, term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex());
1928	}
1929
1930	void backtrackCharacterClassNonGreedy(size_t opIndex)
1931	{
1932	YarrOp& op = m_ops[opIndex];
1933	PatternTerm* term = op.m_term;
1934
1935	const RegisterID character = regT0;
1936	const RegisterID countRegister = regT1;
1937
1938	JumpList nonGreedyFailures;
1939
1940	m_backtrackingState.link(this);
1941
1942	if (m_decodeSurrogatePairs)
1943	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), reg: index);
1944	loadFromFrame(frameLocation: term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), reg: countRegister);
1945
1946	nonGreedyFailures.append(atEndOfInput());
1947	nonGreedyFailures.append(branch32(Equal, countRegister, Imm32 (term->quantityMaxCount.unsafeGet())));
1948
1949	JumpList matchDest;
1950	readCharacter(negativeCharacterOffset: m_checkedOffset - term->inputPosition, resultReg: character);
1951	// If we are matching the "any character" builtin class we only need to read the
1952	// character and don't need to match as it will always succeed.
1953	if (term->invert() \|\| !term->characterClass->m_anyCharacter) {
1954	matchCharacterClass(character, matchDest, charClass: term->characterClass);
1955
1956	if (term->invert())
1957	nonGreedyFailures.append(other: matchDest);
1958	else {
1959	nonGreedyFailures.append(jump());
1960	matchDest.link(masm: this);
1961	}
1962	}
1963
1964	add32(TrustedImm32 (`1`), index);
1965	#ifdef JIT_UNICODE_EXPRESSIONS
1966	if (m_decodeSurrogatePairs) {
1967	nonGreedyFailures.append(atEndOfInput());
1968	Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
1969	add32(TrustedImm32 (`1`), index);
1970	isBMPChar.link(masm: this);
1971	}
1972	#endif
1973	add32(TrustedImm32 (`1`), countRegister);
1974
1975	jump(op.m_reentry);
1976
1977	nonGreedyFailures.link(masm: this);
1978	sub32(countRegister, index);
1979	m_backtrackingState.fallthrough();
1980	}
1981
1982	void generateDotStarEnclosure(size_t opIndex)
1983	{
1984	YarrOp& op = m_ops[opIndex];
1985	PatternTerm* term = op.m_term;
1986
1987	const RegisterID character = regT0;
1988	const RegisterID matchPos = regT1;
1989	#ifndef HAVE_INITIAL_START_REG
1990	const RegisterID initialStart = character;
1991	#endif
1992
1993	JumpList foundBeginningNewLine;
1994	JumpList saveStartIndex;
1995	JumpList foundEndingNewLine;
1996
1997	if (m_pattern.dotAll()) {
1998	move(TrustedImm32 (`0`), matchPos);
1999	setMatchStart(matchPos);
2000	move(length, index);
2001	return;
2002	}
2003
2004	ASSERT(!m_pattern.m_body->m_hasFixedSize);
2005	getMatchStart(reg: matchPos);
2006
2007	#ifndef HAVE_INITIAL_START_REG
2008	loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2009	#endif
2010	saveStartIndex.append(branch32(BelowOrEqual, matchPos, initialStart));
2011	Label findBOLLoop(this);
2012	sub32(TrustedImm32 (`1`), matchPos);
2013	if (m_charSize == Char8)
2014	load8(BaseIndex (input, matchPos, TimesOne, `0`), character);
2015	else
2016	load16(BaseIndex (input, matchPos, TimesTwo, `0`), character);
2017	matchCharacterClass(character, matchDest&: foundBeginningNewLine, charClass: m_pattern.newlineCharacterClass());
2018
2019	#ifndef HAVE_INITIAL_START_REG
2020	loadFromFrame(m_pattern.m_initialStartValueFrameLocation, initialStart);
2021	#endif
2022	branch32(Above, matchPos, initialStart).linkTo(findBOLLoop, this);
2023	saveStartIndex.append(jump());
2024
2025	foundBeginningNewLine.link(masm: this);
2026	add32(TrustedImm32 (`1`), matchPos); // Advance past newline
2027	saveStartIndex.link(masm: this);
2028
2029	if (!m_pattern.multiline() && term->anchors.bolAnchor)
2030	op.m_jumps.append(branchTest32(NonZero, matchPos));
2031
2032	ASSERT(!m_pattern.m_body->m_hasFixedSize);
2033	setMatchStart(matchPos);
2034
2035	move(index, matchPos);
2036
2037	Label findEOLLoop(this);
2038	foundEndingNewLine.append(branch32(Equal, matchPos, length));
2039	if (m_charSize == Char8)
2040	load8(BaseIndex (input, matchPos, TimesOne, `0`), character);
2041	else
2042	load16(BaseIndex (input, matchPos, TimesTwo, `0`), character);
2043	matchCharacterClass(character, matchDest&: foundEndingNewLine, charClass: m_pattern.newlineCharacterClass());
2044	add32(TrustedImm32 (`1`), matchPos);
2045	jump(findEOLLoop);
2046
2047	foundEndingNewLine.link(masm: this);
2048
2049	if (!m_pattern.multiline() && term->anchors.eolAnchor)
2050	op.m_jumps.append(branch32(NotEqual, matchPos, length));
2051
2052	move(matchPos, index);
2053	}
2054
2055	void backtrackDotStarEnclosure(size_t opIndex)
2056	{
2057	backtrackTermDefault(opIndex);
2058	}
2059
2060	// Code generation/backtracking for simple terms
2061	// (pattern characters, character classes, and assertions).
2062	// These methods farm out work to the set of functions above.
2063	void generateTerm(size_t opIndex)
2064	{
2065	YarrOp& op = m_ops[opIndex];
2066	PatternTerm* term = op.m_term;
2067
2068	switch (term->type) {
2069	case PatternTerm::TypePatternCharacter:
2070	switch (term->quantityType) {
2071	case QuantifierFixedCount:
2072	if (term->quantityMaxCount == `1`)
2073	generatePatternCharacterOnce(opIndex);
2074	else
2075	generatePatternCharacterFixed(opIndex);
2076	break;
2077	case QuantifierGreedy:
2078	generatePatternCharacterGreedy(opIndex);
2079	break;
2080	case QuantifierNonGreedy:
2081	generatePatternCharacterNonGreedy(opIndex);
2082	break;
2083	}
2084	break;
2085
2086	case PatternTerm::TypeCharacterClass:
2087	switch (term->quantityType) {
2088	case QuantifierFixedCount:
2089	if (term->quantityMaxCount == `1`)
2090	generateCharacterClassOnce(opIndex);
2091	else
2092	generateCharacterClassFixed(opIndex);
2093	break;
2094	case QuantifierGreedy:
2095	generateCharacterClassGreedy(opIndex);
2096	break;
2097	case QuantifierNonGreedy:
2098	generateCharacterClassNonGreedy(opIndex);
2099	break;
2100	}
2101	break;
2102
2103	case PatternTerm::TypeAssertionBOL:
2104	generateAssertionBOL(opIndex);
2105	break;
2106
2107	case PatternTerm::TypeAssertionEOL:
2108	generateAssertionEOL(opIndex);
2109	break;
2110
2111	case PatternTerm::TypeAssertionWordBoundary:
2112	generateAssertionWordBoundary(opIndex);
2113	break;
2114
2115	case PatternTerm::TypeForwardReference:
2116	m_failureReason = JITFailureReason::ForwardReference;
2117	break;
2118
2119	case PatternTerm::TypeParenthesesSubpattern:
2120	case PatternTerm::TypeParentheticalAssertion:
2121	RELEASE_ASSERT_NOT_REACHED();
2122
2123	case PatternTerm::TypeBackReference:
2124	#if ENABLE(YARR_JIT_BACKREFERENCES)
2125	generateBackReference(opIndex);
2126	#else
2127	m_failureReason = JITFailureReason::BackReference;
2128	#endif
2129	break;
2130	case PatternTerm::TypeDotStarEnclosure:
2131	generateDotStarEnclosure(opIndex);
2132	break;
2133	}
2134	}
2135	void backtrackTerm(size_t opIndex)
2136	{
2137	YarrOp& op = m_ops[opIndex];
2138	PatternTerm* term = op.m_term;
2139
2140	switch (term->type) {
2141	case PatternTerm::TypePatternCharacter:
2142	switch (term->quantityType) {
2143	case QuantifierFixedCount:
2144	if (term->quantityMaxCount == `1`)
2145	backtrackPatternCharacterOnce(opIndex);
2146	else
2147	backtrackPatternCharacterFixed(opIndex);
2148	break;
2149	case QuantifierGreedy:
2150	backtrackPatternCharacterGreedy(opIndex);
2151	break;
2152	case QuantifierNonGreedy:
2153	backtrackPatternCharacterNonGreedy(opIndex);
2154	break;
2155	}
2156	break;
2157
2158	case PatternTerm::TypeCharacterClass:
2159	switch (term->quantityType) {
2160	case QuantifierFixedCount:
2161	if (term->quantityMaxCount == `1`)
2162	backtrackCharacterClassOnce(opIndex);
2163	else
2164	backtrackCharacterClassFixed(opIndex);
2165	break;
2166	case QuantifierGreedy:
2167	backtrackCharacterClassGreedy(opIndex);
2168	break;
2169	case QuantifierNonGreedy:
2170	backtrackCharacterClassNonGreedy(opIndex);
2171	break;
2172	}
2173	break;
2174
2175	case PatternTerm::TypeAssertionBOL:
2176	backtrackAssertionBOL(opIndex);
2177	break;
2178
2179	case PatternTerm::TypeAssertionEOL:
2180	backtrackAssertionEOL(opIndex);
2181	break;
2182
2183	case PatternTerm::TypeAssertionWordBoundary:
2184	backtrackAssertionWordBoundary(opIndex);
2185	break;
2186
2187	case PatternTerm::TypeForwardReference:
2188	m_failureReason = JITFailureReason::ForwardReference;
2189	break;
2190
2191	case PatternTerm::TypeParenthesesSubpattern:
2192	case PatternTerm::TypeParentheticalAssertion:
2193	RELEASE_ASSERT_NOT_REACHED();
2194
2195	case PatternTerm::TypeBackReference:
2196	#if ENABLE(YARR_JIT_BACKREFERENCES)
2197	backtrackBackReference(opIndex);
2198	#else
2199	m_failureReason = JITFailureReason::BackReference;
2200	#endif
2201	break;
2202
2203	case PatternTerm::TypeDotStarEnclosure:
2204	backtrackDotStarEnclosure(opIndex);
2205	break;
2206	}
2207	}
2208
2209	void generate()
2210	{
2211	// Forwards generate the matching code.
2212	ASSERT(m_ops.size());
2213	size_t opIndex = `0`;
2214
2215	do {
2216	YarrOp& op = m_ops[opIndex];
2217	switch (op.m_op) {
2218
2219	case OpTerm:
2220	generateTerm(opIndex);
2221	break;
2222
2223	// OpBodyAlternativeBegin/Next/End
2224	//
2225	// These nodes wrap the set of alternatives in the body of the regular expression.
2226	// There may be either one or two chains of OpBodyAlternative nodes, one representing
2227	// the 'once through' sequence of alternatives (if any exist), and one representing
2228	// the repeating alternatives (again, if any exist).
2229	//
2230	// Upon normal entry to the Begin alternative, we will check that input is available.
2231	// Reentry to the Begin alternative will take place after the check has taken place,
2232	// and will assume that the input position has already been progressed as appropriate.
2233	//
2234	// Entry to subsequent Next/End alternatives occurs when the prior alternative has
2235	// successfully completed a match - return a success state from JIT code.
2236	//
2237	// Next alternatives allow for reentry optimized to suit backtracking from its
2238	// preceding alternative. It expects the input position to still be set to a position
2239	// appropriate to its predecessor, and it will only perform an input check if the
2240	// predecessor had a minimum size less than its own.
2241	//
2242	// In the case 'once through' expressions, the End node will also have a reentry
2243	// point to jump to when the last alternative fails. Again, this expects the input
2244	// position to still reflect that expected by the prior alternative.
2245	case OpBodyAlternativeBegin: {
2246	PatternAlternative* alternative = op.m_alternative;
2247
2248	// Upon entry at the head of the set of alternatives, check if input is available
2249	// to run the first alternative. (This progresses the input position).
2250	op.m_jumps.append(jumpIfNoAvailableInput(countToCheck: alternative->m_minimumSize));
2251	// We will reenter after the check, and assume the input position to have been
2252	// set as appropriate to this alternative.
2253	op.m_reentry = label();
2254
2255	m_checkedOffset += alternative->m_minimumSize;
2256	break;
2257	}
2258	case OpBodyAlternativeNext:
2259	case OpBodyAlternativeEnd: {
2260	PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2261	PatternAlternative* alternative = op.m_alternative;
2262
2263	// If we get here, the prior alternative matched - return success.
2264
2265	// Adjust the stack pointer to remove the pattern's frame.
2266	removeCallFrame();
2267
2268	// Load appropriate values into the return register and the first output
2269	// slot, and return. In the case of pattern with a fixed size, we will
2270	// not have yet set the value in the first
2271	ASSERT(index != returnRegister);
2272	if (m_pattern.m_body->m_hasFixedSize) {
2273	move(index, returnRegister);
2274	if (priorAlternative->m_minimumSize)
2275	sub32(Imm32 (priorAlternative->m_minimumSize), returnRegister);
2276	if (compileMode == IncludeSubpatterns)
2277	store32(returnRegister, output);
2278	} else
2279	getMatchStart(reg: returnRegister);
2280	if (compileMode == IncludeSubpatterns)
2281	store32(index, Address (output, `4`));
2282	move(index, returnRegister2);
2283
2284	generateReturn();
2285
2286	// This is the divide between the tail of the prior alternative, above, and
2287	// the head of the subsequent alternative, below.
2288
2289	if (op.m_op == OpBodyAlternativeNext) {
2290	// This is the reentry point for the Next alternative. We expect any code
2291	// that jumps here to do so with the input position matching that of the
2292	// PRIOR alteranative, and we will only check input availability if we
2293	// need to progress it forwards.
2294	op.m_reentry = label();
2295	if (alternative->m_minimumSize > priorAlternative->m_minimumSize) {
2296	add32(Imm32 (alternative->m_minimumSize - priorAlternative->m_minimumSize), index);
2297	op.m_jumps.append(jumpIfNoAvailableInput());
2298	} else if (priorAlternative->m_minimumSize > alternative->m_minimumSize)
2299	sub32(Imm32 (priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
2300	} else if (op.m_nextOp == notFound) {
2301	// This is the reentry point for the End of 'once through' alternatives,
2302	// jumped to when the last alternative fails to match.
2303	op.m_reentry = label();
2304	sub32(Imm32 (priorAlternative->m_minimumSize), index);
2305	}
2306
2307	if (op.m_op == OpBodyAlternativeNext)
2308	m_checkedOffset += alternative->m_minimumSize;
2309	m_checkedOffset -= priorAlternative->m_minimumSize;
2310	break;
2311	}
2312
2313	// OpSimpleNestedAlternativeBegin/Next/End
2314	// OpNestedAlternativeBegin/Next/End
2315	//
2316	// These nodes are used to handle sets of alternatives that are nested within
2317	// subpatterns and parenthetical assertions. The 'simple' forms are used where
2318	// we do not need to be able to backtrack back into any alternative other than
2319	// the last, the normal forms allow backtracking into any alternative.
2320	//
2321	// Each Begin/Next node is responsible for planting an input check to ensure
2322	// sufficient input is available on entry. Next nodes additionally need to
2323	// jump to the end - Next nodes use the End node's m_jumps list to hold this
2324	// set of jumps.
2325	//
2326	// In the non-simple forms, successful alternative matches must store a
2327	// 'return address' using a DataLabelPtr, used to store the address to jump
2328	// to when backtracking, to get to the code for the appropriate alternative.
2329	case OpSimpleNestedAlternativeBegin:
2330	case OpNestedAlternativeBegin: {
2331	PatternTerm* term = op.m_term;
2332	PatternAlternative* alternative = op.m_alternative;
2333	PatternDisjunction* disjunction = term->parentheses.disjunction;
2334
2335	// Calculate how much input we need to check for, and if non-zero check.
2336	op.m_checkAdjust = Checked<unsigned>(alternative->m_minimumSize);
2337	if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2338	op.m_checkAdjust -= disjunction->m_minimumSize;
2339	if (op.m_checkAdjust)
2340	op.m_jumps.append(jumpIfNoAvailableInput(countToCheck: op.m_checkAdjust.unsafeGet()));
2341
2342	m_checkedOffset += op.m_checkAdjust;
2343	break;
2344	}
2345	case OpSimpleNestedAlternativeNext:
2346	case OpNestedAlternativeNext: {
2347	PatternTerm* term = op.m_term;
2348	PatternAlternative* alternative = op.m_alternative;
2349	PatternDisjunction* disjunction = term->parentheses.disjunction;
2350
2351	// In the non-simple case, store a 'return address' so we can backtrack correctly.
2352	if (op.m_op == OpNestedAlternativeNext) {
2353	unsigned parenthesesFrameLocation = term->frameLocation;
2354	op.m_returnAddress = storeToFrameWithPatch(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2355	}
2356
2357	if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2358	// If the previous alternative matched without consuming characters then
2359	// backtrack to try to match while consumming some input.
2360	op.m_zeroLengthMatch = branch32(Equal, index, Address (stackPointerRegister, term->frameLocation * sizeof(void*)));
2361	}
2362
2363	// If we reach here then the last alternative has matched - jump to the
2364	// End node, to skip over any further alternatives.
2365	//
2366	// FIXME: this is logically O(N^2) (though N can be expected to be very
2367	// small). We could avoid this either by adding an extra jump to the JIT
2368	// data structures, or by making backtracking code that jumps to Next
2369	// alternatives are responsible for checking that input is available (if
2370	// we didn't need to plant the input checks, then m_jumps would be free).
2371	YarrOp* endOp = &m_ops[op.m_nextOp];
2372	while (endOp->m_nextOp != notFound) {
2373	ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext \|\| endOp->m_op == OpNestedAlternativeNext);
2374	endOp = &m_ops[endOp->m_nextOp];
2375	}
2376	ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd \|\| endOp->m_op == OpNestedAlternativeEnd);
2377	endOp->m_jumps.append(jump());
2378
2379	// This is the entry point for the next alternative.
2380	op.m_reentry = label();
2381
2382	// Calculate how much input we need to check for, and if non-zero check.
2383	op.m_checkAdjust = alternative->m_minimumSize;
2384	if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
2385	op.m_checkAdjust -= disjunction->m_minimumSize;
2386	if (op.m_checkAdjust)
2387	op.m_jumps.append(jumpIfNoAvailableInput(countToCheck: op.m_checkAdjust.unsafeGet()));
2388
2389	YarrOp& lastOp = m_ops[op.m_previousOp];
2390	m_checkedOffset -= lastOp.m_checkAdjust;
2391	m_checkedOffset += op.m_checkAdjust;
2392	break;
2393	}
2394	case OpSimpleNestedAlternativeEnd:
2395	case OpNestedAlternativeEnd: {
2396	PatternTerm* term = op.m_term;
2397
2398	// In the non-simple case, store a 'return address' so we can backtrack correctly.
2399	if (op.m_op == OpNestedAlternativeEnd) {
2400	unsigned parenthesesFrameLocation = term->frameLocation;
2401	op.m_returnAddress = storeToFrameWithPatch(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
2402	}
2403
2404	if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
2405	// If the previous alternative matched without consuming characters then
2406	// backtrack to try to match while consumming some input.
2407	op.m_zeroLengthMatch = branch32(Equal, index, Address (stackPointerRegister, term->frameLocation * sizeof(void*)));
2408	}
2409
2410	// If this set of alternatives contains more than one alternative,
2411	// then the Next nodes will have planted jumps to the End, and added
2412	// them to this node's m_jumps list.
2413	op.m_jumps.link(this);
2414	op.m_jumps.clear();
2415
2416	YarrOp& lastOp = m_ops[op.m_previousOp];
2417	m_checkedOffset -= lastOp.m_checkAdjust;
2418	break;
2419	}
2420
2421	// OpParenthesesSubpatternOnceBegin/End
2422	//
2423	// These nodes support (optionally) capturing subpatterns, that have a
2424	// quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
2425	case OpParenthesesSubpatternOnceBegin: {
2426	PatternTerm* term = op.m_term;
2427	unsigned parenthesesFrameLocation = term->frameLocation;
2428	const RegisterID indexTemporary = regT0;
2429	ASSERT(term->quantityMaxCount == `1`);
2430
2431	// Upon entry to a Greedy quantified set of parenthese store the index.
2432	// We'll use this for two purposes:
2433	// - To indicate which iteration we are on of mathing the remainder of
2434	// the expression after the parentheses - the first, including the
2435	// match within the parentheses, or the second having skipped over them.
2436	// - To check for empty matches, which must be rejected.
2437	//
2438	// At the head of a NonGreedy set of parentheses we'll immediately set the
2439	// value on the stack to -1 (indicating a match skipping the subpattern),
2440	// and plant a jump to the end. We'll also plant a label to backtrack to
2441	// to reenter the subpattern later, with a store to set up index on the
2442	// second iteration.
2443	//
2444	// FIXME: for capturing parens, could use the index in the capture array?
2445	if (term->quantityType == QuantifierGreedy)
2446	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2447	else if (term->quantityType == QuantifierNonGreedy) {
2448	storeToFrame(TrustedImm32 (-`1`), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2449	op.m_jumps.append(jump());
2450	op.m_reentry = label();
2451	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
2452	}
2453
2454	// If the parenthese are capturing, store the starting index value to the
2455	// captures array, offsetting as necessary.
2456	//
2457	// FIXME: could avoid offsetting this value in JIT code, apply
2458	// offsets only afterwards, at the point the results array is
2459	// being accessed.
2460	if (term->capture() && compileMode == IncludeSubpatterns) {
2461	unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2462	if (term->quantityType == QuantifierFixedCount)
2463	inputOffset += term->parentheses.disjunction->m_minimumSize;
2464	if (inputOffset) {
2465	move(index, indexTemporary);
2466	sub32(Imm32 (inputOffset), indexTemporary);
2467	setSubpatternStart(reg: indexTemporary, subpattern: term->parentheses.subpatternId);
2468	} else
2469	setSubpatternStart(reg: index, subpattern: term->parentheses.subpatternId);
2470	}
2471	break;
2472	}
2473	case OpParenthesesSubpatternOnceEnd: {
2474	PatternTerm* term = op.m_term;
2475	const RegisterID indexTemporary = regT0;
2476	ASSERT(term->quantityMaxCount == `1`);
2477
2478	// Runtime ASSERT to make sure that the nested alternative handled the
2479	// "no input consumed" check.
2480	if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2481	Jump pastBreakpoint;
2482	pastBreakpoint = branch32(NotEqual, index, Address (stackPointerRegister, term->frameLocation * sizeof(void*)));
2483	// ### abortWithReason(YARRNoInputConsumed);
2484	pastBreakpoint.link(masm: this);
2485	}
2486
2487	// If the parenthese are capturing, store the ending index value to the
2488	// captures array, offsetting as necessary.
2489	//
2490	// FIXME: could avoid offsetting this value in JIT code, apply
2491	// offsets only afterwards, at the point the results array is
2492	// being accessed.
2493	if (term->capture() && compileMode == IncludeSubpatterns) {
2494	unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2495	if (inputOffset) {
2496	move(index, indexTemporary);
2497	sub32(Imm32 (inputOffset), indexTemporary);
2498	setSubpatternEnd(reg: indexTemporary, subpattern: term->parentheses.subpatternId);
2499	} else
2500	setSubpatternEnd(reg: index, subpattern: term->parentheses.subpatternId);
2501	}
2502
2503	// If the parentheses are quantified Greedy then add a label to jump back
2504	// to if we get a failed match from after the parentheses. For NonGreedy
2505	// parentheses, link the jump from before the subpattern to here.
2506	if (term->quantityType == QuantifierGreedy)
2507	op.m_reentry = label();
2508	else if (term->quantityType == QuantifierNonGreedy) {
2509	YarrOp& beginOp = m_ops[op.m_previousOp];
2510	beginOp.m_jumps.link(this);
2511	}
2512	break;
2513	}
2514
2515	// OpParenthesesSubpatternTerminalBegin/End
2516	case OpParenthesesSubpatternTerminalBegin: {
2517	PatternTerm* term = op.m_term;
2518	ASSERT(term->quantityType == QuantifierGreedy);
2519	ASSERT(term->quantityMaxCount == quantifyInfinite);
2520	ASSERT(!term->capture());
2521
2522	// Upon entry set a label to loop back to.
2523	op.m_reentry = label();
2524
2525	// Store the start index of the current match; we need to reject zero
2526	// length matches.
2527	storeToFrame(index, term->frameLocation + BackTrackInfoParenthesesTerminal::beginIndex());
2528	break;
2529	}
2530	case OpParenthesesSubpatternTerminalEnd: {
2531	YarrOp& beginOp = m_ops[op.m_previousOp];
2532	if (!ASSERT_DISABLED) {
2533	PatternTerm* term = op.m_term;
2534
2535	// Runtime ASSERT to make sure that the nested alternative handled the
2536	// "no input consumed" check.
2537	Jump pastBreakpoint;
2538	pastBreakpoint = branch32(NotEqual, index, Address (stackPointerRegister, term->frameLocation * sizeof(void*)));
2539	// ### abortWithReason(YARRNoInputConsumed);
2540	pastBreakpoint.link(masm: this);
2541	}
2542
2543	// We know that the match is non-zero, we can accept it and
2544	// loop back up to the head of the subpattern.
2545	jump(beginOp.m_reentry);
2546
2547	// This is the entry point to jump to when we stop matching - we will
2548	// do so once the subpattern cannot match any more.
2549	op.m_reentry = label();
2550	break;
2551	}
2552
2553	// OpParenthesesSubpatternBegin/End
2554	//
2555	// These nodes support generic subpatterns.
2556	case OpParenthesesSubpatternBegin: {
2557	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2558	PatternTerm* term = op.m_term;
2559	unsigned parenthesesFrameLocation = term->frameLocation;
2560
2561	// Upon entry to a Greedy quantified set of parenthese store the index.
2562	// We'll use this for two purposes:
2563	// - To indicate which iteration we are on of mathing the remainder of
2564	// the expression after the parentheses - the first, including the
2565	// match within the parentheses, or the second having skipped over them.
2566	// - To check for empty matches, which must be rejected.
2567	//
2568	// At the head of a NonGreedy set of parentheses we'll immediately set 'begin'
2569	// in the backtrack info to -1 (indicating a match skipping the subpattern),
2570	// and plant a jump to the end. We'll also plant a label to backtrack to
2571	// to reenter the subpattern later, with a store to set 'begin' to current index
2572	// on the second iteration.
2573	//
2574	// FIXME: for capturing parens, could use the index in the capture array?
2575	if (term->quantityType == QuantifierGreedy \|\| term->quantityType == QuantifierNonGreedy) {
2576	storeToFrame(TrustedImm32 (`0`), parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2577	storeToFrame(TrustedImmPtr (nullptr), parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2578
2579	if (term->quantityType == QuantifierNonGreedy) {
2580	storeToFrame(TrustedImm32 (-`1`), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2581	op.m_jumps.append(jump());
2582	}
2583
2584	op.m_reentry = label();
2585	RegisterID currParenContextReg = regT0;
2586	RegisterID newParenContextReg = regT1;
2587
2588	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), reg: currParenContextReg);
2589	allocateParenContext(result: newParenContextReg);
2590	storePtr(currParenContextReg, newParenContextReg);
2591	storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
2592	saveParenContext(parenContextReg: newParenContextReg, tempReg: regT2, firstSubpattern: term->parentheses.subpatternId, lastSubpattern: term->parentheses.lastSubpatternId, subpatternBaseFrameLocation: parenthesesFrameLocation);
2593	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
2594	}
2595
2596	// If the parenthese are capturing, store the starting index value to the
2597	// captures array, offsetting as necessary.
2598	//
2599	// FIXME: could avoid offsetting this value in JIT code, apply
2600	// offsets only afterwards, at the point the results array is
2601	// being accessed.
2602	if (term->capture() && compileMode == IncludeSubpatterns) {
2603	const RegisterID indexTemporary = regT0;
2604	unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2605	if (term->quantityType == QuantifierFixedCount)
2606	inputOffset += term->parentheses.disjunction->m_minimumSize;
2607	if (inputOffset) {
2608	move(index, indexTemporary);
2609	sub32(Imm32 (inputOffset), indexTemporary);
2610	setSubpatternStart(reg: indexTemporary, subpattern: term->parentheses.subpatternId);
2611	} else
2612	setSubpatternStart(reg: index, subpattern: term->parentheses.subpatternId);
2613	}
2614	#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2615	RELEASE_ASSERT_NOT_REACHED();
2616	#endif
2617	break;
2618	}
2619	case OpParenthesesSubpatternEnd: {
2620	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
2621	PatternTerm* term = op.m_term;
2622	unsigned parenthesesFrameLocation = term->frameLocation;
2623
2624	// Runtime ASSERT to make sure that the nested alternative handled the
2625	// "no input consumed" check.
2626	if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
2627	Jump pastBreakpoint;
2628	pastBreakpoint = branch32(NotEqual, index, Address (stackPointerRegister, parenthesesFrameLocation * sizeof(void*)));
2629	// ### abortWithReason(YARRNoInputConsumed);
2630	pastBreakpoint.link(masm: this);
2631	}
2632
2633	const RegisterID countTemporary = regT1;
2634
2635	YarrOp& beginOp = m_ops[op.m_previousOp];
2636	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), reg: countTemporary);
2637	add32(TrustedImm32 (`1`), countTemporary);
2638	storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
2639
2640	// If the parenthese are capturing, store the ending index value to the
2641	// captures array, offsetting as necessary.
2642	//
2643	// FIXME: could avoid offsetting this value in JIT code, apply
2644	// offsets only afterwards, at the point the results array is
2645	// being accessed.
2646	if (term->capture() && compileMode == IncludeSubpatterns) {
2647	const RegisterID indexTemporary = regT0;
2648
2649	unsigned inputOffset = (m_checkedOffset - term->inputPosition).unsafeGet();
2650	if (inputOffset) {
2651	move(index, indexTemporary);
2652	sub32(Imm32 (inputOffset), indexTemporary);
2653	setSubpatternEnd(reg: indexTemporary, subpattern: term->parentheses.subpatternId);
2654	} else
2655	setSubpatternEnd(reg: index, subpattern: term->parentheses.subpatternId);
2656	}
2657
2658	// If the parentheses are quantified Greedy then add a label to jump back
2659	// to if we get a failed match from after the parentheses. For NonGreedy
2660	// parentheses, link the jump from before the subpattern to here.
2661	if (term->quantityType == QuantifierGreedy) {
2662	if (term->quantityMaxCount != quantifyInfinite)
2663	branch32(Below, countTemporary, Imm32 (term->quantityMaxCount.unsafeGet())).linkTo(beginOp.m_reentry, this);
2664	else
2665	jump(beginOp.m_reentry);
2666
2667	op.m_reentry = label();
2668	} else if (term->quantityType == QuantifierNonGreedy) {
2669	YarrOp& beginOp = m_ops[op.m_previousOp];
2670	beginOp.m_jumps.link(this);
2671	op.m_reentry = label();
2672	}
2673	#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
2674	RELEASE_ASSERT_NOT_REACHED();
2675	#endif
2676	break;
2677	}
2678
2679	// OpParentheticalAssertionBegin/End
2680	case OpParentheticalAssertionBegin: {
2681	PatternTerm* term = op.m_term;
2682
2683	// Store the current index - assertions should not update index, so
2684	// we will need to restore it upon a successful match.
2685	unsigned parenthesesFrameLocation = term->frameLocation;
2686	storeToFrame(index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
2687
2688	// Check
2689	op.m_checkAdjust = m_checkedOffset - term->inputPosition;
2690	if (op.m_checkAdjust)
2691	sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
2692
2693	m_checkedOffset -= op.m_checkAdjust;
2694	break;
2695	}
2696	case OpParentheticalAssertionEnd: {
2697	PatternTerm* term = op.m_term;
2698
2699	// Restore the input index value.
2700	unsigned parenthesesFrameLocation = term->frameLocation;
2701	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex(), reg: index);
2702
2703	// If inverted, a successful match of the assertion must be treated
2704	// as a failure, so jump to backtracking.
2705	if (term->invert()) {
2706	op.m_jumps.append(jump());
2707	op.m_reentry = label();
2708	}
2709
2710	YarrOp& lastOp = m_ops[op.m_previousOp];
2711	m_checkedOffset += lastOp.m_checkAdjust;
2712	break;
2713	}
2714
2715	case OpMatchFailed:
2716	removeCallFrame();
2717	generateFailReturn();
2718	break;
2719	}
2720
2721	++opIndex;
2722	} while (opIndex < m_ops.size());
2723	}
2724
2725	void backtrack()
2726	{
2727	// Backwards generate the backtracking code.
2728	size_t opIndex = m_ops.size();
2729	ASSERT(opIndex);
2730
2731	do {
2732	--opIndex;
2733
2734	YarrOp& op = m_ops[opIndex];
2735	switch (op.m_op) {
2736
2737	case OpTerm:
2738	backtrackTerm(opIndex);
2739	break;
2740
2741	// OpBodyAlternativeBegin/Next/End
2742	//
2743	// For each Begin/Next node representing an alternative, we need to decide what to do
2744	// in two circumstances:
2745	// - If we backtrack back into this node, from within the alternative.
2746	// - If the input check at the head of the alternative fails (if this exists).
2747	//
2748	// We treat these two cases differently since in the former case we have slightly
2749	// more information - since we are backtracking out of a prior alternative we know
2750	// that at least enough input was available to run it. For example, given the regular
2751	// expression /a\|b/, if we backtrack out of the first alternative (a failed pattern
2752	// character match of 'a'), then we need not perform an additional input availability
2753	// check before running the second alternative.
2754	//
2755	// Backtracking required differs for the last alternative, which in the case of the
2756	// repeating set of alternatives must loop. The code generated for the last alternative
2757	// will also be used to handle all input check failures from any prior alternatives -
2758	// these require similar functionality, in seeking the next available alternative for
2759	// which there is sufficient input.
2760	//
2761	// Since backtracking of all other alternatives simply requires us to link backtracks
2762	// to the reentry point for the subsequent alternative, we will only be generating any
2763	// code when backtracking the last alternative.
2764	case OpBodyAlternativeBegin:
2765	case OpBodyAlternativeNext: {
2766	PatternAlternative* alternative = op.m_alternative;
2767
2768	if (op.m_op == OpBodyAlternativeNext) {
2769	PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2770	m_checkedOffset += priorAlternative->m_minimumSize;
2771	}
2772	m_checkedOffset -= alternative->m_minimumSize;
2773
2774	// Is this the last alternative? If not, then if we backtrack to this point we just
2775	// need to jump to try to match the next alternative.
2776	if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
2777	m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
2778	break;
2779	}
2780	YarrOp& endOp = m_ops[op.m_nextOp];
2781
2782	YarrOp* beginOp = &op;
2783	while (beginOp->m_op != OpBodyAlternativeBegin) {
2784	ASSERT(beginOp->m_op == OpBodyAlternativeNext);
2785	beginOp = &m_ops[beginOp->m_previousOp];
2786	}
2787
2788	bool onceThrough = endOp.m_nextOp == notFound;
2789
2790	JumpList lastStickyAlternativeFailures;
2791
2792	// First, generate code to handle cases where we backtrack out of an attempted match
2793	// of the last alternative. If this is a 'once through' set of alternatives then we
2794	// have nothing to do - link this straight through to the End.
2795	if (onceThrough)
2796	m_backtrackingState.linkTo(endOp.m_reentry, this);
2797	else {
2798	// If we don't need to move the input poistion, and the pattern has a fixed size
2799	// (in which case we omit the store of the start index until the pattern has matched)
2800	// then we can just link the backtrack out of the last alternative straight to the
2801	// head of the first alternative.
2802	if (m_pattern.m_body->m_hasFixedSize
2803	&& (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize)
2804	&& (alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize == `1`))
2805	m_backtrackingState.linkTo(beginOp->m_reentry, this);
2806	else if (m_pattern.sticky() && m_ops[op.m_nextOp].m_op == OpBodyAlternativeEnd) {
2807	// It is a sticky pattern and the last alternative failed, jump to the end.
2808	m_backtrackingState.takeBacktracksToJumpList(lastStickyAlternativeFailures, this);
2809	} else {
2810	// We need to generate a trampoline of code to execute before looping back
2811	// around to the first alternative.
2812	m_backtrackingState.link(this);
2813
2814	// No need to advance and retry for a sticky pattern.
2815	if (!m_pattern.sticky()) {
2816	// If the pattern size is not fixed, then store the start index for use if we match.
2817	if (!m_pattern.m_body->m_hasFixedSize) {
2818	if (alternative->m_minimumSize == `1`)
2819	setMatchStart(index);
2820	else {
2821	move(index, regT0);
2822	if (alternative->m_minimumSize)
2823	sub32(Imm32 (alternative->m_minimumSize - `1`), regT0);
2824	else
2825	add32(TrustedImm32 (`1`), regT0);
2826	setMatchStart(regT0);
2827	}
2828	}
2829
2830	// Generate code to loop. Check whether the last alternative is longer than the
2831	// first (e.g. /a\|xy/ or /a\|xyz/).
2832	if (alternative->m_minimumSize > beginOp->m_alternative->m_minimumSize) {
2833	// We want to loop, and increment input position. If the delta is 1, it is
2834	// already correctly incremented, if more than one then decrement as appropriate.
2835	unsigned delta = alternative->m_minimumSize - beginOp->m_alternative->m_minimumSize;
2836	ASSERT(delta);
2837	if (delta != `1`)
2838	sub32(Imm32 (delta - `1`), index);
2839	jump(beginOp->m_reentry);
2840	} else {
2841	// If the first alternative has minimum size 0xFFFFFFFFu, then there cannot
2842	// be sufficent input available to handle this, so just fall through.
2843	unsigned delta = beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize;
2844	if (delta != `0xFFFFFFFFu`) {
2845	// We need to check input because we are incrementing the input.
2846	add32(Imm32 (delta + `1`), index);
2847	checkInput().linkTo(beginOp->m_reentry, this);
2848	}
2849	}
2850	}
2851	}
2852	}
2853
2854	// We can reach this point in the code in two ways:
2855	// - Fallthrough from the code above (a repeating alternative backtracked out of its
2856	// last alternative, and did not have sufficent input to run the first).
2857	// - We will loop back up to the following label when a repeating alternative loops,
2858	// following a failed input check.
2859	//
2860	// Either way, we have just failed the input check for the first alternative.
2861	Label firstInputCheckFailed(this);
2862
2863	// Generate code to handle input check failures from alternatives except the last.
2864	// prevOp is the alternative we're handling a bail out from (initially Begin), and
2865	// nextOp is the alternative we will be attempting to reenter into.
2866	//
2867	// We will link input check failures from the forwards matching path back to the code
2868	// that can handle them.
2869	YarrOp* prevOp = beginOp;
2870	YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
2871	while (nextOp->m_op != OpBodyAlternativeEnd) {
2872	prevOp->m_jumps.link(this);
2873
2874	// We only get here if an input check fails, it is only worth checking again
2875	// if the next alternative has a minimum size less than the last.
2876	if (prevOp->m_alternative->m_minimumSize > nextOp->m_alternative->m_minimumSize) {
2877	// FIXME: if we added an extra label to YarrOp, we could avoid needing to
2878	// subtract delta back out, and reduce this code. Should performance test
2879	// the benefit of this.
2880	unsigned delta = prevOp->m_alternative->m_minimumSize - nextOp->m_alternative->m_minimumSize;
2881	sub32(Imm32 (delta), index);
2882	Jump fail = jumpIfNoAvailableInput();
2883	add32(Imm32 (delta), index);
2884	jump(nextOp->m_reentry);
2885	fail.link(masm: this);
2886	} else if (prevOp->m_alternative->m_minimumSize < nextOp->m_alternative->m_minimumSize)
2887	add32(Imm32(nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize), index);
2888	prevOp = nextOp;
2889	nextOp = &m_ops[nextOp->m_nextOp];
2890	}
2891
2892	// We fall through to here if there is insufficient input to run the last alternative.
2893
2894	// If there is insufficient input to run the last alternative, then for 'once through'
2895	// alternatives we are done - just jump back up into the forwards matching path at the End.
2896	if (onceThrough) {
2897	op.m_jumps.linkTo(endOp.m_reentry, this);
2898	jump(endOp.m_reentry);
2899	break;
2900	}
2901
2902	// For repeating alternatives, link any input check failure from the last alternative to
2903	// this point.
2904	op.m_jumps.link(this);
2905
2906	bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
2907
2908	// Check for cases where input position is already incremented by 1 for the last
2909	// alternative (this is particularly useful where the minimum size of the body
2910	// disjunction is 0, e.g. /a\|b/).*
2911	if (needsToUpdateMatchStart && alternative->m_minimumSize == `1`) {
2912	// index is already incremented by 1, so just store it now!
2913	setMatchStart(index);
2914	needsToUpdateMatchStart = false;
2915	}
2916
2917	if (!m_pattern.sticky()) {
2918	// Check whether there is sufficient input to loop. Increment the input position by
2919	// one, and check. Also add in the minimum disjunction size before checking - there
2920	// is no point in looping if we're just going to fail all the input checks around
2921	// the next iteration.
2922	ASSERT(alternative->m_minimumSize >= m_pattern.m_body->m_minimumSize);
2923	if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
2924	// If the last alternative had the same minimum size as the disjunction,
2925	// just simply increment input pos by 1, no adjustment based on minimum size.
2926	add32(TrustedImm32 (`1`), index);
2927	} else {
2928	// If the minumum for the last alternative was one greater than than that
2929	// for the disjunction, we're already progressed by 1, nothing to do!
2930	unsigned delta = (alternative->m_minimumSize - m_pattern.m_body->m_minimumSize) - `1`;
2931	if (delta)
2932	sub32(Imm32 (delta), index);
2933	}
2934	Jump matchFailed = jumpIfNoAvailableInput();
2935
2936	if (needsToUpdateMatchStart) {
2937	if (!m_pattern.m_body->m_minimumSize)
2938	setMatchStart(index);
2939	else {
2940	move(index, regT0);
2941	sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
2942	setMatchStart(regT0);
2943	}
2944	}
2945
2946	// Calculate how much more input the first alternative requires than the minimum
2947	// for the body as a whole. If no more is needed then we dont need an additional
2948	// input check here - jump straight back up to the start of the first alternative.
2949	if (beginOp->m_alternative->m_minimumSize == m_pattern.m_body->m_minimumSize)
2950	jump(beginOp->m_reentry);
2951	else {
2952	if (beginOp->m_alternative->m_minimumSize > m_pattern.m_body->m_minimumSize)
2953	add32(Imm32(beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize), index);
2954	else
2955	sub32(Imm32(m_pattern.m_body->m_minimumSize - beginOp->m_alternative->m_minimumSize), index);
2956	checkInput().linkTo(beginOp->m_reentry, this);
2957	jump(firstInputCheckFailed);
2958	}
2959
2960	// We jump to here if we iterate to the point that there is insufficient input to
2961	// run any matches, and need to return a failure state from JIT code.
2962	matchFailed.link(masm: this);
2963	}
2964
2965	lastStickyAlternativeFailures.link(masm: this);
2966	removeCallFrame();
2967	generateFailReturn();
2968	break;
2969	}
2970	case OpBodyAlternativeEnd: {
2971	// We should never backtrack back into a body disjunction.
2972	ASSERT(m_backtrackingState.isEmpty());
2973
2974	PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
2975	m_checkedOffset += priorAlternative->m_minimumSize;
2976	break;
2977	}
2978
2979	// OpSimpleNestedAlternativeBegin/Next/End
2980	// OpNestedAlternativeBegin/Next/End
2981	//
2982	// Generate code for when we backtrack back out of an alternative into
2983	// a Begin or Next node, or when the entry input count check fails. If
2984	// there are more alternatives we need to jump to the next alternative,
2985	// if not we backtrack back out of the current set of parentheses.
2986	//
2987	// In the case of non-simple nested assertions we need to also link the
2988	// 'return address' appropriately to backtrack back out into the correct
2989	// alternative.
2990	case OpSimpleNestedAlternativeBegin:
2991	case OpSimpleNestedAlternativeNext:
2992	case OpNestedAlternativeBegin:
2993	case OpNestedAlternativeNext: {
2994	YarrOp& nextOp = m_ops[op.m_nextOp];
2995	bool isBegin = op.m_previousOp == notFound;
2996	bool isLastAlternative = nextOp.m_nextOp == notFound;
2997	ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin \|\| op.m_op == OpNestedAlternativeBegin));
2998	ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd \|\| nextOp.m_op == OpNestedAlternativeEnd));
2999
3000	// Treat an input check failure the same as a failed match.
3001	m_backtrackingState.append(op.m_jumps);
3002
3003	// Set the backtracks to jump to the appropriate place. We may need
3004	// to link the backtracks in one of three different way depending on
3005	// the type of alternative we are dealing with:
3006	// - A single alternative, with no simplings.
3007	// - The last alternative of a set of two or more.
3008	// - An alternative other than the last of a set of two or more.
3009	//
3010	// In the case of a single alternative on its own, we don't need to
3011	// jump anywhere - if the alternative fails to match we can just
3012	// continue to backtrack out of the parentheses without jumping.
3013	//
3014	// In the case of the last alternative in a set of more than one, we
3015	// need to jump to return back out to the beginning. We'll do so by
3016	// adding a jump to the End node's m_jumps list, and linking this
3017	// when we come to generate the Begin node. For alternatives other
3018	// than the last, we need to jump to the next alternative.
3019	//
3020	// If the alternative had adjusted the input position we must link
3021	// backtracking to here, correct, and then jump on. If not we can
3022	// link the backtracks directly to their destination.
3023	if (op.m_checkAdjust) {
3024	// Handle the cases where we need to link the backtracks here.
3025	m_backtrackingState.link(this);
3026	sub32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3027	if (!isLastAlternative) {
3028	// An alternative that is not the last should jump to its successor.
3029	jump(nextOp.m_reentry);
3030	} else if (!isBegin) {
3031	// The last of more than one alternatives must jump back to the beginning.
3032	nextOp.m_jumps.append(jump());
3033	} else {
3034	// A single alternative on its own can fall through.
3035	m_backtrackingState.fallthrough();
3036	}
3037	} else {
3038	// Handle the cases where we can link the backtracks directly to their destinations.
3039	if (!isLastAlternative) {
3040	// An alternative that is not the last should jump to its successor.
3041	m_backtrackingState.linkTo(nextOp.m_reentry, this);
3042	} else if (!isBegin) {
3043	// The last of more than one alternatives must jump back to the beginning.
3044	m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
3045	}
3046	// In the case of a single alternative on its own do nothing - it can fall through.
3047	}
3048
3049	// If there is a backtrack jump from a zero length match link it here.
3050	if (op.m_zeroLengthMatch.isSet())
3051	m_backtrackingState.append(op.m_zeroLengthMatch);
3052
3053	// At this point we've handled the backtracking back into this node.
3054	// Now link any backtracks that need to jump to here.
3055
3056	// For non-simple alternatives, link the alternative's 'return address'
3057	// so that we backtrack back out into the previous alternative.
3058	if (op.m_op == OpNestedAlternativeNext)
3059	m_backtrackingState.append(op.m_returnAddress);
3060
3061	// If there is more than one alternative, then the last alternative will
3062	// have planted a jump to be linked to the end. This jump was added to the
3063	// End node's m_jumps list. If we are back at the beginning, link it here.
3064	if (isBegin) {
3065	YarrOp* endOp = &m_ops[op.m_nextOp];
3066	while (endOp->m_nextOp != notFound) {
3067	ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext \|\| endOp->m_op == OpNestedAlternativeNext);
3068	endOp = &m_ops[endOp->m_nextOp];
3069	}
3070	ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd \|\| endOp->m_op == OpNestedAlternativeEnd);
3071	m_backtrackingState.append(endOp->m_jumps);
3072	}
3073
3074	if (!isBegin) {
3075	YarrOp& lastOp = m_ops[op.m_previousOp];
3076	m_checkedOffset += lastOp.m_checkAdjust;
3077	}
3078	m_checkedOffset -= op.m_checkAdjust;
3079	break;
3080	}
3081	case OpSimpleNestedAlternativeEnd:
3082	case OpNestedAlternativeEnd: {
3083	PatternTerm* term = op.m_term;
3084
3085	// If there is a backtrack jump from a zero length match link it here.
3086	if (op.m_zeroLengthMatch.isSet())
3087	m_backtrackingState.append(op.m_zeroLengthMatch);
3088
3089	// If we backtrack into the end of a simple subpattern do nothing;
3090	// just continue through into the last alternative. If we backtrack
3091	// into the end of a non-simple set of alterntives we need to jump
3092	// to the backtracking return address set up during generation.
3093	if (op.m_op == OpNestedAlternativeEnd) {
3094	m_backtrackingState.link(this);
3095
3096	// Plant a jump to the return address.
3097	unsigned parenthesesFrameLocation = term->frameLocation;
3098	loadFromFrameAndJump(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::returnAddressIndex());
3099
3100	// Link the DataLabelPtr associated with the end of the last
3101	// alternative to this point.
3102	m_backtrackingState.append(op.m_returnAddress);
3103	}
3104
3105	YarrOp& lastOp = m_ops[op.m_previousOp];
3106	m_checkedOffset += lastOp.m_checkAdjust;
3107	break;
3108	}
3109
3110	// OpParenthesesSubpatternOnceBegin/End
3111	//
3112	// When we are backtracking back out of a capturing subpattern we need
3113	// to clear the start index in the matches output array, to record that
3114	// this subpattern has not been captured.
3115	//
3116	// When backtracking back out of a Greedy quantified subpattern we need
3117	// to catch this, and try running the remainder of the alternative after
3118	// the subpattern again, skipping the parentheses.
3119	//
3120	// Upon backtracking back into a quantified set of parentheses we need to
3121	// check whether we were currently skipping the subpattern. If not, we
3122	// can backtrack into them, if we were we need to either backtrack back
3123	// out of the start of the parentheses, or jump back to the forwards
3124	// matching start, depending of whether the match is Greedy or NonGreedy.
3125	case OpParenthesesSubpatternOnceBegin: {
3126	PatternTerm* term = op.m_term;
3127	ASSERT(term->quantityMaxCount == `1`);
3128
3129	// We only need to backtrack to this point if capturing or greedy.
3130	if ((term->capture() && compileMode == IncludeSubpatterns) \|\| term->quantityType == QuantifierGreedy) {
3131	m_backtrackingState.link(this);
3132
3133	// If capturing, clear the capture (we only need to reset start).
3134	if (term->capture() && compileMode == IncludeSubpatterns)
3135	clearSubpatternStart(subpattern: term->parentheses.subpatternId);
3136
3137	// If Greedy, jump to the end.
3138	if (term->quantityType == QuantifierGreedy) {
3139	// Clear the flag in the stackframe indicating we ran through the subpattern.
3140	unsigned parenthesesFrameLocation = term->frameLocation;
3141	storeToFrame(TrustedImm32 (-`1`), parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex());
3142	// Jump to after the parentheses, skipping the subpattern.
3143	jump(m_ops[op.m_nextOp].m_reentry);
3144	// A backtrack from after the parentheses, when skipping the subpattern,
3145	// will jump back to here.
3146	op.m_jumps.link(this);
3147	}
3148
3149	m_backtrackingState.fallthrough();
3150	}
3151	break;
3152	}
3153	case OpParenthesesSubpatternOnceEnd: {
3154	PatternTerm* term = op.m_term;
3155
3156	if (term->quantityType != QuantifierFixedCount) {
3157	m_backtrackingState.link(this);
3158
3159	// Check whether we should backtrack back into the parentheses, or if we
3160	// are currently in a state where we had skipped over the subpattern
3161	// (in which case the flag value on the stack will be -1).
3162	unsigned parenthesesFrameLocation = term->frameLocation;
3163	Jump hadSkipped = branch32(Equal, Address (stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParenthesesOnce::beginIndex()) * sizeof(void*)), TrustedImm32 (-`1`));
3164
3165	if (term->quantityType == QuantifierGreedy) {
3166	// For Greedy parentheses, we skip after having already tried going
3167	// through the subpattern, so if we get here we're done.
3168	YarrOp& beginOp = m_ops[op.m_previousOp];
3169	beginOp.m_jumps.append(hadSkipped);
3170	} else {
3171	// For NonGreedy parentheses, we try skipping the subpattern first,
3172	// so if we get here we need to try running through the subpattern
3173	// next. Jump back to the start of the parentheses in the forwards
3174	// matching path.
3175	ASSERT(term->quantityType == QuantifierNonGreedy);
3176	YarrOp& beginOp = m_ops[op.m_previousOp];
3177	hadSkipped.linkTo(label: beginOp.m_reentry, masm: this);
3178	}
3179
3180	m_backtrackingState.fallthrough();
3181	}
3182
3183	m_backtrackingState.append(op.m_jumps);
3184	break;
3185	}
3186
3187	// OpParenthesesSubpatternTerminalBegin/End
3188	//
3189	// Terminal subpatterns will always match - there is nothing after them to
3190	// force a backtrack, and they have a minimum count of 0, and as such will
3191	// always produce an acceptable result.
3192	case OpParenthesesSubpatternTerminalBegin: {
3193	// We will backtrack to this point once the subpattern cannot match any
3194	// more. Since no match is accepted as a successful match (we are Greedy
3195	// quantified with a minimum of zero) jump back to the forwards matching
3196	// path at the end.
3197	YarrOp& endOp = m_ops[op.m_nextOp];
3198	m_backtrackingState.linkTo(endOp.m_reentry, this);
3199	break;
3200	}
3201	case OpParenthesesSubpatternTerminalEnd:
3202	// We should never be backtracking to here (hence the 'terminal' in the name).
3203	ASSERT(m_backtrackingState.isEmpty());
3204	m_backtrackingState.append(op.m_jumps);
3205	break;
3206
3207	// OpParenthesesSubpatternBegin/End
3208	//
3209	// When we are backtracking back out of a capturing subpattern we need
3210	// to clear the start index in the matches output array, to record that
3211	// this subpattern has not been captured.
3212	//
3213	// When backtracking back out of a Greedy quantified subpattern we need
3214	// to catch this, and try running the remainder of the alternative after
3215	// the subpattern again, skipping the parentheses.
3216	//
3217	// Upon backtracking back into a quantified set of parentheses we need to
3218	// check whether we were currently skipping the subpattern. If not, we
3219	// can backtrack into them, if we were we need to either backtrack back
3220	// out of the start of the parentheses, or jump back to the forwards
3221	// matching start, depending of whether the match is Greedy or NonGreedy.
3222	case OpParenthesesSubpatternBegin: {
3223	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3224	PatternTerm* term = op.m_term;
3225	unsigned parenthesesFrameLocation = term->frameLocation;
3226
3227	if (term->quantityType != QuantifierFixedCount) {
3228	m_backtrackingState.link(this);
3229
3230	RegisterID currParenContextReg = regT0;
3231	RegisterID newParenContextReg = regT1;
3232
3233	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex(), reg: currParenContextReg);
3234
3235	restoreParenContext(parenContextReg: currParenContextReg, tempReg: regT2, firstSubpattern: term->parentheses.subpatternId, lastSubpattern: term->parentheses.lastSubpatternId, subpatternBaseFrameLocation: parenthesesFrameLocation);
3236
3237	freeParenContext(headPtrRegister: currParenContextReg, newHeadPtrRegister: newParenContextReg);
3238	storeToFrame(newParenContextReg, parenthesesFrameLocation + BackTrackInfoParentheses::parenContextHeadIndex());
3239
3240	const RegisterID countTemporary = regT0;
3241	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), reg: countTemporary);
3242	Jump zeroLengthMatch = branchTest32(Zero, countTemporary);
3243
3244	sub32(TrustedImm32 (`1`), countTemporary);
3245	storeToFrame(countTemporary, parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex());
3246
3247	jump(m_ops[op.m_nextOp].m_reentry);
3248
3249	zeroLengthMatch.link(masm: this);
3250
3251	// Clear the flag in the stackframe indicating we didn't run through the subpattern.
3252	storeToFrame(TrustedImm32 (-`1`), parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex());
3253
3254	if (term->quantityType == QuantifierGreedy)
3255	jump(m_ops[op.m_nextOp].m_reentry);
3256
3257	// If Greedy, jump to the end.
3258	if (term->quantityType == QuantifierGreedy) {
3259	// A backtrack from after the parentheses, when skipping the subpattern,
3260	// will jump back to here.
3261	op.m_jumps.link(this);
3262	}
3263
3264	m_backtrackingState.fallthrough();
3265	}
3266	#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3267	RELEASE_ASSERT_NOT_REACHED();
3268	#endif
3269	break;
3270	}
3271	case OpParenthesesSubpatternEnd: {
3272	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3273	PatternTerm* term = op.m_term;
3274
3275	if (term->quantityType != QuantifierFixedCount) {
3276	m_backtrackingState.link(this);
3277
3278	unsigned parenthesesFrameLocation = term->frameLocation;
3279
3280	if (term->quantityType == QuantifierGreedy) {
3281	// Check whether we should backtrack back into the parentheses, or if we
3282	// are currently in a state where we had skipped over the subpattern
3283	// (in which case the flag value on the stack will be -1).
3284	Jump hadSkipped = branch32(Equal, Address (stackPointerRegister, (parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex()) * sizeof(void*)), TrustedImm32 (-`1`));
3285
3286	// For Greedy parentheses, we skip after having already tried going
3287	// through the subpattern, so if we get here we're done.
3288	YarrOp& beginOp = m_ops[op.m_previousOp];
3289	beginOp.m_jumps.append(hadSkipped);
3290	} else {
3291	// For NonGreedy parentheses, we try skipping the subpattern first,
3292	// so if we get here we need to try running through the subpattern
3293	// next. Jump back to the start of the parentheses in the forwards
3294	// matching path.
3295	ASSERT(term->quantityType == QuantifierNonGreedy);
3296
3297	const RegisterID beginTemporary = regT0;
3298	const RegisterID countTemporary = regT1;
3299
3300	YarrOp& beginOp = m_ops[op.m_previousOp];
3301
3302	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::beginIndex(), reg: beginTemporary);
3303	branch32(Equal, beginTemporary, TrustedImm32 (-`1`)).linkTo(beginOp.m_reentry, this);
3304
3305	JumpList exceededMatchLimit;
3306
3307	if (term->quantityMaxCount != quantifyInfinite) {
3308	loadFromFrame(frameLocation: parenthesesFrameLocation + BackTrackInfoParentheses::matchAmountIndex(), reg: countTemporary);
3309	exceededMatchLimit.append(branch32(AboveOrEqual, countTemporary, Imm32 (term->quantityMaxCount.unsafeGet())));
3310	}
3311
3312	branch32(Above, index, beginTemporary).linkTo(beginOp.m_reentry, this);
3313
3314	exceededMatchLimit.link(masm: this);
3315	}
3316
3317	m_backtrackingState.fallthrough();
3318	}
3319
3320	m_backtrackingState.append(op.m_jumps);
3321	#else // !YARR_JIT_ALL_PARENS_EXPRESSIONS
3322	RELEASE_ASSERT_NOT_REACHED();
3323	#endif
3324	break;
3325	}
3326
3327	// OpParentheticalAssertionBegin/End
3328	case OpParentheticalAssertionBegin: {
3329	PatternTerm* term = op.m_term;
3330	YarrOp& endOp = m_ops[op.m_nextOp];
3331
3332	// We need to handle the backtracks upon backtracking back out
3333	// of a parenthetical assertion if either we need to correct
3334	// the input index, or the assertion was inverted.
3335	if (op.m_checkAdjust \|\| term->invert()) {
3336	m_backtrackingState.link(this);
3337
3338	if (op.m_checkAdjust)
3339	add32(Imm32(op.m_checkAdjust.unsafeGet()), index);
3340
3341	// In an inverted assertion failure to match the subpattern
3342	// is treated as a successful match - jump to the end of the
3343	// subpattern. We already have adjusted the input position
3344	// back to that before the assertion, which is correct.
3345	if (term->invert())
3346	jump(endOp.m_reentry);
3347
3348	m_backtrackingState.fallthrough();
3349	}
3350
3351	// The End node's jump list will contain any backtracks into
3352	// the end of the assertion. Also, if inverted, we will have
3353	// added the failure caused by a successful match to this.
3354	m_backtrackingState.append(endOp.m_jumps);
3355
3356	m_checkedOffset += op.m_checkAdjust;
3357	break;
3358	}
3359	case OpParentheticalAssertionEnd: {
3360	// FIXME: We should really be clearing any nested subpattern
3361	// matches on bailing out from after the pattern. Firefox has
3362	// this bug too (presumably because they use YARR!)
3363
3364	// Never backtrack into an assertion; later failures bail to before the begin.
3365	m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
3366
3367	YarrOp& lastOp = m_ops[op.m_previousOp];
3368	m_checkedOffset -= lastOp.m_checkAdjust;
3369	break;
3370	}
3371
3372	case OpMatchFailed:
3373	break;
3374	}
3375
3376	} while (opIndex);
3377	}
3378
3379	// Compilation methods:
3380	// ====================
3381
3382	// opCompileParenthesesSubpattern
3383	// Emits ops for a subpattern (set of parentheses). These consist
3384	// of a set of alternatives wrapped in an outer set of nodes for
3385	// the parentheses.
3386	// Supported types of parentheses are 'Once' (quantityMaxCount == 1),
3387	// 'Terminal' (non-capturing parentheses quantified as greedy
3388	// and infinite), and 0 based greedy / non-greedy quantified parentheses.
3389	// Alternatives will use the 'Simple' set of ops if either the
3390	// subpattern is terminal (in which case we will never need to
3391	// backtrack), or if the subpattern only contains one alternative.
3392	void opCompileParenthesesSubpattern(PatternTerm* term)
3393	{
3394	YarrOpCode parenthesesBeginOpCode;
3395	YarrOpCode parenthesesEndOpCode;
3396	YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
3397	YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
3398	YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
3399
3400	// We can currently only compile quantity 1 subpatterns that are
3401	// not copies. We generate a copy in the case of a range quantifier,
3402	// e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
3403	// /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)/ repectively). The problem*
3404	// comes where the subpattern is capturing, in which case we would
3405	// need to restore the capture from the first subpattern upon a
3406	// failure in the second.
3407	if (term->quantityMinCount && term->quantityMinCount != term->quantityMaxCount) {
3408	m_failureReason = JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum;
3409	return;
3410	}
3411
3412	if (term->quantityMaxCount == `1` && !term->parentheses.isCopy) {
3413	// Select the 'Once' nodes.
3414	parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
3415	parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
3416
3417	// If there is more than one alternative we cannot use the 'simple' nodes.
3418	if (term->parentheses.disjunction->m_alternatives.size() != `1`) {
3419	alternativeBeginOpCode = OpNestedAlternativeBegin;
3420	alternativeNextOpCode = OpNestedAlternativeNext;
3421	alternativeEndOpCode = OpNestedAlternativeEnd;
3422	}
3423	} else if (term->parentheses.isTerminal) {
3424	// Select the 'Terminal' nodes.
3425	parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
3426	parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
3427	} else {
3428	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3429	// We only handle generic parenthesis with non-fixed counts.
3430	if (term->quantityType == QuantifierFixedCount) {
3431	// This subpattern is not supported by the JIT.
3432	m_failureReason = JITFailureReason::FixedCountParenthesizedSubpattern;
3433	return;
3434	}
3435
3436	m_containsNestedSubpatterns = true;
3437
3438	// Select the 'Generic' nodes.
3439	parenthesesBeginOpCode = OpParenthesesSubpatternBegin;
3440	parenthesesEndOpCode = OpParenthesesSubpatternEnd;
3441
3442	// If there is more than one alternative we cannot use the 'simple' nodes.
3443	if (term->parentheses.disjunction->m_alternatives.size() != `1`) {
3444	alternativeBeginOpCode = OpNestedAlternativeBegin;
3445	alternativeNextOpCode = OpNestedAlternativeNext;
3446	alternativeEndOpCode = OpNestedAlternativeEnd;
3447	}
3448	#else
3449	// This subpattern is not supported by the JIT.
3450	m_failureReason = JITFailureReason::ParenthesizedSubpattern;
3451	return;
3452	#endif
3453	}
3454
3455	size_t parenBegin = m_ops.size();
3456	m_ops.append(parenthesesBeginOpCode);
3457
3458	m_ops.append(alternativeBeginOpCode);
3459	m_ops.last().m_previousOp = notFound;
3460	m_ops.last().m_term = term;
3461	Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3462	for (unsigned i = `0`; i < alternatives.size(); ++i) {
3463	size_t lastOpIndex = m_ops.size() - `1`;
3464
3465	PatternAlternative* nestedAlternative = alternatives [i].get();
3466	opCompileAlternative(alternative: nestedAlternative);
3467
3468	size_t thisOpIndex = m_ops.size();
3469	m_ops.append(YarrOp(alternativeNextOpCode));
3470
3471	YarrOp& lastOp = m_ops[lastOpIndex];
3472	YarrOp& thisOp = m_ops[thisOpIndex];
3473
3474	lastOp.m_alternative = nestedAlternative;
3475	lastOp.m_nextOp = thisOpIndex;
3476	thisOp.m_previousOp = lastOpIndex;
3477	thisOp.m_term = term;
3478	}
3479	YarrOp& lastOp = m_ops.last();
3480	ASSERT(lastOp.m_op == alternativeNextOpCode);
3481	lastOp.m_op = alternativeEndOpCode;
3482	lastOp.m_alternative = `0`;
3483	lastOp.m_nextOp = notFound;
3484
3485	size_t parenEnd = m_ops.size();
3486	m_ops.append(parenthesesEndOpCode);
3487
3488	m_ops[parenBegin].m_term = term;
3489	m_ops[parenBegin].m_previousOp = notFound;
3490	m_ops[parenBegin].m_nextOp = parenEnd;
3491	m_ops[parenEnd].m_term = term;
3492	m_ops[parenEnd].m_previousOp = parenBegin;
3493	m_ops[parenEnd].m_nextOp = notFound;
3494	}
3495
3496	// opCompileParentheticalAssertion
3497	// Emits ops for a parenthetical assertion. These consist of an
3498	// OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
3499	// the alternatives, with these wrapped by an outer pair of
3500	// OpParentheticalAssertionBegin/End nodes.
3501	// We can always use the OpSimpleNestedAlternative nodes in the
3502	// case of parenthetical assertions since these only ever match
3503	// once, and will never backtrack back into the assertion.
3504	void opCompileParentheticalAssertion(PatternTerm* term)
3505	{
3506	size_t parenBegin = m_ops.size();
3507	m_ops.append(OpParentheticalAssertionBegin);
3508
3509	m_ops.append(OpSimpleNestedAlternativeBegin);
3510	m_ops.last().m_previousOp = notFound;
3511	m_ops.last().m_term = term;
3512	Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
3513	for (unsigned i = `0`; i < alternatives.size(); ++i) {
3514	size_t lastOpIndex = m_ops.size() - `1`;
3515
3516	PatternAlternative* nestedAlternative = alternatives [i].get();
3517	opCompileAlternative(alternative: nestedAlternative);
3518
3519	size_t thisOpIndex = m_ops.size();
3520	m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
3521
3522	YarrOp& lastOp = m_ops[lastOpIndex];
3523	YarrOp& thisOp = m_ops[thisOpIndex];
3524
3525	lastOp.m_alternative = nestedAlternative;
3526	lastOp.m_nextOp = thisOpIndex;
3527	thisOp.m_previousOp = lastOpIndex;
3528	thisOp.m_term = term;
3529	}
3530	YarrOp& lastOp = m_ops.last();
3531	ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
3532	lastOp.m_op = OpSimpleNestedAlternativeEnd;
3533	lastOp.m_alternative = `0`;
3534	lastOp.m_nextOp = notFound;
3535
3536	size_t parenEnd = m_ops.size();
3537	m_ops.append(OpParentheticalAssertionEnd);
3538
3539	m_ops[parenBegin].m_term = term;
3540	m_ops[parenBegin].m_previousOp = notFound;
3541	m_ops[parenBegin].m_nextOp = parenEnd;
3542	m_ops[parenEnd].m_term = term;
3543	m_ops[parenEnd].m_previousOp = parenBegin;
3544	m_ops[parenEnd].m_nextOp = notFound;
3545	}
3546
3547	// opCompileAlternative
3548	// Called to emit nodes for all terms in an alternative.
3549	void opCompileAlternative(PatternAlternative* alternative)
3550	{
3551	optimizeAlternative(alternative);
3552
3553	for (unsigned i = `0`; i < alternative->m_terms.size(); ++i) {
3554	PatternTerm* term = &alternative->m_terms [i];
3555
3556	switch (term->type) {
3557	case PatternTerm::TypeParenthesesSubpattern:
3558	opCompileParenthesesSubpattern(term);
3559	break;
3560
3561	case PatternTerm::TypeParentheticalAssertion:
3562	opCompileParentheticalAssertion(term);
3563	break;
3564
3565	default:
3566	m_ops.append(term);
3567	}
3568	}
3569	}
3570
3571	// opCompileBody
3572	// This method compiles the body disjunction of the regular expression.
3573	// The body consists of two sets of alternatives - zero or more 'once
3574	// through' (BOL anchored) alternatives, followed by zero or more
3575	// repeated alternatives.
3576	// For each of these two sets of alteratives, if not empty they will be
3577	// wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
3578	// 'begin' node referencing the first alternative, and 'next' nodes
3579	// referencing any further alternatives. The begin/next/end nodes are
3580	// linked together in a doubly linked list. In the case of repeating
3581	// alternatives, the end node is also linked back to the beginning.
3582	// If no repeating alternatives exist, then a OpMatchFailed node exists
3583	// to return the failing result.
3584	void opCompileBody(PatternDisjunction* disjunction)
3585	{
3586	Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
3587	size_t currentAlternativeIndex = `0`;
3588
3589	// Emit the 'once through' alternatives.
3590	if (alternatives.size() && alternatives [`0`]->onceThrough()) {
3591	m_ops.append(YarrOp(OpBodyAlternativeBegin));
3592	m_ops.last().m_previousOp = notFound;
3593
3594	do {
3595	size_t lastOpIndex = m_ops.size() - `1`;
3596	PatternAlternative* alternative = alternatives [currentAlternativeIndex].get();
3597	opCompileAlternative(alternative);
3598
3599	size_t thisOpIndex = m_ops.size();
3600	m_ops.append(YarrOp(OpBodyAlternativeNext));
3601
3602	YarrOp& lastOp = m_ops[lastOpIndex];
3603	YarrOp& thisOp = m_ops[thisOpIndex];
3604
3605	lastOp.m_alternative = alternative;
3606	lastOp.m_nextOp = thisOpIndex;
3607	thisOp.m_previousOp = lastOpIndex;
3608
3609	++currentAlternativeIndex;
3610	} while (currentAlternativeIndex < alternatives.size() && alternatives [currentAlternativeIndex]->onceThrough());
3611
3612	YarrOp& lastOp = m_ops.last();
3613
3614	ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3615	lastOp.m_op = OpBodyAlternativeEnd;
3616	lastOp.m_alternative = `0`;
3617	lastOp.m_nextOp = notFound;
3618	}
3619
3620	if (currentAlternativeIndex == alternatives.size()) {
3621	m_ops.append(YarrOp(OpMatchFailed));
3622	return;
3623	}
3624
3625	// Emit the repeated alternatives.
3626	size_t repeatLoop = m_ops.size();
3627	m_ops.append(YarrOp(OpBodyAlternativeBegin));
3628	m_ops.last().m_previousOp = notFound;
3629	do {
3630	size_t lastOpIndex = m_ops.size() - `1`;
3631	PatternAlternative* alternative = alternatives [currentAlternativeIndex].get();
3632	ASSERT(!alternative->onceThrough());
3633	opCompileAlternative(alternative);
3634
3635	size_t thisOpIndex = m_ops.size();
3636	m_ops.append(YarrOp(OpBodyAlternativeNext));
3637
3638	YarrOp& lastOp = m_ops[lastOpIndex];
3639	YarrOp& thisOp = m_ops[thisOpIndex];
3640
3641	lastOp.m_alternative = alternative;
3642	lastOp.m_nextOp = thisOpIndex;
3643	thisOp.m_previousOp = lastOpIndex;
3644
3645	++currentAlternativeIndex;
3646	} while (currentAlternativeIndex < alternatives.size());
3647	YarrOp& lastOp = m_ops.last();
3648	ASSERT(lastOp.m_op == OpBodyAlternativeNext);
3649	lastOp.m_op = OpBodyAlternativeEnd;
3650	lastOp.m_alternative = `0`;
3651	lastOp.m_nextOp = repeatLoop;
3652	}
3653
3654	void generateTryReadUnicodeCharacterHelper()
3655	{
3656	#ifdef JIT_UNICODE_EXPRESSIONS
3657	if (m_tryReadUnicodeCharacterCalls.isEmpty())
3658	return;
3659
3660	ASSERT(m_decodeSurrogatePairs);
3661
3662	m_tryReadUnicodeCharacterEntry = label();
3663
3664	tryReadUnicodeCharImpl(resultReg: regT0);
3665
3666	ret();
3667	#endif
3668	}
3669
3670	void generateEnter()
3671	{
3672	#if CPU(X86_64)
3673	push(X86Registers::ebp);
3674	move(stackPointerRegister, X86Registers::ebp);
3675
3676	if (m_pattern.m_saveInitialStartValue)
3677	push(X86Registers::ebx);
3678
3679	#if OS(WINDOWS)
3680	push(X86Registers::edi);
3681	#endif
3682	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3683	if (m_containsNestedSubpatterns) {
3684	#if OS(WINDOWS)
3685	push(X86Registers::esi);
3686	#endif
3687	push(X86Registers::r12);
3688	}
3689	#endif
3690
3691	if (m_decodeSurrogatePairs) {
3692	push(X86Registers::r13);
3693	push(X86Registers::r14);
3694	push(X86Registers::r15);
3695
3696	move(TrustedImm32 (`0xd800`), leadingSurrogateTag);
3697	move(TrustedImm32 (`0xdc00`), trailingSurrogateTag);
3698	}
3699	// The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3700	zeroExtend32ToPtr(src: index, dest: index);
3701	zeroExtend32ToPtr(src: length, dest: length);
3702	#if OS(WINDOWS)
3703	if (compileMode == IncludeSubpatterns)
3704	loadPtr(Address(X86Registers::ebp, `6` * sizeof(void*)), output);
3705	// rcx is the pointer to the allocated space for result in x64 Windows.
3706	push(X86Registers::ecx);
3707	#endif
3708	#elif CPU(X86)
3709	push(X86Registers::ebp);
3710	move(stackPointerRegister, X86Registers::ebp);
3711	// TODO: do we need spill registers to fill the output pointer if there are no sub captures?
3712	push(X86Registers::ebx);
3713	push(X86Registers::edi);
3714	push(X86Registers::esi);
3715	// load output into edi (2 = saved ebp + return address).
3716	#if COMPILER(MSVC)
3717	loadPtr(Address(X86Registers::ebp, `2` * sizeof(void*)), input);
3718	loadPtr(Address(X86Registers::ebp, `3` * sizeof(void*)), index);
3719	loadPtr(Address(X86Registers::ebp, `4` * sizeof(void*)), length);
3720	if (compileMode == IncludeSubpatterns)
3721	loadPtr(Address(X86Registers::ebp, `5` * sizeof(void*)), output);
3722	#else
3723	if (compileMode == IncludeSubpatterns)
3724	loadPtr(Address(X86Registers::ebp, `2` * sizeof(void*)), output);
3725	#endif
3726	#elif CPU(ARM64)
3727	if (m_decodeSurrogatePairs) {
3728	pushPair(framePointerRegister, linkRegister);
3729	move(TrustedImm32(`0x10000`), supplementaryPlanesBase);
3730	move(TrustedImm32(`0xfffffc00`), surrogateTagMask);
3731	move(TrustedImm32(`0xd800`), leadingSurrogateTag);
3732	move(TrustedImm32(`0xdc00`), trailingSurrogateTag);
3733	}
3734
3735	// The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
3736	zeroExtend32ToPtr(index, index);
3737	zeroExtend32ToPtr(length, length);
3738	#elif CPU(ARM_THUMB2)
3739	push(ARMRegisters::r4);
3740	push(ARMRegisters::r5);
3741	push(ARMRegisters::r6);
3742	push(ARMRegisters::r8);
3743	#elif CPU(MIPS)
3744	// Do nothing.
3745	#endif
3746
3747	store8(TrustedImm32 (`1`), &m_vm->isExecutingInRegExpJIT);
3748	}
3749
3750	void generateReturn()
3751	{
3752	store8(TrustedImm32 (`0`), &m_vm->isExecutingInRegExpJIT);
3753
3754	#if CPU(X86_64)
3755	#if OS(WINDOWS)
3756	// Store the return value in the allocated space pointed by rcx.
3757	pop(X86Registers::ecx);
3758	store64(returnRegister, Address(X86Registers::ecx));
3759	store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
3760	move(X86Registers::ecx, returnRegister);
3761	#endif
3762	if (m_decodeSurrogatePairs) {
3763	pop(X86Registers::r15);
3764	pop(X86Registers::r14);
3765	pop(X86Registers::r13);
3766	}
3767
3768	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3769	if (m_containsNestedSubpatterns) {
3770	pop(X86Registers::r12);
3771	#if OS(WINDOWS)
3772	pop(X86Registers::esi);
3773	#endif
3774	}
3775	#endif
3776	#if OS(WINDOWS)
3777	pop(X86Registers::edi);
3778	#endif
3779
3780	if (m_pattern.m_saveInitialStartValue)
3781	pop(X86Registers::ebx);
3782	pop(X86Registers::ebp);
3783	#elif CPU(X86)
3784	pop(X86Registers::esi);
3785	pop(X86Registers::edi);
3786	pop(X86Registers::ebx);
3787	pop(X86Registers::ebp);
3788	#elif CPU(ARM64)
3789	if (m_decodeSurrogatePairs)
3790	popPair(framePointerRegister, linkRegister);
3791	#elif CPU(ARM_THUMB2)
3792	pop(ARMRegisters::r8);
3793	pop(ARMRegisters::r6);
3794	pop(ARMRegisters::r5);
3795	pop(ARMRegisters::r4);
3796	#elif CPU(MIPS)
3797	// Do nothing
3798	#endif
3799	ret();
3800	}
3801
3802	public:
3803	YarrGenerator(VM* vm, YarrPattern& pattern, YarrCodeBlock& codeBlock, YarrCharSize charSize)
3804	: m_vm(vm)
3805	, m_pattern(pattern)
3806	, m_codeBlock(codeBlock)
3807	, m_charSize(charSize)
3808	, m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
3809	, m_unicodeIgnoreCase(m_pattern.unicode() && m_pattern.ignoreCase())
3810	, m_canonicalMode(m_pattern.unicode() ? CanonicalMode::Unicode : CanonicalMode::UCS2)
3811	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3812	, m_containsNestedSubpatterns(false)
3813	, m_parenContextSizes(compileMode == IncludeSubpatterns ? m_pattern.m_numSubpatterns : `0`, m_pattern.m_body->m_callFrameSize)
3814	#endif
3815	{
3816	}
3817
3818	void compile()
3819	{
3820	YarrCodeBlock& codeBlock = m_codeBlock;
3821
3822	#ifndef JIT_UNICODE_EXPRESSIONS
3823	if (m_decodeSurrogatePairs) {
3824	codeBlock.setFallBackWithFailureReason(JITFailureReason::DecodeSurrogatePair);
3825	return;
3826	}
3827	#endif
3828
3829	if (m_pattern.m_containsBackreferences
3830	#if ENABLE(YARR_JIT_BACKREFERENCES)
3831	&& (compileMode == MatchOnly \|\| (m_pattern.ignoreCase() && m_charSize != Char8))
3832	#endif
3833	) {
3834	codeBlock.setFallBackWithFailureReason(JITFailureReason::BackReference);
3835	return;
3836	}
3837
3838	// We need to compile before generating code since we set flags based on compilation that
3839	// are used during generation.
3840	opCompileBody(disjunction: m_pattern.m_body);
3841
3842	if (m_failureReason) {
3843	codeBlock.setFallBackWithFailureReason(*m_failureReason);
3844	return;
3845	}
3846
3847	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3848	if (m_containsNestedSubpatterns)
3849	codeBlock.setUsesPatternContextBuffer();
3850	#endif
3851
3852	generateEnter();
3853
3854	Jump hasInput = checkInput();
3855	generateFailReturn();
3856	hasInput.link(masm: this);
3857
3858	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3859	if (m_containsNestedSubpatterns)
3860	move(TrustedImm32 (matchLimit), remainingMatchCount);
3861	#endif
3862
3863	if (compileMode == IncludeSubpatterns) {
3864	for (unsigned i = `0`; i < m_pattern.m_numSubpatterns + `1`; ++i)
3865	store32(TrustedImm32 (-`1`), Address (output, (i << `1`) * sizeof(int)));
3866	}
3867
3868	if (!m_pattern.m_body->m_hasFixedSize)
3869	setMatchStart(index);
3870
3871	initCallFrame();
3872
3873	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3874	if (m_containsNestedSubpatterns)
3875	initParenContextFreeList();
3876	#endif
3877
3878	if (m_pattern.m_saveInitialStartValue) {
3879	#ifdef HAVE_INITIAL_START_REG
3880	move(index, initialStart);
3881	#else
3882	storeToFrame(index, m_pattern.m_initialStartValueFrameLocation);
3883	#endif
3884	}
3885
3886	generate();
3887	backtrack();
3888
3889	generateTryReadUnicodeCharacterHelper();
3890
3891	generateJITFailReturn();
3892
3893	JSGlobalData data(m_vm->regExpAllocator);
3894	DefaultLinkBuffer linkBuffer(data, this, REGEXP_CODE_ID, JITCompilationCanFail);
3895	if (linkBuffer.didFailToAllocate()) {
3896	codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
3897	return;
3898	}
3899
3900	if (!m_tryReadUnicodeCharacterCalls.isEmpty()) {
3901	CodeLocationLabel tryReadUnicodeCharacterHelper = linkBuffer.locationOf(label: m_tryReadUnicodeCharacterEntry);
3902
3903	for (auto call : m_tryReadUnicodeCharacterCalls)
3904	linkBuffer.link(call, function: tryReadUnicodeCharacterHelper);
3905	}
3906
3907	m_backtrackingState.linkDataLabels(linkBuffer);
3908
3909	CodeRef codeRef;
3910	if (compileMode == MatchOnly) {
3911	if (m_charSize == Char8) {
3912	codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT",
3913	"Match-only 8-bit regular expression");
3914	codeBlock.set8BitCodeMatchOnly(codeRef);
3915	} else {
3916	codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT",
3917	"Match-only 16-bit regular expression");
3918	codeBlock.set16BitCodeMatchOnly(codeRef);
3919	}
3920	} else {
3921	if (m_charSize == Char8) {
3922	codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT", "8-bit regular expression");
3923	codeBlock.set8BitCode(codeRef);
3924	} else {
3925	codeRef = FINALIZE_CODE(linkBuffer, "YarrJIT", "16-bit regular expression");
3926	codeBlock.set16BitCode(codeRef);
3927	}
3928	}
3929	QV4::generateFunctionTable(function: nullptr, codeRef: &codeRef);
3930
3931	if (Q_UNLIKELY(!linkBuffer.makeExecutable()))
3932	m_failureReason = JITFailureReason::ExecutableMemoryAllocationFailure;
3933
3934	if (m_failureReason)
3935	codeBlock.setFallBackWithFailureReason(*m_failureReason);
3936	}
3937
3938	private:
3939	VM* m_vm;
3940
3941	YarrPattern& m_pattern;
3942
3943	YarrCodeBlock& m_codeBlock;
3944	YarrCharSize m_charSize;
3945
3946	// Used to detect regular expression constructs that are not currently
3947	// supported in the JIT; fall back to the interpreter when this is detected.
3948	std::optional<JITFailureReason> m_failureReason;
3949
3950	bool m_decodeSurrogatePairs;
3951	bool m_unicodeIgnoreCase;
3952	CanonicalMode m_canonicalMode;
3953	#if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
3954	bool m_containsNestedSubpatterns;
3955	ParenContextSizes m_parenContextSizes;
3956	#endif
3957	JumpList m_abortExecution;
3958	JumpList m_hitMatchLimit;
3959	Vector<Call> m_tryReadUnicodeCharacterCalls;
3960	Label m_tryReadUnicodeCharacterEntry;
3961
3962	// The regular expression expressed as a linear sequence of operations.
3963	Vector<YarrOp, `128`> m_ops;
3964
3965	// This records the current input offset being applied due to the current
3966	// set of alternatives we are nested within. E.g. when matching the
3967	// character 'b' within the regular expression /abc/, we will know that
3968	// the minimum size for the alternative is 3, checked upon entry to the
3969	// alternative, and that 'b' is at offset 1 from the start, and as such
3970	// when matching 'b' we need to apply an offset of -2 to the load.
3971	//
3972	// FIXME: This should go away. Rather than tracking this value throughout
3973	// code generation, we should gather this information up front & store it
3974	// on the YarrOp structure.
3975	Checked<unsigned> m_checkedOffset;
3976
3977	// This class records state whilst generating the backtracking path of code.
3978	BacktrackingState m_backtrackingState;
3979	};
3980
3981	void YarrCodeBlock::replaceCodeRef(MacroAssemblerCodeRef &target,
3982	const MacroAssemblerCodeRef &source)
3983	{
3984	if (!!target && target.code().executableAddress() != source.code().executableAddress())
3985	QV4::destroyFunctionTable(function: nullptr, codeRef: &target);
3986
3987	target = source;
3988	}
3989
3990	static void dumpCompileFailure(JITFailureReason failure)
3991	{
3992	switch (failure) {
3993	case JITFailureReason::DecodeSurrogatePair:
3994	dataLog(value: "Can't JIT a pattern decoding surrogate pairs\n");
3995	break;
3996	case JITFailureReason::BackReference:
3997	dataLog(value: "Can't JIT some patterns containing back references\n");
3998	break;
3999	case JITFailureReason::ForwardReference:
4000	dataLog(value: "Can't JIT a pattern containing forward references\n");
4001	break;
4002	case JITFailureReason::VariableCountedParenthesisWithNonZeroMinimum:
4003	dataLog(value: "Can't JIT a pattern containing a variable counted parenthesis with a non-zero minimum\n");
4004	break;
4005	case JITFailureReason::ParenthesizedSubpattern:
4006	dataLog(value: "Can't JIT a pattern containing parenthesized subpatterns\n");
4007	break;
4008	case JITFailureReason::FixedCountParenthesizedSubpattern:
4009	dataLog(value: "Can't JIT a pattern containing fixed count parenthesized subpatterns\n");
4010	break;
4011	case JITFailureReason::ExecutableMemoryAllocationFailure:
4012	dataLog(value: "Can't JIT because of failure of allocation of executable memory\n");
4013	break;
4014	}
4015	}
4016
4017	void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
4018	{
4019	if (mode == MatchOnly)
4020	YarrGenerator<MatchOnly>(vm, pattern, codeBlock, charSize).compile();
4021	else
4022	YarrGenerator<IncludeSubpatterns>(vm, pattern, codeBlock, charSize).compile();
4023
4024	if (auto failureReason = codeBlock.failureReason()) {
4025	if (Options::dumpCompiledRegExpPatterns())
4026	dumpCompileFailure(failure: *failureReason);
4027	}
4028	}
4029
4030	}}
4031
4032	#endif
4033

source code of qtdeclarative/src/3rdparty/masm/yarr/YarrJIT.cpp