YarrInterpreter.cpp source code [qtdeclarative/src/3rdparty/masm/yarr/YarrInterpreter.cpp]

1	/*
2	* Copyright (C) 2009, 2013-2017 Apple Inc. All rights reserved.
3	* Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	* 1. Redistributions of source code must retain the above copyright
9	* notice, this list of conditions and the following disclaimer.
10	* 2. Redistributions in binary form must reproduce the above copyright
11	* notice, this list of conditions and the following disclaimer in the
12	* documentation and/or other materials provided with the distribution.
13	*
14	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25	*/
26
27	#include "config.h"
28	#include "YarrInterpreter.h"
29
30	#include "Options.h"
31	#include "SuperSampler.h"
32	#include "Yarr.h"
33	#include "YarrCanonicalize.h"
34	#include <wtf/BumpPointerAllocator.h>
35	#include <wtf/CheckedArithmetic.h>
36	#include <wtf/DataLog.h>
37	#include <wtf/StdLibExtras.h>
38	#include <wtf/text/CString.h>
39	#include <wtf/text/WTFString.h>
40
41	namespace JSC { namespace Yarr {
42
43	template<typename CharType>
44	class Interpreter {
45	public:
46	struct ParenthesesDisjunctionContext;
47
48	struct BackTrackInfoParentheses {
49	uintptr_t matchAmount;
50	ParenthesesDisjunctionContext* lastContext;
51	};
52
53	static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
54	{
55	context->next = backTrack->lastContext;
56	backTrack->lastContext = context;
57	++backTrack->matchAmount;
58	}
59
60	static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
61	{
62	RELEASE_ASSERT(backTrack->matchAmount);
63	RELEASE_ASSERT(backTrack->lastContext);
64	backTrack->lastContext = backTrack->lastContext->next;
65	--backTrack->matchAmount;
66	}
67
68	struct DisjunctionContext
69	{
70	DisjunctionContext() = default;
71
72	void* operator new(size_t, void* where)
73	{
74	return where;
75	}
76
77	static size_t allocationSize(unsigned numberOfFrames)
78	{
79	static_assert(alignof(DisjunctionContext) <= sizeof(void*), "");
80	size_t rawSize = (sizeof(DisjunctionContext) - sizeof(uintptr_t) + Checked<size_t>(numberOfFrames) * sizeof(uintptr_t)).unsafeGet();
81	size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(x: rawSize);
82	RELEASE_ASSERT(roundedSize >= rawSize);
83	return roundedSize;
84	}
85
86	int term { `0` };
87	unsigned matchBegin;
88	unsigned matchEnd;
89	uintptr_t frame[`1`];
90	};
91
92	DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
93	{
94	size_t size = DisjunctionContext::allocationSize(disjunction->m_frameSize);
95	allocatorPool = allocatorPool->ensureCapacity(size);
96	RELEASE_ASSERT(allocatorPool);
97	return new (allocatorPool->alloc(size)) DisjunctionContext();
98	}
99
100	void freeDisjunctionContext(DisjunctionContext* context)
101	{
102	allocatorPool = allocatorPool->dealloc(position: context);
103	}
104
105	struct ParenthesesDisjunctionContext
106	{
107	ParenthesesDisjunctionContext(unsigned* output, ByteTerm& term)
108	{
109	unsigned firstSubpatternId = term.atom.subpatternId;
110	unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
111
112	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i) {
113	subpatternBackup[i] = output[(firstSubpatternId << `1`) + i];
114	output[(firstSubpatternId << `1`) + i] = offsetNoMatch;
115	}
116
117	new (getDisjunctionContext(term)) DisjunctionContext();
118	}
119
120	void* operator new(size_t, void* where)
121	{
122	return where;
123	}
124
125	void restoreOutput(unsigned* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
126	{
127	for (unsigned i = `0`; i < (numNestedSubpatterns << `1`); ++i)
128	output[(firstSubpatternId << `1`) + i] = subpatternBackup[i];
129	}
130
131	DisjunctionContext* getDisjunctionContext(ByteTerm& term)
132	{
133	return bitwise_cast<DisjunctionContext>(bitwise_cast<uintptr_t>(this*) + allocationSize(numberOfSubpatterns: term.atom.parenthesesDisjunction->m_numSubpatterns));
134	}
135
136	static size_t allocationSize(unsigned numberOfSubpatterns)
137	{
138	static_assert(alignof(ParenthesesDisjunctionContext) <= sizeof(void*), "");
139	size_t rawSize = (sizeof(ParenthesesDisjunctionContext) - sizeof(unsigned) + (Checked<size_t>(numberOfSubpatterns) * `2U`) * sizeof(unsigned)).unsafeGet();
140	size_t roundedSize = WTF::roundUpToMultipleOf<sizeof(void*)>(x: rawSize);
141	RELEASE_ASSERT(roundedSize >= rawSize);
142	return roundedSize;
143	}
144
145	ParenthesesDisjunctionContext* next { nullptr };
146	unsigned subpatternBackup[`1`];
147	};
148
149	ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, unsigned* output, ByteTerm& term)
150	{
151	size_t size = (Checked<size_t>(ParenthesesDisjunctionContext::allocationSize(term.atom.parenthesesDisjunction->m_numSubpatterns)) + DisjunctionContext::allocationSize(disjunction->m_frameSize)).unsafeGet();
152	allocatorPool = allocatorPool->ensureCapacity(size);
153	RELEASE_ASSERT(allocatorPool);
154	return new (allocatorPool->alloc(size)) ParenthesesDisjunctionContext(output, term);
155	}
156
157	void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
158	{
159	allocatorPool = allocatorPool->dealloc(position: context);
160	}
161
162	class InputStream {
163	public:
164	InputStream(const CharType* input, unsigned start, unsigned length, bool decodeSurrogatePairs)
165	: input(input)
166	, pos(start)
167	, length(length)
168	, decodeSurrogatePairs(decodeSurrogatePairs)
169	{
170	}
171
172	void next()
173	{
174	++pos;
175	}
176
177	void rewind(unsigned amount)
178	{
179	ASSERT(pos >= amount);
180	pos -= amount;
181	}
182
183	int read()
184	{
185	ASSERT(pos < length);
186	if (pos < length)
187	return input[pos];
188	return -`1`;
189	}
190
191	int readPair()
192	{
193	ASSERT(pos + `1` < length);
194	return input[pos] \| input[pos + `1`] << `16`;
195	}
196
197	int readChecked(unsigned negativePositionOffest)
198	{
199	RELEASE_ASSERT(pos >= negativePositionOffest);
200	unsigned p = pos - negativePositionOffest;
201	ASSERT(p < length);
202	int result = input[p];
203	if (U16_IS_LEAD(result) && decodeSurrogatePairs && p + `1` < length && U16_IS_TRAIL(input[p + `1`])) {
204	if (atEnd())
205	return -`1`;
206
207	result = U16_GET_SUPPLEMENTARY(result, input[p + `1`]);
208	next();
209	}
210	return result;
211	}
212
213	int readSurrogatePairChecked(unsigned negativePositionOffset)
214	{
215	RELEASE_ASSERT(pos >= negativePositionOffset);
216	unsigned p = pos - negativePositionOffset;
217	ASSERT(p < length);
218	if (p + `1` >= length)
219	return -`1`;
220
221	int first = input[p];
222	int second = input[p + `1`];
223	if (U16_IS_LEAD(first) && U16_IS_TRAIL(second))
224	return U16_GET_SUPPLEMENTARY(first, second);
225
226	return -`1`;
227	}
228
229	int reread(unsigned from)
230	{
231	ASSERT(from < length);
232	int result = input[from];
233	if (U16_IS_LEAD(result) && decodeSurrogatePairs && from + `1` < length && U16_IS_TRAIL(input[from + `1`]))
234	result = U16_GET_SUPPLEMENTARY(result, input[from + `1`]);
235	return result;
236	}
237
238	int prev()
239	{
240	ASSERT(!(pos > length));
241	if (pos && length)
242	return input[pos - `1`];
243	return -`1`;
244	}
245
246	unsigned getPos()
247	{
248	return pos;
249	}
250
251	void setPos(unsigned p)
252	{
253	pos = p;
254	}
255
256	bool atStart()
257	{
258	return pos == `0`;
259	}
260
261	bool atEnd()
262	{
263	return pos == length;
264	}
265
266	unsigned end()
267	{
268	return length;
269	}
270
271	bool checkInput(unsigned count)
272	{
273	if (((pos + count) <= length) && ((pos + count) >= pos)) {
274	pos += count;
275	return true;
276	}
277	return false;
278	}
279
280	void uncheckInput(unsigned count)
281	{
282	RELEASE_ASSERT(pos >= count);
283	pos -= count;
284	}
285
286	bool atStart(unsigned negativePositionOffset)
287	{
288	return pos == negativePositionOffset;
289	}
290
291	bool atEnd(unsigned negativePositionOffest)
292	{
293	RELEASE_ASSERT(pos >= negativePositionOffest);
294	return (pos - negativePositionOffest) == length;
295	}
296
297	bool isAvailableInput(unsigned offset)
298	{
299	return (((pos + offset) <= length) && ((pos + offset) >= pos));
300	}
301
302	private:
303	const CharType* input;
304	unsigned pos;
305	unsigned length;
306	bool decodeSurrogatePairs;
307	};
308
309	bool testCharacterClass(CharacterClass* characterClass, int ch)
310	{
311	auto linearSearchMatches = [&ch](const Vector<UChar32>& matches) {
312	for (unsigned i = `0`; i < matches.size(); ++i) {
313	if (ch == matches [i])
314	return true;
315	}
316
317	return false;
318	};
319
320	auto binarySearchMatches = [&ch](const Vector<UChar32>& matches) {
321	size_t low = `0`;
322	size_t high = matches.size() - `1`;
323
324	while (low <= high) {
325	size_t mid = low + (high - low) / `2`;
326	int diff = ch - matches [mid];
327	if (!diff)
328	return true;
329
330	if (diff < `0`) {
331	if (mid == low)
332	return false;
333	high = mid - `1`;
334	} else
335	low = mid + `1`;
336	}
337	return false;
338	};
339
340	auto linearSearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
341	for (unsigned i = `0`; i < ranges.size(); ++i) {
342	if ((ch >= ranges [i].begin) && (ch <= ranges [i].end))
343	return true;
344	}
345
346	return false;
347	};
348
349	auto binarySearchRanges = [&ch](const Vector<CharacterRange>& ranges) {
350	size_t low = `0`;
351	size_t high = ranges.size() - `1`;
352
353	while (low <= high) {
354	size_t mid = low + (high - low) / `2`;
355	int rangeBeginDiff = ch - ranges [mid].begin;
356	if (rangeBeginDiff >= `0` && ch <= ranges [mid].end)
357	return true;
358
359	if (rangeBeginDiff < `0`) {
360	if (mid == low)
361	return false;
362	high = mid - `1`;
363	} else
364	low = mid + `1`;
365	}
366	return false;
367	};
368
369	if (characterClass->m_anyCharacter)
370	return true;
371
372	const size_t thresholdForBinarySearch = `6`;
373
374	if (!isASCII(c: ch)) {
375	if (characterClass->m_matchesUnicode.size()) {
376	if (characterClass->m_matchesUnicode.size() > thresholdForBinarySearch) {
377	if (binarySearchMatches(characterClass->m_matchesUnicode))
378	return true;
379	} else if (linearSearchMatches(characterClass->m_matchesUnicode))
380	return true;
381	}
382
383	if (characterClass->m_rangesUnicode.size()) {
384	if (characterClass->m_rangesUnicode.size() > thresholdForBinarySearch) {
385	if (binarySearchRanges(characterClass->m_rangesUnicode))
386	return true;
387	} else if (linearSearchRanges(characterClass->m_rangesUnicode))
388	return true;
389	}
390	} else {
391	if (characterClass->m_matches.size()) {
392	if (characterClass->m_matches.size() > thresholdForBinarySearch) {
393	if (binarySearchMatches(characterClass->m_matches))
394	return true;
395	} else if (linearSearchMatches(characterClass->m_matches))
396	return true;
397	}
398
399	if (characterClass->m_ranges.size()) {
400	if (characterClass->m_ranges.size() > thresholdForBinarySearch) {
401	if (binarySearchRanges(characterClass->m_ranges))
402	return true;
403	} else if (linearSearchRanges(characterClass->m_ranges))
404	return true;
405	}
406	}
407
408	return false;
409	}
410
411	bool checkCharacter(int testChar, unsigned negativeInputOffset)
412	{
413	return testChar == input.readChecked(negativeInputOffset);
414	}
415
416	bool checkSurrogatePair(int testUnicodeChar, unsigned negativeInputOffset)
417	{
418	return testUnicodeChar == input.readSurrogatePairChecked(negativeInputOffset);
419	}
420
421	bool checkCasedCharacter(int loChar, int hiChar, unsigned negativeInputOffset)
422	{
423	int ch = input.readChecked(negativeInputOffset);
424	return (loChar == ch) \|\| (hiChar == ch);
425	}
426
427	bool checkCharacterClass(CharacterClass* characterClass, bool invert, unsigned negativeInputOffset)
428	{
429	bool match = testCharacterClass(characterClass, ch: input.readChecked(negativeInputOffset));
430	return invert ? !match : match;
431	}
432
433	bool tryConsumeBackReference(int matchBegin, int matchEnd, unsigned negativeInputOffset)
434	{
435	unsigned matchSize = (unsigned)(matchEnd - matchBegin);
436
437	if (!input.checkInput(matchSize))
438	return false;
439
440	for (unsigned i = `0`; i < matchSize; ++i) {
441	int oldCh = input.reread(matchBegin + i);
442	int ch;
443	if (!U_IS_BMP(oldCh)) {
444	ch = input.readSurrogatePairChecked(negativeInputOffset + matchSize - i);
445	++i;
446	} else
447	ch = input.readChecked(negativeInputOffset + matchSize - i);
448
449	if (oldCh == ch)
450	continue;
451
452	if (pattern->ignoreCase()) {
453	// See ES 6.0, 21.2.2.8.2 for the definition of Canonicalize(). For non-Unicode
454	// patterns, Unicode values are never allowed to match against ASCII ones.
455	// For Unicode, we need to check all canonical equivalents of a character.
456	if (!unicode && (isASCII(c: oldCh) \|\| isASCII(c: ch))) {
457	if (toASCIIUpper(c: oldCh) == toASCIIUpper(c: ch))
458	continue;
459	} else if (areCanonicallyEquivalent(a: oldCh, b: ch, canonicalMode: unicode ? CanonicalMode::Unicode : CanonicalMode::UCS2))
460	continue;
461	}
462
463	input.uncheckInput(matchSize);
464	return false;
465	}
466
467	return true;
468	}
469
470	bool matchAssertionBOL(ByteTerm& term)
471	{
472	return (input.atStart(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.readChecked(term.inputPosition + `1`)));
473	}
474
475	bool matchAssertionEOL(ByteTerm& term)
476	{
477	if (term.inputPosition)
478	return (input.atEnd(term.inputPosition)) \|\| (pattern->multiline() && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.readChecked(term.inputPosition)));
479
480	return (input.atEnd()) \|\| (pattern->multiline() && testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.read()));
481	}
482
483	bool matchAssertionWordBoundary(ByteTerm& term)
484	{
485	bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.readChecked(term.inputPosition + `1`));
486	bool readIsWordchar;
487	if (term.inputPosition)
488	readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.readChecked(term.inputPosition));
489	else
490	readIsWordchar = !input.atEnd() && testCharacterClass(characterClass: pattern->wordcharCharacterClass, ch: input.read());
491
492	bool wordBoundary = prevIsWordchar != readIsWordchar;
493	return term.invert() ? !wordBoundary : wordBoundary;
494	}
495
496	bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
497	{
498	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
499
500	switch (term.atom.quantityType) {
501	case QuantifierFixedCount:
502	break;
503
504	case QuantifierGreedy:
505	if (backTrack->matchAmount) {
506	--backTrack->matchAmount;
507	input.uncheckInput(U16_LENGTH(term.atom.patternCharacter));
508	return true;
509	}
510	break;
511
512	case QuantifierNonGreedy:
513	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
514	++backTrack->matchAmount;
515	if (checkCharacter(testChar: term.atom.patternCharacter, negativeInputOffset: term.inputPosition + `1`))
516	return true;
517	}
518	input.setPos(backTrack->begin);
519	break;
520	}
521
522	return false;
523	}
524
525	bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
526	{
527	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
528
529	switch (term.atom.quantityType) {
530	case QuantifierFixedCount:
531	break;
532
533	case QuantifierGreedy:
534	if (backTrack->matchAmount) {
535	--backTrack->matchAmount;
536	input.uncheckInput(`1`);
537	return true;
538	}
539	break;
540
541	case QuantifierNonGreedy:
542	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
543	++backTrack->matchAmount;
544	if (checkCasedCharacter(loChar: term.atom.casedCharacter.lo, hiChar: term.atom.casedCharacter.hi, negativeInputOffset: term.inputPosition + `1`))
545	return true;
546	}
547	input.uncheckInput(backTrack->matchAmount);
548	break;
549	}
550
551	return false;
552	}
553
554	bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
555	{
556	ASSERT(term.type == ByteTerm::TypeCharacterClass);
557	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
558
559	switch (term.atom.quantityType) {
560	case QuantifierFixedCount: {
561	if (unicode) {
562	backTrack->begin = input.getPos();
563	unsigned matchAmount = `0`;
564	for (matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
565	if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), negativeInputOffset: term.inputPosition - matchAmount)) {
566	input.setPos(backTrack->begin);
567	return false;
568	}
569	}
570
571	return true;
572	}
573
574	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
575	if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), negativeInputOffset: term.inputPosition - matchAmount))
576	return false;
577	}
578	return true;
579	}
580
581	case QuantifierGreedy: {
582	unsigned position = input.getPos();
583	backTrack->begin = position;
584	unsigned matchAmount = `0`;
585	while ((matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
586	if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), negativeInputOffset: term.inputPosition + `1`)) {
587	input.setPos(position);
588	break;
589	}
590	++matchAmount;
591	position = input.getPos();
592	}
593	backTrack->matchAmount = matchAmount;
594
595	return true;
596	}
597
598	case QuantifierNonGreedy:
599	backTrack->begin = input.getPos();
600	backTrack->matchAmount = `0`;
601	return true;
602	}
603
604	RELEASE_ASSERT_NOT_REACHED();
605	return false;
606	}
607
608	bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
609	{
610	ASSERT(term.type == ByteTerm::TypeCharacterClass);
611	BackTrackInfoCharacterClass* backTrack = reinterpret_cast<BackTrackInfoCharacterClass*>(context->frame + term.frameLocation);
612
613	switch (term.atom.quantityType) {
614	case QuantifierFixedCount:
615	if (unicode)
616	input.setPos(backTrack->begin);
617	break;
618
619	case QuantifierGreedy:
620	if (backTrack->matchAmount) {
621	if (unicode) {
622	// Rematch one less match
623	input.setPos(backTrack->begin);
624	--backTrack->matchAmount;
625	for (unsigned matchAmount = `0`; (matchAmount < backTrack->matchAmount) && input.checkInput(`1`); ++matchAmount) {
626	if (!checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), negativeInputOffset: term.inputPosition + `1`)) {
627	input.uncheckInput(`1`);
628	break;
629	}
630	}
631	return true;
632	}
633	--backTrack->matchAmount;
634	input.uncheckInput(`1`);
635	return true;
636	}
637	break;
638
639	case QuantifierNonGreedy:
640	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && input.checkInput(`1`)) {
641	++backTrack->matchAmount;
642	if (checkCharacterClass(characterClass: term.atom.characterClass, invert: term.invert(), negativeInputOffset: term.inputPosition + `1`))
643	return true;
644	}
645	input.setPos(backTrack->begin);
646	break;
647	}
648
649	return false;
650	}
651
652	bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
653	{
654	ASSERT(term.type == ByteTerm::TypeBackReference);
655	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
656
657	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
658	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
659
660	// If the end position of the referenced match hasn't set yet then the backreference in the same parentheses where it references to that.
661	// In this case the result of match is empty string like when it references to a parentheses with zero-width match.
662	// Eg.: /(a\1)/
663	if (matchEnd == offsetNoMatch)
664	return true;
665
666	if (matchBegin == offsetNoMatch)
667	return true;
668
669	ASSERT(matchBegin <= matchEnd);
670
671	if (matchBegin == matchEnd)
672	return true;
673
674	switch (term.atom.quantityType) {
675	case QuantifierFixedCount: {
676	backTrack->begin = input.getPos();
677	for (unsigned matchAmount = `0`; matchAmount < term.atom.quantityMaxCount; ++matchAmount) {
678	if (!tryConsumeBackReference(matchBegin, matchEnd, negativeInputOffset: term.inputPosition)) {
679	input.setPos(backTrack->begin);
680	return false;
681	}
682	}
683	return true;
684	}
685
686	case QuantifierGreedy: {
687	unsigned matchAmount = `0`;
688	while ((matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, negativeInputOffset: term.inputPosition))
689	++matchAmount;
690	backTrack->matchAmount = matchAmount;
691	return true;
692	}
693
694	case QuantifierNonGreedy:
695	backTrack->begin = input.getPos();
696	backTrack->matchAmount = `0`;
697	return true;
698	}
699
700	RELEASE_ASSERT_NOT_REACHED();
701	return false;
702	}
703
704	bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
705	{
706	ASSERT(term.type == ByteTerm::TypeBackReference);
707	BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
708
709	unsigned matchBegin = output[(term.atom.subpatternId << `1`)];
710	unsigned matchEnd = output[(term.atom.subpatternId << `1`) + `1`];
711
712	if (matchBegin == offsetNoMatch)
713	return false;
714
715	ASSERT(matchBegin <= matchEnd);
716
717	if (matchBegin == matchEnd)
718	return false;
719
720	switch (term.atom.quantityType) {
721	case QuantifierFixedCount:
722	// for quantityMaxCount == 1, could rewind.
723	input.setPos(backTrack->begin);
724	break;
725
726	case QuantifierGreedy:
727	if (backTrack->matchAmount) {
728	--backTrack->matchAmount;
729	input.rewind(matchEnd - matchBegin);
730	return true;
731	}
732	break;
733
734	case QuantifierNonGreedy:
735	if ((backTrack->matchAmount < term.atom.quantityMaxCount) && tryConsumeBackReference(matchBegin, matchEnd, negativeInputOffset: term.inputPosition)) {
736	++backTrack->matchAmount;
737	return true;
738	}
739	input.setPos(backTrack->begin);
740	break;
741	}
742
743	return false;
744	}
745
746	void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
747	{
748	if (term.capture()) {
749	unsigned subpatternId = term.atom.subpatternId;
750	output[(subpatternId << `1`)] = context->getDisjunctionContext(term)->matchBegin - term.inputPosition;
751	output[(subpatternId << `1`) + `1`] = context->getDisjunctionContext(term)->matchEnd - term.inputPosition;
752	}
753	}
754	void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
755	{
756	unsigned firstSubpatternId = term.atom.subpatternId;
757	unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
758	context->restoreOutput(output, firstSubpatternId, count);
759	}
760	JSRegExpResult parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
761	{
762	while (backTrack->matchAmount) {
763	ParenthesesDisjunctionContext* context = backTrack->lastContext;
764
765	JSRegExpResult result = matchDisjunction(disjunction: term.atom.parenthesesDisjunction, context: context->getDisjunctionContext(term), btrack: true);
766	if (result == JSRegExpMatch)
767	return JSRegExpMatch;
768
769	resetMatches(term, context);
770	popParenthesesDisjunctionContext(backTrack);
771	freeParenthesesDisjunctionContext(context);
772
773	if (result != JSRegExpNoMatch)
774	return result;
775	}
776
777	return JSRegExpNoMatch;
778	}
779
780	bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
781	{
782	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
783	ASSERT(term.atom.quantityMaxCount == `1`);
784
785	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
786
787	switch (term.atom.quantityType) {
788	case QuantifierGreedy: {
789	// set this speculatively; if we get to the parens end this will be true.
790	backTrack->begin = input.getPos();
791	break;
792	}
793	case QuantifierNonGreedy: {
794	backTrack->begin = notFound;
795	context->term += term.atom.parenthesesWidth;
796	return true;
797	}
798	case QuantifierFixedCount:
799	break;
800	}
801
802	if (term.capture()) {
803	unsigned subpatternId = term.atom.subpatternId;
804	output[(subpatternId << `1`)] = input.getPos() - term.inputPosition;
805	}
806
807	return true;
808	}
809
810	bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
811	{
812	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
813	ASSERT(term.atom.quantityMaxCount == `1`);
814
815	if (term.capture()) {
816	unsigned subpatternId = term.atom.subpatternId;
817	output[(subpatternId << `1`) + `1`] = input.getPos() - term.inputPosition;
818	}
819
820	if (term.atom.quantityType == QuantifierFixedCount)
821	return true;
822
823	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
824	return backTrack->begin != input.getPos();
825	}
826
827	bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
828	{
829	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
830	ASSERT(term.atom.quantityMaxCount == `1`);
831
832	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
833
834	if (term.capture()) {
835	unsigned subpatternId = term.atom.subpatternId;
836	output[(subpatternId << `1`)] = offsetNoMatch;
837	output[(subpatternId << `1`) + `1`] = offsetNoMatch;
838	}
839
840	switch (term.atom.quantityType) {
841	case QuantifierGreedy:
842	// if we backtrack to this point, there is another chance - try matching nothing.
843	ASSERT(backTrack->begin != notFound);
844	backTrack->begin = notFound;
845	context->term += term.atom.parenthesesWidth;
846	return true;
847	case QuantifierNonGreedy:
848	ASSERT(backTrack->begin != notFound);
849	FALLTHROUGH;
850	case QuantifierFixedCount:
851	break;
852	}
853
854	return false;
855	}
856
857	bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
858	{
859	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
860	ASSERT(term.atom.quantityMaxCount == `1`);
861
862	BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
863
864	switch (term.atom.quantityType) {
865	case QuantifierGreedy:
866	if (backTrack->begin == notFound) {
867	context->term -= term.atom.parenthesesWidth;
868	return false;
869	}
870	FALLTHROUGH;
871	case QuantifierNonGreedy:
872	if (backTrack->begin == notFound) {
873	backTrack->begin = input.getPos();
874	if (term.capture()) {
875	// Technically this access to inputPosition should be accessing the begin term's
876	// inputPosition, but for repeats other than fixed these values should be
877	// the same anyway! (We don't pre-check for greedy or non-greedy matches.)
878	ASSERT((&term - term.atom.parenthesesWidth)->type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
879	ASSERT((&term - term.atom.parenthesesWidth)->inputPosition == term.inputPosition);
880	unsigned subpatternId = term.atom.subpatternId;
881	output[subpatternId << `1`] = input.getPos() - term.inputPosition;
882	}
883	context->term -= term.atom.parenthesesWidth;
884	return true;
885	}
886	FALLTHROUGH;
887	case QuantifierFixedCount:
888	break;
889	}
890
891	return false;
892	}
893
894	bool matchParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
895	{
896	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
897	ASSERT(term.atom.quantityType == QuantifierGreedy);
898	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
899	ASSERT(!term.capture());
900
901	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
902	backTrack->begin = input.getPos();
903	return true;
904	}
905
906	bool matchParenthesesTerminalEnd(ByteTerm& term, DisjunctionContext* context)
907	{
908	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalEnd);
909
910	BackTrackInfoParenthesesTerminal* backTrack = reinterpret_cast<BackTrackInfoParenthesesTerminal*>(context->frame + term.frameLocation);
911	// Empty match is a failed match.
912	if (backTrack->begin == input.getPos())
913	return false;
914
915	// Successful match! Okay, what's next? - loop around and try to match more!
916	context->term -= (term.atom.parenthesesWidth + `1`);
917	return true;
918	}
919
920	bool backtrackParenthesesTerminalBegin(ByteTerm& term, DisjunctionContext* context)
921	{
922	ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
923	ASSERT(term.atom.quantityType == QuantifierGreedy);
924	ASSERT(term.atom.quantityMaxCount == quantifyInfinite);
925	ASSERT(!term.capture());
926
927	// If we backtrack to this point, we have failed to match this iteration of the parens.
928	// Since this is greedy / zero minimum a failed is also accepted as a match!
929	context->term += term.atom.parenthesesWidth;
930	return true;
931	}
932
933	bool backtrackParenthesesTerminalEnd(ByteTerm&, DisjunctionContext*)
934	{
935	// 'Terminal' parentheses are at the end of the regex, and as such a match past end
936	// should always be returned as a successful match - we should never backtrack to here.
937	RELEASE_ASSERT_NOT_REACHED();
938	return false;
939	}
940
941	bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
942	{
943	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
944	ASSERT(term.atom.quantityMaxCount == `1`);
945
946	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
947
948	backTrack->begin = input.getPos();
949	return true;
950	}
951
952	bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
953	{
954	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
955	ASSERT(term.atom.quantityMaxCount == `1`);
956
957	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
958
959	input.setPos(backTrack->begin);
960
961	// We've reached the end of the parens; if they are inverted, this is failure.
962	if (term.invert()) {
963	context->term -= term.atom.parenthesesWidth;
964	return false;
965	}
966
967	return true;
968	}
969
970	bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
971	{
972	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
973	ASSERT(term.atom.quantityMaxCount == `1`);
974
975	// We've failed to match parens; if they are inverted, this is win!
976	if (term.invert()) {
977	context->term += term.atom.parenthesesWidth;
978	return true;
979	}
980
981	return false;
982	}
983
984	bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
985	{
986	ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
987	ASSERT(term.atom.quantityMaxCount == `1`);
988
989	BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
990
991	input.setPos(backTrack->begin);
992
993	context->term -= term.atom.parenthesesWidth;
994	return false;
995	}
996
997	JSRegExpResult matchParentheses(ByteTerm& term, DisjunctionContext* context)
998	{
999	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1000
1001	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1002	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1003
1004	backTrack->matchAmount = `0`;
1005	backTrack->lastContext = `0`;
1006
1007	ASSERT(term.atom.quantityType != QuantifierFixedCount \|\| term.atom.quantityMinCount == term.atom.quantityMaxCount);
1008
1009	unsigned minimumMatchCount = term.atom.quantityMinCount;
1010	JSRegExpResult fixedMatchResult;
1011
1012	// Handle fixed matches and the minimum part of a variable length match.
1013	if (minimumMatchCount) {
1014	// While we haven't yet reached our fixed limit,
1015	while (backTrack->matchAmount < minimumMatchCount) {
1016	// Try to do a match, and it it succeeds, add it to the list.
1017	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term);
1018	fixedMatchResult = matchDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term));
1019	if (fixedMatchResult == JSRegExpMatch)
1020	appendParenthesesDisjunctionContext(backTrack, context);
1021	else {
1022	// The match failed; try to find an alternate point to carry on from.
1023	resetMatches(term, context);
1024	freeParenthesesDisjunctionContext(context);
1025
1026	if (fixedMatchResult != JSRegExpNoMatch)
1027	return fixedMatchResult;
1028	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1029	if (backtrackResult != JSRegExpMatch)
1030	return backtrackResult;
1031	}
1032	}
1033
1034	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1035	recordParenthesesMatch(term, context);
1036	}
1037
1038	switch (term.atom.quantityType) {
1039	case QuantifierFixedCount: {
1040	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1041	return JSRegExpMatch;
1042	}
1043
1044	case QuantifierGreedy: {
1045	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1046	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term);
1047	JSRegExpResult result = matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term));
1048	if (result == JSRegExpMatch)
1049	appendParenthesesDisjunctionContext(backTrack, context);
1050	else {
1051	resetMatches(term, context);
1052	freeParenthesesDisjunctionContext(context);
1053
1054	if (result != JSRegExpNoMatch)
1055	return result;
1056
1057	break;
1058	}
1059	}
1060
1061	if (backTrack->matchAmount) {
1062	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1063	recordParenthesesMatch(term, context);
1064	}
1065	return JSRegExpMatch;
1066	}
1067
1068	case QuantifierNonGreedy:
1069	return JSRegExpMatch;
1070	}
1071
1072	RELEASE_ASSERT_NOT_REACHED();
1073	return JSRegExpErrorNoMatch;
1074	}
1075
1076	// Rules for backtracking differ depending on whether this is greedy or non-greedy.
1077	//
1078	// Greedy matches never should try just adding more - you should already have done
1079	// the 'more' cases. Always backtrack, at least a leetle bit. However cases where
1080	// you backtrack an item off the list needs checking, since we'll never have matched
1081	// the one less case. Tracking forwards, still add as much as possible.
1082	//
1083	// Non-greedy, we've already done the one less case, so don't match on popping.
1084	// We haven't done the one more case, so always try to add that.
1085	//
1086	JSRegExpResult backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
1087	{
1088	ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
1089
1090	BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
1091	ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
1092
1093	switch (term.atom.quantityType) {
1094	case QuantifierFixedCount: {
1095	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1096
1097	ParenthesesDisjunctionContext* context = `0`;
1098	JSRegExpResult result = parenthesesDoBacktrack(term, backTrack);
1099
1100	if (result != JSRegExpMatch)
1101	return result;
1102
1103	// While we haven't yet reached our fixed limit,
1104	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1105	// Try to do a match, and it it succeeds, add it to the list.
1106	context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term);
1107	result = matchDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term));
1108
1109	if (result == JSRegExpMatch)
1110	appendParenthesesDisjunctionContext(backTrack, context);
1111	else {
1112	// The match failed; try to find an alternate point to carry on from.
1113	resetMatches(term, context);
1114	freeParenthesesDisjunctionContext(context);
1115
1116	if (result != JSRegExpNoMatch)
1117	return result;
1118	JSRegExpResult backtrackResult = parenthesesDoBacktrack(term, backTrack);
1119	if (backtrackResult != JSRegExpMatch)
1120	return backtrackResult;
1121	}
1122	}
1123
1124	ASSERT(backTrack->matchAmount == term.atom.quantityMaxCount);
1125	context = backTrack->lastContext;
1126	recordParenthesesMatch(term, context);
1127	return JSRegExpMatch;
1128	}
1129
1130	case QuantifierGreedy: {
1131	if (!backTrack->matchAmount)
1132	return JSRegExpNoMatch;
1133
1134	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1135	JSRegExpResult result = matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term), btrack: true);
1136	if (result == JSRegExpMatch) {
1137	while (backTrack->matchAmount < term.atom.quantityMaxCount) {
1138	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term);
1139	JSRegExpResult parenthesesResult = matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term));
1140	if (parenthesesResult == JSRegExpMatch)
1141	appendParenthesesDisjunctionContext(backTrack, context);
1142	else {
1143	resetMatches(term, context);
1144	freeParenthesesDisjunctionContext(context);
1145
1146	if (parenthesesResult != JSRegExpNoMatch)
1147	return parenthesesResult;
1148
1149	break;
1150	}
1151	}
1152	} else {
1153	resetMatches(term, context);
1154	popParenthesesDisjunctionContext(backTrack);
1155	freeParenthesesDisjunctionContext(context);
1156
1157	if (result != JSRegExpNoMatch \|\| backTrack->matchAmount < term.atom.quantityMinCount)
1158	return result;
1159	}
1160
1161	if (backTrack->matchAmount) {
1162	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1163	recordParenthesesMatch(term, context);
1164	}
1165	return JSRegExpMatch;
1166	}
1167
1168	case QuantifierNonGreedy: {
1169	// If we've not reached the limit, try to add one more match.
1170	if (backTrack->matchAmount < term.atom.quantityMaxCount) {
1171	ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunction: disjunctionBody, output, term);
1172	JSRegExpResult result = matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term));
1173	if (result == JSRegExpMatch) {
1174	appendParenthesesDisjunctionContext(backTrack, context);
1175	recordParenthesesMatch(term, context);
1176	return JSRegExpMatch;
1177	}
1178
1179	resetMatches(term, context);
1180	freeParenthesesDisjunctionContext(context);
1181
1182	if (result != JSRegExpNoMatch)
1183	return result;
1184	}
1185
1186	// Nope - okay backtrack looking for an alternative.
1187	while (backTrack->matchAmount) {
1188	ParenthesesDisjunctionContext* context = backTrack->lastContext;
1189	JSRegExpResult result = matchNonZeroDisjunction(disjunction: disjunctionBody, context: context->getDisjunctionContext(term), btrack: true);
1190	if (result == JSRegExpMatch) {
1191	// successful backtrack! we're back in the game!
1192	if (backTrack->matchAmount) {
1193	context = backTrack->lastContext;
1194	recordParenthesesMatch(term, context);
1195	}
1196	return JSRegExpMatch;
1197	}
1198
1199	// pop a match off the stack
1200	resetMatches(term, context);
1201	popParenthesesDisjunctionContext(backTrack);
1202	freeParenthesesDisjunctionContext(context);
1203
1204	if (result != JSRegExpNoMatch)
1205	return result;
1206	}
1207
1208	return JSRegExpNoMatch;
1209	}
1210	}
1211
1212	RELEASE_ASSERT_NOT_REACHED();
1213	return JSRegExpErrorNoMatch;
1214	}
1215
1216	bool matchDotStarEnclosure(ByteTerm& term, DisjunctionContext* context)
1217	{
1218	UNUSED_PARAM(term);
1219
1220	if (pattern->dotAll()) {
1221	context->matchBegin = startOffset;
1222	context->matchEnd = input.end();
1223	return true;
1224	}
1225
1226	unsigned matchBegin = context->matchBegin;
1227
1228	if (matchBegin > startOffset) {
1229	for (matchBegin--; true; matchBegin--) {
1230	if (testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.reread(matchBegin))) {
1231	++matchBegin;
1232	break;
1233	}
1234
1235	if (matchBegin == startOffset)
1236	break;
1237	}
1238	}
1239
1240	unsigned matchEnd = input.getPos();
1241
1242	for (; (matchEnd != input.end())
1243	&& (!testCharacterClass(characterClass: pattern->newlineCharacterClass, ch: input.reread(matchEnd))); matchEnd++) { }
1244
1245	if (((matchBegin && term.anchors.m_bol)
1246	\|\| ((matchEnd != input.end()) && term.anchors.m_eol))
1247	&& !pattern->multiline())
1248	return false;
1249
1250	context->matchBegin = matchBegin;
1251	context->matchEnd = matchEnd;
1252	return true;
1253	}
1254
1255	#define MATCH_NEXT() { ++context->term; goto matchAgain; }
1256	#define BACKTRACK() { --context->term; goto backtrack; }
1257	#define currentTerm() (disjunction->terms[context->term])
1258	JSRegExpResult matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1259	{
1260	if (!--remainingMatchCount)
1261	return JSRegExpErrorHitLimit;
1262
1263	if (btrack)
1264	BACKTRACK();
1265
1266	context->matchBegin = input.getPos();
1267	context->term = `0`;
1268
1269	matchAgain:
1270	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1271
1272	switch (currentTerm().type) {
1273	case ByteTerm::TypeSubpatternBegin:
1274	MATCH_NEXT();
1275	case ByteTerm::TypeSubpatternEnd:
1276	context->matchEnd = input.getPos();
1277	return JSRegExpMatch;
1278
1279	case ByteTerm::TypeBodyAlternativeBegin:
1280	MATCH_NEXT();
1281	case ByteTerm::TypeBodyAlternativeDisjunction:
1282	case ByteTerm::TypeBodyAlternativeEnd:
1283	context->matchEnd = input.getPos();
1284	return JSRegExpMatch;
1285
1286	case ByteTerm::TypeAlternativeBegin:
1287	MATCH_NEXT();
1288	case ByteTerm::TypeAlternativeDisjunction:
1289	case ByteTerm::TypeAlternativeEnd: {
1290	int offset = currentTerm().alternative.end;
1291	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1292	backTrack->offset = offset;
1293	context->term += offset;
1294	MATCH_NEXT();
1295	}
1296
1297	case ByteTerm::TypeAssertionBOL:
1298	if (matchAssertionBOL(currentTerm()))
1299	MATCH_NEXT();
1300	BACKTRACK();
1301	case ByteTerm::TypeAssertionEOL:
1302	if (matchAssertionEOL(currentTerm()))
1303	MATCH_NEXT();
1304	BACKTRACK();
1305	case ByteTerm::TypeAssertionWordBoundary:
1306	if (matchAssertionWordBoundary(currentTerm()))
1307	MATCH_NEXT();
1308	BACKTRACK();
1309
1310	case ByteTerm::TypePatternCharacterOnce:
1311	case ByteTerm::TypePatternCharacterFixed: {
1312	if (unicode) {
1313	if (!U_IS_BMP(currentTerm().atom.patternCharacter)) {
1314	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1315	if (!checkSurrogatePair(currentTerm().atom.patternCharacter, currentTerm().inputPosition - `2` * matchAmount)) {
1316	BACKTRACK();
1317	}
1318	}
1319	MATCH_NEXT();
1320	}
1321	}
1322	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1323
1324	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1325	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - matchAmount)) {
1326	input.setPos(position);
1327	BACKTRACK();
1328	}
1329	}
1330	MATCH_NEXT();
1331	}
1332	case ByteTerm::TypePatternCharacterGreedy: {
1333	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1334	unsigned matchAmount = `0`;
1335	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1336	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1337	if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + `1`)) {
1338	input.setPos(position);
1339	break;
1340	}
1341	++matchAmount;
1342	position = input.getPos();
1343	}
1344	backTrack->matchAmount = matchAmount;
1345
1346	MATCH_NEXT();
1347	}
1348	case ByteTerm::TypePatternCharacterNonGreedy: {
1349	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1350	backTrack->begin = input.getPos();
1351	backTrack->matchAmount = `0`;
1352	MATCH_NEXT();
1353	}
1354
1355	case ByteTerm::TypePatternCasedCharacterOnce:
1356	case ByteTerm::TypePatternCasedCharacterFixed: {
1357	if (unicode) {
1358	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1359	ASSERT(U_IS_BMP(currentTerm().atom.patternCharacter));
1360
1361	unsigned position = input.getPos(); // May need to back out reading a surrogate pair.
1362
1363	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1364	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount)) {
1365	input.setPos(position);
1366	BACKTRACK();
1367	}
1368	}
1369	MATCH_NEXT();
1370	}
1371
1372	for (unsigned matchAmount = `0`; matchAmount < currentTerm().atom.quantityMaxCount; ++matchAmount) {
1373	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - matchAmount))
1374	BACKTRACK();
1375	}
1376	MATCH_NEXT();
1377	}
1378	case ByteTerm::TypePatternCasedCharacterGreedy: {
1379	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1380
1381	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1382	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1383
1384	unsigned matchAmount = `0`;
1385	while ((matchAmount < currentTerm().atom.quantityMaxCount) && input.checkInput(`1`)) {
1386	if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + `1`)) {
1387	input.uncheckInput(`1`);
1388	break;
1389	}
1390	++matchAmount;
1391	}
1392	backTrack->matchAmount = matchAmount;
1393
1394	MATCH_NEXT();
1395	}
1396	case ByteTerm::TypePatternCasedCharacterNonGreedy: {
1397	BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
1398
1399	// Case insensitive matching of unicode characters is handled as TypeCharacterClass.
1400	ASSERT(!unicode \|\| U_IS_BMP(currentTerm().atom.patternCharacter));
1401
1402	backTrack->matchAmount = `0`;
1403	MATCH_NEXT();
1404	}
1405
1406	case ByteTerm::TypeCharacterClass:
1407	if (matchCharacterClass(currentTerm(), context))
1408	MATCH_NEXT();
1409	BACKTRACK();
1410	case ByteTerm::TypeBackReference:
1411	if (matchBackReference(currentTerm(), context))
1412	MATCH_NEXT();
1413	BACKTRACK();
1414	case ByteTerm::TypeParenthesesSubpattern: {
1415	JSRegExpResult result = matchParentheses(currentTerm(), context);
1416
1417	if (result == JSRegExpMatch) {
1418	MATCH_NEXT();
1419	} else if (result != JSRegExpNoMatch)
1420	return result;
1421
1422	BACKTRACK();
1423	}
1424	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1425	if (matchParenthesesOnceBegin(currentTerm(), context))
1426	MATCH_NEXT();
1427	BACKTRACK();
1428	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1429	if (matchParenthesesOnceEnd(currentTerm(), context))
1430	MATCH_NEXT();
1431	BACKTRACK();
1432	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1433	if (matchParenthesesTerminalBegin(currentTerm(), context))
1434	MATCH_NEXT();
1435	BACKTRACK();
1436	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1437	if (matchParenthesesTerminalEnd(currentTerm(), context))
1438	MATCH_NEXT();
1439	BACKTRACK();
1440	case ByteTerm::TypeParentheticalAssertionBegin:
1441	if (matchParentheticalAssertionBegin(currentTerm(), context))
1442	MATCH_NEXT();
1443	BACKTRACK();
1444	case ByteTerm::TypeParentheticalAssertionEnd:
1445	if (matchParentheticalAssertionEnd(currentTerm(), context))
1446	MATCH_NEXT();
1447	BACKTRACK();
1448
1449	case ByteTerm::TypeCheckInput:
1450	if (input.checkInput(currentTerm().checkInputCount))
1451	MATCH_NEXT();
1452	BACKTRACK();
1453
1454	case ByteTerm::TypeUncheckInput:
1455	input.uncheckInput(currentTerm().checkInputCount);
1456	MATCH_NEXT();
1457
1458	case ByteTerm::TypeDotStarEnclosure:
1459	if (matchDotStarEnclosure(currentTerm(), context))
1460	return JSRegExpMatch;
1461	BACKTRACK();
1462	}
1463
1464	// We should never fall-through to here.
1465	RELEASE_ASSERT_NOT_REACHED();
1466
1467	backtrack:
1468	ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
1469
1470	switch (currentTerm().type) {
1471	case ByteTerm::TypeSubpatternBegin:
1472	return JSRegExpNoMatch;
1473	case ByteTerm::TypeSubpatternEnd:
1474	RELEASE_ASSERT_NOT_REACHED();
1475
1476	case ByteTerm::TypeBodyAlternativeBegin:
1477	case ByteTerm::TypeBodyAlternativeDisjunction: {
1478	int offset = currentTerm().alternative.next;
1479	context->term += offset;
1480	if (offset > `0`)
1481	MATCH_NEXT();
1482
1483	if (input.atEnd() \|\| pattern->sticky())
1484	return JSRegExpNoMatch;
1485
1486	input.next();
1487
1488	context->matchBegin = input.getPos();
1489
1490	if (currentTerm().alternative.onceThrough)
1491	context->term += currentTerm().alternative.next;
1492
1493	MATCH_NEXT();
1494	}
1495	case ByteTerm::TypeBodyAlternativeEnd:
1496	RELEASE_ASSERT_NOT_REACHED();
1497
1498	case ByteTerm::TypeAlternativeBegin:
1499	case ByteTerm::TypeAlternativeDisjunction: {
1500	int offset = currentTerm().alternative.next;
1501	context->term += offset;
1502	if (offset > `0`)
1503	MATCH_NEXT();
1504	BACKTRACK();
1505	}
1506	case ByteTerm::TypeAlternativeEnd: {
1507	// We should never backtrack back into an alternative of the main body of the regex.
1508	BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
1509	unsigned offset = backTrack->offset;
1510	context->term -= offset;
1511	BACKTRACK();
1512	}
1513
1514	case ByteTerm::TypeAssertionBOL:
1515	case ByteTerm::TypeAssertionEOL:
1516	case ByteTerm::TypeAssertionWordBoundary:
1517	BACKTRACK();
1518
1519	case ByteTerm::TypePatternCharacterOnce:
1520	case ByteTerm::TypePatternCharacterFixed:
1521	case ByteTerm::TypePatternCharacterGreedy:
1522	case ByteTerm::TypePatternCharacterNonGreedy:
1523	if (backtrackPatternCharacter(currentTerm(), context))
1524	MATCH_NEXT();
1525	BACKTRACK();
1526	case ByteTerm::TypePatternCasedCharacterOnce:
1527	case ByteTerm::TypePatternCasedCharacterFixed:
1528	case ByteTerm::TypePatternCasedCharacterGreedy:
1529	case ByteTerm::TypePatternCasedCharacterNonGreedy:
1530	if (backtrackPatternCasedCharacter(currentTerm(), context))
1531	MATCH_NEXT();
1532	BACKTRACK();
1533	case ByteTerm::TypeCharacterClass:
1534	if (backtrackCharacterClass(currentTerm(), context))
1535	MATCH_NEXT();
1536	BACKTRACK();
1537	case ByteTerm::TypeBackReference:
1538	if (backtrackBackReference(currentTerm(), context))
1539	MATCH_NEXT();
1540	BACKTRACK();
1541	case ByteTerm::TypeParenthesesSubpattern: {
1542	JSRegExpResult result = backtrackParentheses(currentTerm(), context);
1543
1544	if (result == JSRegExpMatch) {
1545	MATCH_NEXT();
1546	} else if (result != JSRegExpNoMatch)
1547	return result;
1548
1549	BACKTRACK();
1550	}
1551	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
1552	if (backtrackParenthesesOnceBegin(currentTerm(), context))
1553	MATCH_NEXT();
1554	BACKTRACK();
1555	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
1556	if (backtrackParenthesesOnceEnd(currentTerm(), context))
1557	MATCH_NEXT();
1558	BACKTRACK();
1559	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
1560	if (backtrackParenthesesTerminalBegin(currentTerm(), context))
1561	MATCH_NEXT();
1562	BACKTRACK();
1563	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
1564	if (backtrackParenthesesTerminalEnd(currentTerm(), context))
1565	MATCH_NEXT();
1566	BACKTRACK();
1567	case ByteTerm::TypeParentheticalAssertionBegin:
1568	if (backtrackParentheticalAssertionBegin(currentTerm(), context))
1569	MATCH_NEXT();
1570	BACKTRACK();
1571	case ByteTerm::TypeParentheticalAssertionEnd:
1572	if (backtrackParentheticalAssertionEnd(currentTerm(), context))
1573	MATCH_NEXT();
1574	BACKTRACK();
1575
1576	case ByteTerm::TypeCheckInput:
1577	input.uncheckInput(currentTerm().checkInputCount);
1578	BACKTRACK();
1579
1580	case ByteTerm::TypeUncheckInput:
1581	input.checkInput(currentTerm().checkInputCount);
1582	BACKTRACK();
1583
1584	case ByteTerm::TypeDotStarEnclosure:
1585	RELEASE_ASSERT_NOT_REACHED();
1586	}
1587
1588	RELEASE_ASSERT_NOT_REACHED();
1589	return JSRegExpErrorNoMatch;
1590	}
1591
1592	JSRegExpResult matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
1593	{
1594	JSRegExpResult result = matchDisjunction(disjunction, context, btrack);
1595
1596	if (result == JSRegExpMatch) {
1597	while (context->matchBegin == context->matchEnd) {
1598	result = matchDisjunction(disjunction, context, btrack: true);
1599	if (result != JSRegExpMatch)
1600	return result;
1601	}
1602	return JSRegExpMatch;
1603	}
1604
1605	return result;
1606	}
1607
1608	unsigned interpret()
1609	{
1610	if (!input.isAvailableInput(`0`))
1611	return offsetNoMatch;
1612
1613	if (pattern->m_lock)
1614	pattern->m_lock->lock();
1615
1616	for (unsigned i = `0`; i < pattern->m_body ->m_numSubpatterns + `1`; ++i)
1617	output[i << `1`] = offsetNoMatch;
1618
1619	allocatorPool = pattern->m_allocator->startAllocator();
1620	RELEASE_ASSERT(allocatorPool);
1621
1622	DisjunctionContext* context = allocDisjunctionContext(disjunction: pattern->m_body.get());
1623
1624	JSRegExpResult result = matchDisjunction(disjunction: pattern->m_body.get(), context, btrack: false);
1625	if (result == JSRegExpMatch) {
1626	output[`0`] = context->matchBegin;
1627	output[`1`] = context->matchEnd;
1628	}
1629
1630	freeDisjunctionContext(context);
1631
1632	pattern->m_allocator->stopAllocator();
1633
1634	ASSERT((result == JSRegExpMatch) == (output[`0`] != offsetNoMatch));
1635
1636	if (pattern->m_lock)
1637	pattern->m_lock->unlock();
1638
1639	return output[`0`];
1640	}
1641
1642	Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
1643	: pattern(pattern)
1644	, unicode(pattern->unicode())
1645	, output(output)
1646	, input(input, start, length, pattern->unicode())
1647	, startOffset(start)
1648	, remainingMatchCount(matchLimit)
1649	{
1650	}
1651
1652	private:
1653	BytecodePattern* pattern;
1654	bool unicode;
1655	unsigned* output;
1656	InputStream input;
1657	WTF::BumpPointerPool* allocatorPool { nullptr };
1658	unsigned startOffset;
1659	unsigned remainingMatchCount;
1660	};
1661
1662	class ByteCompiler {
1663	struct ParenthesesStackEntry {
1664	unsigned beginTerm;
1665	unsigned savedAlternativeIndex;
1666	ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/, unsigned subpatternId, bool capture = false/)
1667	: beginTerm(beginTerm)
1668	, savedAlternativeIndex(savedAlternativeIndex)
1669	{
1670	}
1671	};
1672
1673	public:
1674	ByteCompiler(YarrPattern& pattern)
1675	: m_pattern(pattern)
1676	{
1677	m_currentAlternativeIndex = `0`;
1678	}
1679
1680	std::unique_ptr<BytecodePattern> compile(BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
1681	{
1682	regexBegin(numSubpatterns: m_pattern.m_numSubpatterns, callFrameSize: m_pattern.m_body->m_callFrameSize, onceThrough: m_pattern.m_body->m_alternatives [`0`]->onceThrough());
1683	emitDisjunction(disjunction: m_pattern.m_body);
1684	regexEnd();
1685
1686	#ifndef NDEBUG
1687	if (Options::dumpCompiledRegExpPatterns())
1688	dumpDisjunction(disjunction: m_bodyDisjunction.get());
1689	#endif
1690
1691	return std::make_unique<BytecodePattern>(WTFMove(m_bodyDisjunction), args&: m_allParenthesesInfo, args&: m_pattern, args&: allocator, args&: lock);
1692	}
1693
1694	void checkInput(unsigned count)
1695	{
1696	m_bodyDisjunction ->terms.append(other: ByteTerm::CheckInput(count));
1697	}
1698
1699	void uncheckInput(unsigned count)
1700	{
1701	m_bodyDisjunction ->terms.append(other: ByteTerm::UncheckInput(count));
1702	}
1703
1704	void assertionBOL(unsigned inputPosition)
1705	{
1706	m_bodyDisjunction ->terms.append(other: ByteTerm::BOL(inputPos: inputPosition));
1707	}
1708
1709	void assertionEOL(unsigned inputPosition)
1710	{
1711	m_bodyDisjunction ->terms.append(other: ByteTerm::EOL(inputPos: inputPosition));
1712	}
1713
1714	void assertionWordBoundary(bool invert, unsigned inputPosition)
1715	{
1716	m_bodyDisjunction ->terms.append(other: ByteTerm::WordBoundary(invert, inputPos: inputPosition));
1717	}
1718
1719	void atomPatternCharacter(UChar32 ch, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1720	{
1721	if (m_pattern.ignoreCase()) {
1722	UChar32 lo = u_tolower(ch);
1723	UChar32 hi = u_toupper(ch);
1724
1725	if (lo != hi) {
1726	m_bodyDisjunction ->terms.append(other: ByteTerm (lo, hi, inputPosition, frameLocation, quantityMaxCount, quantityType));
1727	return;
1728	}
1729	}
1730
1731	m_bodyDisjunction ->terms.append(other: ByteTerm (ch, inputPosition, frameLocation, quantityMaxCount, quantityType));
1732	}
1733
1734	void atomCharacterClass(CharacterClass* characterClass, bool invert, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1735	{
1736	m_bodyDisjunction ->terms.append(other: ByteTerm (characterClass, invert, inputPosition));
1737
1738	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1739	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityType = quantityType;
1740	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1741	}
1742
1743	void atomBackReference(unsigned subpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1744	{
1745	ASSERT(subpatternId);
1746
1747	m_bodyDisjunction ->terms.append(other: ByteTerm::BackReference(subpatternId, inputPos: inputPosition));
1748
1749	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1750	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].atom.quantityType = quantityType;
1751	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1752	}
1753
1754	void atomParenthesesOnceBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1755	{
1756	int beginTerm = m_bodyDisjunction ->terms.size();
1757
1758	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1759	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1760	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeBegin());
1761	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1762
1763	m_parenthesesStack.append(other: ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1764	m_currentAlternativeIndex = beginTerm + `1`;
1765	}
1766
1767	void atomParenthesesTerminalBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1768	{
1769	int beginTerm = m_bodyDisjunction ->terms.size();
1770
1771	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalBegin, subpatternId, capture, false, inputPosition));
1772	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1773	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeBegin());
1774	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1775
1776	m_parenthesesStack.append(other: ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1777	m_currentAlternativeIndex = beginTerm + `1`;
1778	}
1779
1780	void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, unsigned inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
1781	{
1782	// Errrk! - this is a little crazy, we initially generate as a TypeParenthesesSubpatternOnceBegin,
1783	// then fix this up at the end! - simplifying this should make it much clearer.
1784	// https://bugs.webkit.org/show_bug.cgi?id=50136
1785
1786	int beginTerm = m_bodyDisjunction ->terms.size();
1787
1788	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, false, inputPosition));
1789	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1790	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeBegin());
1791	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1792
1793	m_parenthesesStack.append(other: ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1794	m_currentAlternativeIndex = beginTerm + `1`;
1795	}
1796
1797	void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
1798	{
1799	int beginTerm = m_bodyDisjunction ->terms.size();
1800
1801	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParentheticalAssertionBegin, subpatternId, false, invert, `0`));
1802	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = frameLocation;
1803	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeBegin());
1804	m_bodyDisjunction ->terms [m_bodyDisjunction ->terms.size() - `1`].frameLocation = alternativeFrameLocation;
1805
1806	m_parenthesesStack.append(other: ParenthesesStackEntry (beginTerm, m_currentAlternativeIndex));
1807	m_currentAlternativeIndex = beginTerm + `1`;
1808	}
1809
1810	void atomParentheticalAssertionEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1811	{
1812	unsigned beginTerm = popParenthesesStack();
1813	closeAlternative(beginTerm: beginTerm + `1`);
1814	unsigned endTerm = m_bodyDisjunction ->terms.size();
1815
1816	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin);
1817
1818	bool invert = m_bodyDisjunction ->terms [beginTerm].invert();
1819	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1820
1821	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParentheticalAssertionEnd, subpatternId, false, invert, inputPosition));
1822	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1823	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1824	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1825
1826	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1827	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1828	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1829	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1830	}
1831
1832	void assertionDotStarEnclosure(bool bolAnchored, bool eolAnchored)
1833	{
1834	m_bodyDisjunction ->terms.append(other: ByteTerm::DotStarEnclosure(bolAnchor: bolAnchored, eolAnchor: eolAnchored));
1835	}
1836
1837	unsigned popParenthesesStack()
1838	{
1839	ASSERT(m_parenthesesStack.size());
1840	int stackEnd = m_parenthesesStack.size() - `1`;
1841	unsigned beginTerm = m_parenthesesStack [stackEnd].beginTerm;
1842	m_currentAlternativeIndex = m_parenthesesStack [stackEnd].savedAlternativeIndex;
1843	m_parenthesesStack.shrink(size: stackEnd);
1844
1845	ASSERT(beginTerm < m_bodyDisjunction ->terms.size());
1846	ASSERT(m_currentAlternativeIndex < m_bodyDisjunction ->terms.size());
1847
1848	return beginTerm;
1849	}
1850
1851	void closeAlternative(int beginTerm)
1852	{
1853	int origBeginTerm = beginTerm;
1854	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
1855	int endIndex = m_bodyDisjunction ->terms.size();
1856
1857	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1858
1859	if (!m_bodyDisjunction ->terms [beginTerm].alternative.next)
1860	m_bodyDisjunction ->terms.remove(position: beginTerm);
1861	else {
1862	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1863	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1864	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
1865	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1866	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1867	}
1868
1869	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1870
1871	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeEnd());
1872	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1873	}
1874	}
1875
1876	void closeBodyAlternative()
1877	{
1878	int beginTerm = `0`;
1879	int origBeginTerm = `0`;
1880	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
1881	int endIndex = m_bodyDisjunction ->terms.size();
1882
1883	unsigned frameLocation = m_bodyDisjunction ->terms [beginTerm].frameLocation;
1884
1885	while (m_bodyDisjunction ->terms [beginTerm].alternative.next) {
1886	beginTerm += m_bodyDisjunction ->terms [beginTerm].alternative.next;
1887	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
1888	m_bodyDisjunction ->terms [beginTerm].alternative.end = endIndex - beginTerm;
1889	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1890	}
1891
1892	m_bodyDisjunction ->terms [beginTerm].alternative.next = origBeginTerm - beginTerm;
1893
1894	m_bodyDisjunction ->terms.append(other: ByteTerm::BodyAlternativeEnd());
1895	m_bodyDisjunction ->terms [endIndex].frameLocation = frameLocation;
1896	}
1897
1898	void atomParenthesesSubpatternEnd(unsigned lastSubpatternId, unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType, unsigned callFrameSize = `0`)
1899	{
1900	unsigned beginTerm = popParenthesesStack();
1901	closeAlternative(beginTerm: beginTerm + `1`);
1902	unsigned endTerm = m_bodyDisjunction ->terms.size();
1903
1904	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1905
1906	ByteTerm& parenthesesBegin = m_bodyDisjunction ->terms [beginTerm];
1907
1908	bool capture = parenthesesBegin.capture();
1909	unsigned subpatternId = parenthesesBegin.atom.subpatternId;
1910
1911	unsigned numSubpatterns = lastSubpatternId - subpatternId + `1`;
1912	auto parenthesesDisjunction = std::make_unique<ByteDisjunction>(args&: numSubpatterns, args&: callFrameSize);
1913
1914	unsigned firstTermInParentheses = beginTerm + `1`;
1915	parenthesesDisjunction ->terms.reserveInitialCapacity(size: endTerm - firstTermInParentheses + `2`);
1916
1917	parenthesesDisjunction ->terms.append(other: ByteTerm::SubpatternBegin());
1918	for (unsigned termInParentheses = firstTermInParentheses; termInParentheses < endTerm; ++termInParentheses)
1919	parenthesesDisjunction ->terms.append(value: m_bodyDisjunction ->terms [termInParentheses]);
1920	parenthesesDisjunction ->terms.append(other: ByteTerm::SubpatternEnd());
1921
1922	m_bodyDisjunction ->terms.shrink(size: beginTerm);
1923
1924	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction.get(), capture, inputPosition));
1925	m_allParenthesesInfo.append(WTFMove(parenthesesDisjunction));
1926
1927	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1928	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1929	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1930	m_bodyDisjunction ->terms [beginTerm].frameLocation = frameLocation;
1931	}
1932
1933	void atomParenthesesOnceEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1934	{
1935	unsigned beginTerm = popParenthesesStack();
1936	closeAlternative(beginTerm: beginTerm + `1`);
1937	unsigned endTerm = m_bodyDisjunction ->terms.size();
1938
1939	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
1940
1941	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1942	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1943
1944	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, capture, false, inputPosition));
1945	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1946	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1947	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1948
1949	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1950	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1951	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1952	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1953	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1954	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1955	}
1956
1957	void atomParenthesesTerminalEnd(unsigned inputPosition, unsigned frameLocation, Checked<unsigned> quantityMinCount, Checked<unsigned> quantityMaxCount, QuantifierType quantityType)
1958	{
1959	unsigned beginTerm = popParenthesesStack();
1960	closeAlternative(beginTerm: beginTerm + `1`);
1961	unsigned endTerm = m_bodyDisjunction ->terms.size();
1962
1963	ASSERT(m_bodyDisjunction ->terms[beginTerm].type == ByteTerm::TypeParenthesesSubpatternTerminalBegin);
1964
1965	bool capture = m_bodyDisjunction ->terms [beginTerm].capture();
1966	unsigned subpatternId = m_bodyDisjunction ->terms [beginTerm].atom.subpatternId;
1967
1968	m_bodyDisjunction ->terms.append(other: ByteTerm (ByteTerm::TypeParenthesesSubpatternTerminalEnd, subpatternId, capture, false, inputPosition));
1969	m_bodyDisjunction ->terms [beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
1970	m_bodyDisjunction ->terms [endTerm].atom.parenthesesWidth = endTerm - beginTerm;
1971	m_bodyDisjunction ->terms [endTerm].frameLocation = frameLocation;
1972
1973	m_bodyDisjunction ->terms [beginTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1974	m_bodyDisjunction ->terms [beginTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1975	m_bodyDisjunction ->terms [beginTerm].atom.quantityType = quantityType;
1976	m_bodyDisjunction ->terms [endTerm].atom.quantityMinCount = quantityMinCount.unsafeGet();
1977	m_bodyDisjunction ->terms [endTerm].atom.quantityMaxCount = quantityMaxCount.unsafeGet();
1978	m_bodyDisjunction ->terms [endTerm].atom.quantityType = quantityType;
1979	}
1980
1981	void regexBegin(unsigned numSubpatterns, unsigned callFrameSize, bool onceThrough)
1982	{
1983	m_bodyDisjunction = std::make_unique<ByteDisjunction>(args&: numSubpatterns, args&: callFrameSize);
1984	m_bodyDisjunction ->terms.append(other: ByteTerm::BodyAlternativeBegin(onceThrough));
1985	m_bodyDisjunction ->terms [`0`].frameLocation = `0`;
1986	m_currentAlternativeIndex = `0`;
1987	}
1988
1989	void regexEnd()
1990	{
1991	closeBodyAlternative();
1992	}
1993
1994	void alternativeBodyDisjunction(bool onceThrough)
1995	{
1996	int newAlternativeIndex = m_bodyDisjunction ->terms.size();
1997	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
1998	m_bodyDisjunction ->terms.append(other: ByteTerm::BodyAlternativeDisjunction(onceThrough));
1999
2000	m_currentAlternativeIndex = newAlternativeIndex;
2001	}
2002
2003	void alternativeDisjunction()
2004	{
2005	int newAlternativeIndex = m_bodyDisjunction ->terms.size();
2006	m_bodyDisjunction ->terms [m_currentAlternativeIndex].alternative.next = newAlternativeIndex - m_currentAlternativeIndex;
2007	m_bodyDisjunction ->terms.append(other: ByteTerm::AlternativeDisjunction());
2008
2009	m_currentAlternativeIndex = newAlternativeIndex;
2010	}
2011
2012	void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = `0`, unsigned parenthesesInputCountAlreadyChecked = `0`)
2013	{
2014	for (unsigned alt = `0`; alt < disjunction->m_alternatives.size(); ++alt) {
2015	unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
2016
2017	PatternAlternative* alternative = disjunction->m_alternatives [alt].get();
2018
2019	if (alt) {
2020	if (disjunction == m_pattern.m_body)
2021	alternativeBodyDisjunction(onceThrough: alternative->onceThrough());
2022	else
2023	alternativeDisjunction();
2024	}
2025
2026	unsigned minimumSize = alternative->m_minimumSize;
2027	ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
2028	unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
2029
2030	if (countToCheck) {
2031	checkInput(count: countToCheck);
2032	currentCountAlreadyChecked += countToCheck;
2033	}
2034
2035	for (auto& term : alternative->m_terms) {
2036	switch (term.type) {
2037	case PatternTerm::TypeAssertionBOL:
2038	assertionBOL(inputPosition: currentCountAlreadyChecked - term.inputPosition);
2039	break;
2040
2041	case PatternTerm::TypeAssertionEOL:
2042	assertionEOL(inputPosition: currentCountAlreadyChecked - term.inputPosition);
2043	break;
2044
2045	case PatternTerm::TypeAssertionWordBoundary:
2046	assertionWordBoundary(invert: term.invert(), inputPosition: currentCountAlreadyChecked - term.inputPosition);
2047	break;
2048
2049	case PatternTerm::TypePatternCharacter:
2050	atomPatternCharacter(ch: term.patternCharacter, inputPosition: currentCountAlreadyChecked - term.inputPosition, frameLocation: term.frameLocation, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2051	break;
2052
2053	case PatternTerm::TypeCharacterClass:
2054	atomCharacterClass(characterClass: term.characterClass, invert: term.invert(), inputPosition: currentCountAlreadyChecked- term.inputPosition, frameLocation: term.frameLocation, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2055	break;
2056
2057	case PatternTerm::TypeBackReference:
2058	atomBackReference(subpatternId: term.backReferenceSubpatternId, inputPosition: currentCountAlreadyChecked - term.inputPosition, frameLocation: term.frameLocation, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2059	break;
2060
2061	case PatternTerm::TypeForwardReference:
2062	break;
2063
2064	case PatternTerm::TypeParenthesesSubpattern: {
2065	unsigned disjunctionAlreadyCheckedCount = `0`;
2066	if (term.quantityMaxCount == `1` && !term.parentheses.isCopy) {
2067	unsigned alternativeFrameLocation = term.frameLocation;
2068	// For QuantifierFixedCount we pre-check the minimum size; for greedy/non-greedy we reserve a slot in the frame.
2069	if (term.quantityType == QuantifierFixedCount)
2070	disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
2071	else
2072	alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
2073	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2074	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2075	atomParenthesesOnceBegin(subpatternId: term.parentheses.subpatternId, capture: term.capture(), inputPosition: disjunctionAlreadyCheckedCount + delegateEndInputOffset, frameLocation: term.frameLocation, alternativeFrameLocation);
2076	emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: disjunctionAlreadyCheckedCount);
2077	atomParenthesesOnceEnd(inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityMinCount: term.quantityMinCount, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2078	} else if (term.parentheses.isTerminal) {
2079	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2080	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2081	atomParenthesesTerminalBegin(subpatternId: term.parentheses.subpatternId, capture: term.capture(), inputPosition: disjunctionAlreadyCheckedCount + delegateEndInputOffset, frameLocation: term.frameLocation, alternativeFrameLocation: term.frameLocation + YarrStackSpaceForBackTrackInfoParenthesesTerminal);
2082	emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: disjunctionAlreadyCheckedCount);
2083	atomParenthesesTerminalEnd(inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityMinCount: term.quantityMinCount, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2084	} else {
2085	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2086	unsigned delegateEndInputOffset = currentCountAlreadyChecked - term.inputPosition;
2087	atomParenthesesSubpatternBegin(subpatternId: term.parentheses.subpatternId, capture: term.capture(), inputPosition: disjunctionAlreadyCheckedCount + delegateEndInputOffset, frameLocation: term.frameLocation, alternativeFrameLocation: `0`);
2088	emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: `0`);
2089	atomParenthesesSubpatternEnd(lastSubpatternId: term.parentheses.lastSubpatternId, inputPosition: delegateEndInputOffset, frameLocation: term.frameLocation, quantityMinCount: term.quantityMinCount, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType, callFrameSize: term.parentheses.disjunction->m_callFrameSize);
2090	}
2091	break;
2092	}
2093
2094	case PatternTerm::TypeParentheticalAssertion: {
2095	unsigned alternativeFrameLocation = term.frameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
2096
2097	ASSERT(currentCountAlreadyChecked >= term.inputPosition);
2098	unsigned positiveInputOffset = currentCountAlreadyChecked - term.inputPosition;
2099	unsigned uncheckAmount = `0`;
2100	if (positiveInputOffset > term.parentheses.disjunction->m_minimumSize) {
2101	uncheckAmount = positiveInputOffset - term.parentheses.disjunction->m_minimumSize;
2102	uncheckInput(count: uncheckAmount);
2103	currentCountAlreadyChecked -= uncheckAmount;
2104	}
2105
2106	atomParentheticalAssertionBegin(subpatternId: term.parentheses.subpatternId, invert: term.invert(), frameLocation: term.frameLocation, alternativeFrameLocation);
2107	emitDisjunction(disjunction: term.parentheses.disjunction, inputCountAlreadyChecked: currentCountAlreadyChecked, parenthesesInputCountAlreadyChecked: positiveInputOffset - uncheckAmount);
2108	atomParentheticalAssertionEnd(inputPosition: `0`, frameLocation: term.frameLocation, quantityMaxCount: term.quantityMaxCount, quantityType: term.quantityType);
2109	if (uncheckAmount) {
2110	checkInput(count: uncheckAmount);
2111	currentCountAlreadyChecked += uncheckAmount;
2112	}
2113	break;
2114	}
2115
2116	case PatternTerm::TypeDotStarEnclosure:
2117	assertionDotStarEnclosure(bolAnchored: term.anchors.bolAnchor, eolAnchored: term.anchors.eolAnchor);
2118	break;
2119	}
2120	}
2121	}
2122	}
2123	#ifndef NDEBUG
2124	void dumpDisjunction(ByteDisjunction* disjunction, unsigned nesting = `0`)
2125	{
2126	PrintStream& out = WTF::dataFile();
2127
2128	unsigned termIndexNest = `0`;
2129
2130	if (!nesting) {
2131	out.printf(format: "ByteDisjunction(%p):\n", disjunction);
2132	nesting = `1`;
2133	} else {
2134	termIndexNest = nesting - `1`;
2135	nesting = `2`;
2136	}
2137
2138	auto outputTermIndexAndNest = [&](size_t index, unsigned termNesting) {
2139	for (unsigned nestingDepth = `0`; nestingDepth < termIndexNest; nestingDepth++)
2140	out.print(value: " ");
2141	#if defined(WIN32) && defined(__MINGW32__)
2142	# if __SIZEOF_POINTER__ == 8
2143	out.printf("%4I64u", index);
2144	# else
2145	out.printf("%4I32u", index);
2146	# endif
2147	#else
2148	out.printf(format: "%4zu", index);
2149	#endif
2150	for (unsigned nestingDepth = `0`; nestingDepth < termNesting; nestingDepth++)
2151	out.print(value: " ");
2152	};
2153
2154	auto dumpQuantity = [&](ByteTerm& term) {
2155	if (term.atom.quantityType == QuantifierFixedCount && term.atom.quantityMinCount == `1` && term.atom.quantityMaxCount == `1`)
2156	return;
2157
2158	out.print(value1: " {", value2: term.atom.quantityMinCount);
2159	if (term.atom.quantityMinCount != term.atom.quantityMaxCount) {
2160	if (term.atom.quantityMaxCount == UINT_MAX)
2161	out.print(value: ",inf");
2162	else
2163	out.print(value1: ",", value2: term.atom.quantityMaxCount);
2164	}
2165	out.print(value: "}");
2166	if (term.atom.quantityType == QuantifierGreedy)
2167	out.print(value: " greedy");
2168	else if (term.atom.quantityType == QuantifierNonGreedy)
2169	out.print(value: " non-greedy");
2170	};
2171
2172	auto dumpCaptured = [&](ByteTerm& term) {
2173	if (term.capture())
2174	out.print(value1: " captured (#", value2: term.atom.subpatternId, value3: ")");
2175	};
2176
2177	auto dumpInverted = [&](ByteTerm& term) {
2178	if (term.invert())
2179	out.print(value: " inverted");
2180	};
2181
2182	auto dumpInputPosition = [&](ByteTerm& term) {
2183	out.printf(format: " inputPosition %u", term.inputPosition);
2184	};
2185
2186	auto dumpFrameLocation = [&](ByteTerm& term) {
2187	out.printf(format: " frameLocation %u", term.frameLocation);
2188	};
2189
2190	auto dumpCharacter = [&](ByteTerm& term) {
2191	out.print(value: " ");
2192	dumpUChar32(out, term.atom.patternCharacter);
2193	};
2194
2195	auto dumpCharClass = [&](ByteTerm& term) {
2196	out.print(value: " ");
2197	dumpCharacterClass(out, &m_pattern, term.atom.characterClass);
2198	};
2199
2200	for (size_t idx = `0`; idx < disjunction->terms.size(); ++idx) {
2201	ByteTerm term = disjunction->terms [idx];
2202
2203	bool outputNewline = true;
2204
2205	switch (term.type) {
2206	case ByteTerm::TypeBodyAlternativeBegin:
2207	outputTermIndexAndNest(idx, nesting++);
2208	out.print(value: "BodyAlternativeBegin");
2209	if (term.alternative.onceThrough)
2210	out.print(value: " onceThrough");
2211	dumpFrameLocation(term);
2212	break;
2213	case ByteTerm::TypeBodyAlternativeDisjunction:
2214	outputTermIndexAndNest(idx, nesting - `1`);
2215	out.print(value: "BodyAlternativeDisjunction");
2216	dumpFrameLocation(term);
2217	break;
2218	case ByteTerm::TypeBodyAlternativeEnd:
2219	outputTermIndexAndNest(idx, --nesting);
2220	out.print(value: "BodyAlternativeEnd");
2221	dumpFrameLocation(term);
2222	break;
2223	case ByteTerm::TypeAlternativeBegin:
2224	outputTermIndexAndNest(idx, nesting++);
2225	out.print(value: "AlternativeBegin");
2226	dumpFrameLocation(term);
2227	break;
2228	case ByteTerm::TypeAlternativeDisjunction:
2229	outputTermIndexAndNest(idx, nesting - `1`);
2230	out.print(value: "AlternativeDisjunction");
2231	dumpFrameLocation(term);
2232	break;
2233	case ByteTerm::TypeAlternativeEnd:
2234	outputTermIndexAndNest(idx, --nesting);
2235	out.print(value: "AlternativeEnd");
2236	dumpFrameLocation(term);
2237	break;
2238	case ByteTerm::TypeSubpatternBegin:
2239	outputTermIndexAndNest(idx, nesting++);
2240	out.print(value: "SubpatternBegin");
2241	break;
2242	case ByteTerm::TypeSubpatternEnd:
2243	outputTermIndexAndNest(idx, --nesting);
2244	out.print(value: "SubpatternEnd");
2245	break;
2246	case ByteTerm::TypeAssertionBOL:
2247	outputTermIndexAndNest(idx, nesting);
2248	out.print(value: "AssertionBOL");
2249	break;
2250	case ByteTerm::TypeAssertionEOL:
2251	outputTermIndexAndNest(idx, nesting);
2252	out.print(value: "AssertionEOL");
2253	break;
2254	case ByteTerm::TypeAssertionWordBoundary:
2255	outputTermIndexAndNest(idx, nesting);
2256	out.print(value: "AssertionWordBoundary");
2257	break;
2258	case ByteTerm::TypePatternCharacterOnce:
2259	outputTermIndexAndNest(idx, nesting);
2260	out.print(value: "PatternCharacterOnce");
2261	dumpInverted(term);
2262	dumpInputPosition(term);
2263	dumpFrameLocation(term);
2264	dumpCharacter(term);
2265	dumpQuantity(term);
2266	break;
2267	case ByteTerm::TypePatternCharacterFixed:
2268	outputTermIndexAndNest(idx, nesting);
2269	out.print(value: "PatternCharacterFixed");
2270	dumpInverted(term);
2271	dumpInputPosition(term);
2272	dumpFrameLocation(term);
2273	dumpCharacter(term);
2274	out.print(value1: " {", value2: term.atom.quantityMinCount, value3: "}");
2275	break;
2276	case ByteTerm::TypePatternCharacterGreedy:
2277	outputTermIndexAndNest(idx, nesting);
2278	out.print(value: "PatternCharacterGreedy");
2279	dumpInverted(term);
2280	dumpInputPosition(term);
2281	dumpFrameLocation(term);
2282	dumpCharacter(term);
2283	dumpQuantity(term);
2284	break;
2285	case ByteTerm::TypePatternCharacterNonGreedy:
2286	outputTermIndexAndNest(idx, nesting);
2287	out.print(value: "PatternCharacterNonGreedy");
2288	dumpInverted(term);
2289	dumpInputPosition(term);
2290	dumpFrameLocation(term);
2291	dumpCharacter(term);
2292	dumpQuantity(term);
2293	break;
2294	case ByteTerm::TypePatternCasedCharacterOnce:
2295	outputTermIndexAndNest(idx, nesting);
2296	out.print(value: "PatternCasedCharacterOnce");
2297	break;
2298	case ByteTerm::TypePatternCasedCharacterFixed:
2299	outputTermIndexAndNest(idx, nesting);
2300	out.print(value: "PatternCasedCharacterFixed");
2301	break;
2302	case ByteTerm::TypePatternCasedCharacterGreedy:
2303	outputTermIndexAndNest(idx, nesting);
2304	out.print(value: "PatternCasedCharacterGreedy");
2305	break;
2306	case ByteTerm::TypePatternCasedCharacterNonGreedy:
2307	outputTermIndexAndNest(idx, nesting);
2308	out.print(value: "PatternCasedCharacterNonGreedy");
2309	break;
2310	case ByteTerm::TypeCharacterClass:
2311	outputTermIndexAndNest(idx, nesting);
2312	out.print(value: "CharacterClass");
2313	dumpInverted(term);
2314	dumpInputPosition(term);
2315	dumpFrameLocation(term);
2316	dumpCharClass(term);
2317	dumpQuantity(term);
2318	break;
2319	case ByteTerm::TypeBackReference:
2320	outputTermIndexAndNest(idx, nesting);
2321	out.print(value1: "BackReference #", value2: term.atom.subpatternId);
2322	dumpQuantity(term);
2323	break;
2324	case ByteTerm::TypeParenthesesSubpattern:
2325	outputTermIndexAndNest(idx, nesting);
2326	out.print(value: "ParenthesesSubpattern");
2327	dumpCaptured(term);
2328	dumpInverted(term);
2329	dumpInputPosition(term);
2330	dumpFrameLocation(term);
2331	dumpQuantity(term);
2332	out.print(value: "\n");
2333	outputNewline = false;
2334	dumpDisjunction(disjunction: term.atom.parenthesesDisjunction, nesting);
2335	break;
2336	case ByteTerm::TypeParenthesesSubpatternOnceBegin:
2337	outputTermIndexAndNest(idx, nesting++);
2338	out.print(value: "ParenthesesSubpatternOnceBegin");
2339	dumpCaptured(term);
2340	dumpInverted(term);
2341	dumpInputPosition(term);
2342	dumpFrameLocation(term);
2343	break;
2344	case ByteTerm::TypeParenthesesSubpatternOnceEnd:
2345	outputTermIndexAndNest(idx, --nesting);
2346	out.print(value: "ParenthesesSubpatternOnceEnd");
2347	dumpFrameLocation(term);
2348	break;
2349	case ByteTerm::TypeParenthesesSubpatternTerminalBegin:
2350	outputTermIndexAndNest(idx, nesting++);
2351	out.print(value: "ParenthesesSubpatternTerminalBegin");
2352	dumpInverted(term);
2353	dumpInputPosition(term);
2354	dumpFrameLocation(term);
2355	break;
2356	case ByteTerm::TypeParenthesesSubpatternTerminalEnd:
2357	outputTermIndexAndNest(idx, --nesting);
2358	out.print(value: "ParenthesesSubpatternTerminalEnd");
2359	dumpFrameLocation(term);
2360	break;
2361	case ByteTerm::TypeParentheticalAssertionBegin:
2362	outputTermIndexAndNest(idx, nesting++);
2363	out.print(value: "ParentheticalAssertionBegin");
2364	dumpInverted(term);
2365	dumpInputPosition(term);
2366	dumpFrameLocation(term);
2367	break;
2368	case ByteTerm::TypeParentheticalAssertionEnd:
2369	outputTermIndexAndNest(idx, --nesting);
2370	out.print(value: "ParentheticalAssertionEnd");
2371	dumpFrameLocation(term);
2372	break;
2373	case ByteTerm::TypeCheckInput:
2374	outputTermIndexAndNest(idx, nesting);
2375	out.print(value1: "CheckInput ", value2: term.checkInputCount);
2376	break;
2377	case ByteTerm::TypeUncheckInput:
2378	outputTermIndexAndNest(idx, nesting);
2379	out.print(value1: "UncheckInput ", value2: term.checkInputCount);
2380	break;
2381	case ByteTerm::TypeDotStarEnclosure:
2382	outputTermIndexAndNest(idx, nesting);
2383	out.print(value: "DotStarEnclosure");
2384	break;
2385	}
2386	if (outputNewline)
2387	out.print(value: "\n");
2388	}
2389	}
2390	#endif
2391
2392	private:
2393	YarrPattern& m_pattern;
2394	std::unique_ptr<ByteDisjunction> m_bodyDisjunction;
2395	unsigned m_currentAlternativeIndex;
2396	Vector<ParenthesesStackEntry> m_parenthesesStack;
2397	Vector<std::unique_ptr<ByteDisjunction>> m_allParenthesesInfo;
2398	};
2399
2400	std::unique_ptr<BytecodePattern> byteCompile(YarrPattern& pattern, BumpPointerAllocator* allocator, ConcurrentJSLock* lock)
2401	{
2402	return ByteCompiler (pattern).compile(allocator, lock);
2403	}
2404
2405	unsigned interpret(BytecodePattern* bytecode, const String& input, unsigned start, unsigned* output)
2406	{
2407	SuperSamplerScope superSamplerScope(false);
2408	if (input.is8Bit())
2409	return Interpreter<LChar>(bytecode, output, input.characters8(), input.size(), start).interpret();
2410	return Interpreter<UChar>(bytecode, output, input.characters16(), input.size(), start).interpret();
2411	}
2412
2413	unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
2414	{
2415	SuperSamplerScope superSamplerScope(false);
2416	return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
2417	}
2418
2419	unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
2420	{
2421	SuperSamplerScope superSamplerScope(false);
2422	return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
2423	}
2424
2425	// These should be the same for both UChar & LChar.
2426	COMPILE_ASSERT(sizeof(BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
2427	COMPILE_ASSERT(sizeof(BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
2428	COMPILE_ASSERT(sizeof(BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
2429	COMPILE_ASSERT(sizeof(BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
2430	COMPILE_ASSERT(sizeof(BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
2431	COMPILE_ASSERT(sizeof(BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
2432	COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) <= (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
2433
2434
2435	} }
2436

source code of qtdeclarative/src/3rdparty/masm/yarr/YarrInterpreter.cpp