RegexParser.h source code [qtscript/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h]

1	/*
2	* Copyright (C) 2009 Apple Inc. All rights reserved.
3	*
4	* Redistribution and use in source and binary forms, with or without
5	* modification, are permitted provided that the following conditions
6	* are met:
7	* 1. Redistributions of source code must retain the above copyright
8	* notice, this list of conditions and the following disclaimer.
9	* 2. Redistributions in binary form must reproduce the above copyright
10	* notice, this list of conditions and the following disclaimer in the
11	* documentation and/or other materials provided with the distribution.
12	*
13	* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14	* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16	* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17	* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18	* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20	* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21	* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24	*/
25
26	#ifndef RegexParser_h
27	#define RegexParser_h
28
29	#include <wtf/Platform.h>
30
31	#if ENABLE(YARR)
32
33	#include <UString.h>
34	#include <wtf/ASCIICType.h>
35	#include <wtf/unicode/Unicode.h>
36	#include <limits.h>
37
38	namespace JSC { namespace Yarr {
39
40	enum BuiltInCharacterClassID {
41	DigitClassID,
42	SpaceClassID,
43	WordClassID,
44	NewlineClassID,
45	};
46
47	// The Parser class should not be used directly - only via the Yarr::parse() method.
48	template<class Delegate>
49	class Parser {
50	private:
51	template<class FriendDelegate>
52	friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
53
54	enum ErrorCode {
55	NoError,
56	PatternTooLarge,
57	QuantifierOutOfOrder,
58	QuantifierWithoutAtom,
59	MissingParentheses,
60	ParenthesesUnmatched,
61	ParenthesesTypeInvalid,
62	CharacterClassUnmatched,
63	CharacterClassOutOfOrder,
64	EscapeUnterminated,
65	NumberOfErrorCodes
66	};
67
68	/*
69	* CharacterClassParserDelegate:
70	*
71	* The class CharacterClassParserDelegate is used in the parsing of character
72	* classes. This class handles detection of character ranges. This class
73	* implements enough of the delegate interface such that it can be passed to
74	* parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
75	* to perform the parsing of escape characters in character sets.
76	*/
77	class CharacterClassParserDelegate {
78	public:
79	CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
80	: m_delegate(delegate)
81	, m_err(err)
82	, m_state(empty)
83	{
84	}
85
86	/*
87	* begin():
88	*
89	* Called at beginning of construction.
90	*/
91	void begin(bool invert)
92	{
93	m_delegate.atomCharacterClassBegin(invert);
94	}
95
96	/*
97	* atomPatternCharacterUnescaped():
98	*
99	* This method is called directly from parseCharacterClass(), to report a new
100	* pattern character token. This method differs from atomPatternCharacter(),
101	* which will be called from parseEscape(), since a hypen provided via this
102	* method may be indicating a character range, but a hyphen parsed by
103	* parseEscape() cannot be interpreted as doing so.
104	*/
105	void atomPatternCharacterUnescaped(UChar ch)
106	{
107	switch (m_state) {
108	case empty:
109	m_character = ch;
110	m_state = cachedCharacter;
111	break;
112
113	case cachedCharacter:
114	if (ch == `'-'`)
115	m_state = cachedCharacterHyphen;
116	else {
117	m_delegate.atomCharacterClassAtom(m_character);
118	m_character = ch;
119	}
120	break;
121
122	case cachedCharacterHyphen:
123	if (ch >= m_character)
124	m_delegate.atomCharacterClassRange(m_character, ch);
125	else
126	m_err = CharacterClassOutOfOrder;
127	m_state = empty;
128	}
129	}
130
131	/*
132	* atomPatternCharacter():
133	*
134	* Adds a pattern character, called by parseEscape(), as such will not
135	* interpret a hyphen as indicating a character range.
136	*/
137	void atomPatternCharacter(UChar ch)
138	{
139	// Flush if a character is already pending to prevent the
140	// hyphen from begin interpreted as indicating a range.
141	if((ch == `'-'`) && (m_state == cachedCharacter))
142	flush();
143
144	atomPatternCharacterUnescaped(ch);
145	}
146
147	/*
148	* atomBuiltInCharacterClass():
149	*
150	* Adds a built-in character class, called by parseEscape().
151	*/
152	void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
153	{
154	flush();
155	m_delegate.atomCharacterClassBuiltIn(classID, invert);
156	}
157
158	/*
159	* end():
160	*
161	* Called at end of construction.
162	*/
163	void end()
164	{
165	flush();
166	m_delegate.atomCharacterClassEnd();
167	}
168
169	// parseEscape() should never call these delegate methods when
170	// invoked with inCharacterClass set.
171	void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
172	void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
173
174	private:
175	void flush()
176	{
177	if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
178	m_delegate.atomCharacterClassAtom(m_character);
179	if (m_state == cachedCharacterHyphen)
180	m_delegate.atomCharacterClassAtom(`'-'`);
181	m_state = empty;
182	}
183
184	Delegate& m_delegate;
185	ErrorCode& m_err;
186	enum CharacterClassConstructionState {
187	empty,
188	cachedCharacter,
189	cachedCharacterHyphen,
190	} m_state;
191	UChar m_character;
192	};
193
194	Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
195	: m_delegate(delegate)
196	, m_backReferenceLimit(backReferenceLimit)
197	, m_err(NoError)
198	, m_data(pattern.data())
199	, m_size(pattern.size())
200	, m_index(`0`)
201	, m_parenthesesNestingDepth(`0`)
202	{
203	}
204
205	/*
206	* parseEscape():
207	*
208	* Helper for parseTokens() AND parseCharacterClass().
209	* Unlike the other parser methods, this function does not report tokens
210	* directly to the member delegate (m_delegate), instead tokens are
211	* emitted to the delegate provided as an argument. In the case of atom
212	* escapes, parseTokens() will call parseEscape() passing m_delegate as
213	* an argument, and as such the escape will be reported to the delegate.
214	*
215	* However this method may also be used by parseCharacterClass(), in which
216	* case a CharacterClassParserDelegate will be passed as the delegate that
217	* tokens should be added to. A boolean flag is also provided to indicate
218	* whether that an escape in a CharacterClass is being parsed (some parsing
219	* rules change in this context).
220	*
221	* The boolean value returned by this method indicates whether the token
222	* parsed was an atom (outside of a characted class \b and \B will be
223	* interpreted as assertions).
224	*/
225	template<bool inCharacterClass, class EscapeDelegate>
226	bool parseEscape(EscapeDelegate& delegate)
227	{
228	ASSERT(!m_err);
229	ASSERT(peek() == `'\\'`);
230	consume();
231
232	if (atEndOfPattern()) {
233	m_err = EscapeUnterminated;
234	return false;
235	}
236
237	switch (peek()) {
238	// Assertions
239	case `'b'`:
240	consume();
241	if (inCharacterClass)
242	delegate.atomPatternCharacter(`'\b'`);
243	else {
244	delegate.assertionWordBoundary(false);
245	return false;
246	}
247	break;
248	case `'B'`:
249	consume();
250	if (inCharacterClass)
251	delegate.atomPatternCharacter(`'B'`);
252	else {
253	delegate.assertionWordBoundary(true);
254	return false;
255	}
256	break;
257
258	// CharacterClassEscape
259	case `'d'`:
260	consume();
261	delegate.atomBuiltInCharacterClass(DigitClassID, false);
262	break;
263	case `'s'`:
264	consume();
265	delegate.atomBuiltInCharacterClass(SpaceClassID, false);
266	break;
267	case `'w'`:
268	consume();
269	delegate.atomBuiltInCharacterClass(WordClassID, false);
270	break;
271	case `'D'`:
272	consume();
273	delegate.atomBuiltInCharacterClass(DigitClassID, true);
274	break;
275	case `'S'`:
276	consume();
277	delegate.atomBuiltInCharacterClass(SpaceClassID, true);
278	break;
279	case `'W'`:
280	consume();
281	delegate.atomBuiltInCharacterClass(WordClassID, true);
282	break;
283
284	// DecimalEscape
285	case `'1'`:
286	case `'2'`:
287	case `'3'`:
288	case `'4'`:
289	case `'5'`:
290	case `'6'`:
291	case `'7'`:
292	case `'8'`:
293	case `'9'`: {
294	// To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
295	// First, try to parse this as backreference.
296	if (!inCharacterClass) {
297	ParseState state = saveState();
298
299	unsigned backReference = consumeNumber();
300	if (backReference <= m_backReferenceLimit) {
301	delegate.atomBackReference(backReference);
302	break;
303	}
304
305	restoreState(state);
306	}
307
308	// Not a backreference, and not octal.
309	if (peek() >= `'8'`) {
310	delegate.atomPatternCharacter(`'\\'`);
311	break;
312	}
313
314	// Fall-through to handle this as an octal escape.
315	}
316
317	// Octal escape
318	case `'0'`:
319	delegate.atomPatternCharacter(consumeOctal());
320	break;
321
322	// ControlEscape
323	case `'f'`:
324	consume();
325	delegate.atomPatternCharacter(`'\f'`);
326	break;
327	case `'n'`:
328	consume();
329	delegate.atomPatternCharacter(`'\n'`);
330	break;
331	case `'r'`:
332	consume();
333	delegate.atomPatternCharacter(`'\r'`);
334	break;
335	case `'t'`:
336	consume();
337	delegate.atomPatternCharacter(`'\t'`);
338	break;
339	case `'v'`:
340	consume();
341	delegate.atomPatternCharacter(`'\v'`);
342	break;
343
344	// ControlLetter
345	case `'c'`: {
346	ParseState state = saveState();
347	consume();
348	if (!atEndOfPattern()) {
349	int control = consume();
350
351	// To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
352	if (inCharacterClass ? WTF::isASCIIAlphanumeric(c: control) \|\| (control == `'_'`) : WTF::isASCIIAlpha(c: control)) {
353	delegate.atomPatternCharacter(control & `0x1f`);
354	break;
355	}
356	}
357	restoreState(state);
358	delegate.atomPatternCharacter(`'\\'`);
359	break;
360	}
361
362	// HexEscape
363	case `'x'`: {
364	consume();
365	int x = tryConsumeHex(count: `2`);
366	if (x == -`1`)
367	delegate.atomPatternCharacter(`'x'`);
368	else
369	delegate.atomPatternCharacter(x);
370	break;
371	}
372
373	// UnicodeEscape
374	case `'u'`: {
375	consume();
376	int u = tryConsumeHex(count: `4`);
377	if (u == -`1`)
378	delegate.atomPatternCharacter(`'u'`);
379	else
380	delegate.atomPatternCharacter(u);
381	break;
382	}
383
384	// IdentityEscape
385	default:
386	delegate.atomPatternCharacter(consume());
387	}
388
389	return true;
390	}
391
392	/*
393	* parseAtomEscape(), parseCharacterClassEscape():
394	*
395	* These methods alias to parseEscape().
396	*/
397	bool parseAtomEscape()
398	{
399	return parseEscape<false>(m_delegate);
400	}
401	void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
402	{
403	parseEscape<true>(delegate);
404	}
405
406	/*
407	* parseCharacterClass():
408	*
409	* Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
410	* to an instance of CharacterClassParserDelegate, to describe the character class to the
411	* delegate.
412	*/
413	void parseCharacterClass()
414	{
415	ASSERT(!m_err);
416	ASSERT(peek() == `'['`);
417	consume();
418
419	CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
420
421	characterClassConstructor.begin(tryConsume(ch: `'^'`));
422
423	while (!atEndOfPattern()) {
424	switch (peek()) {
425	case `']'`:
426	consume();
427	characterClassConstructor.end();
428	return;
429
430	case `'\\'`:
431	parseCharacterClassEscape(delegate&: characterClassConstructor);
432	break;
433
434	default:
435	characterClassConstructor.atomPatternCharacterUnescaped(consume());
436	}
437
438	if (m_err)
439	return;
440	}
441
442	m_err = CharacterClassUnmatched;
443	}
444
445	/*
446	* parseParenthesesBegin():
447	*
448	* Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
449	*/
450	void parseParenthesesBegin()
451	{
452	ASSERT(!m_err);
453	ASSERT(peek() == `'('`);
454	consume();
455
456	if (tryConsume(ch: `'?'`)) {
457	if (atEndOfPattern()) {
458	m_err = ParenthesesTypeInvalid;
459	return;
460	}
461
462	switch (consume()) {
463	case `':'`:
464	m_delegate.atomParenthesesSubpatternBegin(false);
465	break;
466
467	case `'='`:
468	m_delegate.atomParentheticalAssertionBegin();
469	break;
470
471	case `'!'`:
472	m_delegate.atomParentheticalAssertionBegin(true);
473	break;
474
475	default:
476	m_err = ParenthesesTypeInvalid;
477	}
478	} else
479	m_delegate.atomParenthesesSubpatternBegin();
480
481	++m_parenthesesNestingDepth;
482	}
483
484	/*
485	* parseParenthesesEnd():
486	*
487	* Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
488	*/
489	void parseParenthesesEnd()
490	{
491	ASSERT(!m_err);
492	ASSERT(peek() == `')'`);
493	consume();
494
495	if (m_parenthesesNestingDepth > `0`)
496	m_delegate.atomParenthesesEnd();
497	else
498	m_err = ParenthesesUnmatched;
499
500	--m_parenthesesNestingDepth;
501	}
502
503	/*
504	* parseQuantifier():
505	*
506	* Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
507	*/
508	void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
509	{
510	ASSERT(!m_err);
511	ASSERT(min <= max);
512
513	if (lastTokenWasAnAtom)
514	m_delegate.quantifyAtom(min, max, !tryConsume(ch: `'?'`));
515	else
516	m_err = QuantifierWithoutAtom;
517	}
518
519	/*
520	* parseTokens():
521	*
522	* This method loops over the input pattern reporting tokens to the delegate.
523	* The method returns when a parse error is detected, or the end of the pattern
524	* is reached. One piece of state is tracked around the loop, which is whether
525	* the last token passed to the delegate was an atom (this is necessary to detect
526	* a parse error when a quantifier provided without an atom to quantify).
527	*/
528	void parseTokens()
529	{
530	bool lastTokenWasAnAtom = false;
531
532	while (!atEndOfPattern()) {
533	switch (peek()) {
534	case `'\|'`:
535	consume();
536	m_delegate.disjunction();
537	lastTokenWasAnAtom = false;
538	break;
539
540	case `'('`:
541	parseParenthesesBegin();
542	lastTokenWasAnAtom = false;
543	break;
544
545	case `')'`:
546	parseParenthesesEnd();
547	lastTokenWasAnAtom = true;
548	break;
549
550	case `'^'`:
551	consume();
552	m_delegate.assertionBOL();
553	lastTokenWasAnAtom = false;
554	break;
555
556	case `'$'`:
557	consume();
558	m_delegate.assertionEOL();
559	lastTokenWasAnAtom = false;
560	break;
561
562	case `'.'`:
563	consume();
564	m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
565	lastTokenWasAnAtom = true;
566	break;
567
568	case `'['`:
569	parseCharacterClass();
570	lastTokenWasAnAtom = true;
571	break;
572
573	case `'\\'`:
574	lastTokenWasAnAtom = parseAtomEscape();
575	break;
576
577	case `'*'`:
578	consume();
579	parseQuantifier(lastTokenWasAnAtom, min: `0`, UINT_MAX);
580	lastTokenWasAnAtom = false;
581	break;
582
583	case `'+'`:
584	consume();
585	parseQuantifier(lastTokenWasAnAtom, min: `1`, UINT_MAX);
586	lastTokenWasAnAtom = false;
587	break;
588
589	case `'?'`:
590	consume();
591	parseQuantifier(lastTokenWasAnAtom, min: `0`, max: `1`);
592	lastTokenWasAnAtom = false;
593	break;
594
595	case `'{'`: {
596	ParseState state = saveState();
597
598	consume();
599	if (peekIsDigit()) {
600	unsigned min = consumeNumber();
601	unsigned max = min;
602
603	if (tryConsume(ch: `','`))
604	max = peekIsDigit() ? consumeNumber() : UINT_MAX;
605
606	if (tryConsume(ch: `'}'`)) {
607	if (min <= max)
608	parseQuantifier(lastTokenWasAnAtom, min, max);
609	else
610	m_err = QuantifierOutOfOrder;
611	lastTokenWasAnAtom = false;
612	break;
613	}
614	}
615
616	restoreState(state);
617	} // if we did not find a complete quantifer, fall through to the default case.
618
619	default:
620	m_delegate.atomPatternCharacter(consume());
621	lastTokenWasAnAtom = true;
622	}
623
624	if (m_err)
625	return;
626	}
627
628	if (m_parenthesesNestingDepth > `0`)
629	m_err = MissingParentheses;
630	}
631
632	/*
633	* parse():
634	*
635	* This method calls regexBegin(), calls parseTokens() to parse over the input
636	* patterns, calls regexEnd() or regexError() as appropriate, and converts any
637	* error code to a const char* for a result.
638	*/
639	const char* parse()
640	{
641	m_delegate.regexBegin();
642
643	if (m_size > MAX_PATTERN_SIZE)
644	m_err = PatternTooLarge;
645	else
646	parseTokens();
647	ASSERT(atEndOfPattern() \|\| m_err);
648
649	if (m_err)
650	m_delegate.regexError();
651	else
652	m_delegate.regexEnd();
653
654	// The order of this array must match the ErrorCode enum.
655	static const char* errorMessages[NumberOfErrorCodes] = {
656	`0`, // NoError
657	"regular expression too large",
658	"numbers out of order in {} quantifier",
659	"nothing to repeat",
660	"missing )",
661	"unmatched parentheses",
662	"unrecognized character after (?",
663	"missing terminating ] for character class",
664	"range out of order in character class",
665	"\\ at end of pattern"
666	};
667
668	return errorMessages[m_err];
669	}
670
671
672	// Misc helper functions:
673
674	typedef unsigned ParseState;
675
676	ParseState saveState()
677	{
678	return m_index;
679	}
680
681	void restoreState(ParseState state)
682	{
683	m_index = state;
684	}
685
686	bool atEndOfPattern()
687	{
688	ASSERT(m_index <= m_size);
689	return m_index == m_size;
690	}
691
692	int peek()
693	{
694	ASSERT(m_index < m_size);
695	return m_data[m_index];
696	}
697
698	bool peekIsDigit()
699	{
700	return !atEndOfPattern() && WTF::isASCIIDigit(peek());
701	}
702
703	unsigned peekDigit()
704	{
705	ASSERT(peekIsDigit());
706	return peek() - `'0'`;
707	}
708
709	int consume()
710	{
711	ASSERT(m_index < m_size);
712	return m_data[m_index++];
713	}
714
715	unsigned consumeDigit()
716	{
717	ASSERT(peekIsDigit());
718	return consume() - `'0'`;
719	}
720
721	unsigned consumeNumber()
722	{
723	unsigned n = consumeDigit();
724	// check for overflow.
725	for (unsigned newValue; peekIsDigit() && ((newValue = n * `10` + peekDigit()) >= n); ) {
726	n = newValue;
727	consume();
728	}
729	return n;
730	}
731
732	unsigned consumeOctal()
733	{
734	ASSERT(WTF::isASCIIOctalDigit(peek()));
735
736	unsigned n = consumeDigit();
737	while (n < `32` && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
738	n = n * `8` + consumeDigit();
739	return n;
740	}
741
742	bool tryConsume(UChar ch)
743	{
744	if (atEndOfPattern() \|\| (m_data[m_index] != ch))
745	return false;
746	++m_index;
747	return true;
748	}
749
750	int tryConsumeHex(int count)
751	{
752	ParseState state = saveState();
753
754	int n = `0`;
755	while (count--) {
756	if (atEndOfPattern() \|\| !WTF::isASCIIHexDigit(peek())) {
757	restoreState(state);
758	return -`1`;
759	}
760	n = (n << `4`) \| WTF::toASCIIHexValue(consume());
761	}
762	return n;
763	}
764
765	Delegate& m_delegate;
766	unsigned m_backReferenceLimit;
767	ErrorCode m_err;
768	const UChar* m_data;
769	unsigned m_size;
770	unsigned m_index;
771	unsigned m_parenthesesNestingDepth;
772
773	// Derived by empirical testing of compile time in PCRE and WREC.
774	static const unsigned MAX_PATTERN_SIZE = `1024` * `1024`;
775	};
776
777	/*
778	* Yarr::parse():
779	*
780	* The parse method is passed a pattern to be parsed and a delegate upon which
781	* callbacks will be made to record the parsed tokens forming the regex.
782	* Yarr::parse() returns null on success, or a const C string providing an error
783	* message where a parse error occurs.
784	*
785	* The Delegate must implement the following interface:
786	*
787	* void assertionBOL();
788	* void assertionEOL();
789	* void assertionWordBoundary(bool invert);
790	*
791	* void atomPatternCharacter(UChar ch);
792	* void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
793	* void atomCharacterClassBegin(bool invert)
794	* void atomCharacterClassAtom(UChar ch)
795	* void atomCharacterClassRange(UChar begin, UChar end)
796	* void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
797	* void atomCharacterClassEnd()
798	* void atomParenthesesSubpatternBegin(bool capture = true);
799	* void atomParentheticalAssertionBegin(bool invert = false);
800	* void atomParenthesesEnd();
801	* void atomBackReference(unsigned subpatternId);
802	*
803	* void quantifyAtom(unsigned min, unsigned max, bool greedy);
804	*
805	* void disjunction();
806	*
807	* void regexBegin();
808	* void regexEnd();
809	* void regexError();
810	*
811	* Before any call recording tokens are made, regexBegin() will be called on the
812	* delegate once. Once parsing is complete either regexEnd() or regexError() will
813	* be called, as appropriate.
814	*
815	* The regular expression is described by a sequence of assertion() and atom()
816	* callbacks to the delegate, describing the terms in the regular expression.
817	* Following an atom a quantifyAtom() call may occur to indicate that the previous
818	* atom should be quantified. In the case of atoms described across multiple
819	* calls (parentheses and character classes) the call to quantifyAtom() will come
820	* after the call to the atomEnd() method, never after atomBegin().
821	*
822	* Character classes may either be described by a single call to
823	* atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
824	* In the latter case, ...Begin() will be called, followed by a sequence of
825	* calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
826	*
827	* Sequences of atoms and assertions are broken into alternatives via calls to
828	* disjunction(). Assertions, atoms, and disjunctions emitted between calls to
829	* atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
830	* atomParenthesesBegin() is passed a subpatternId. In the case of a regular
831	* capturing subpattern, this will be the subpatternId associated with these
832	* parentheses, and will also by definition be the lowest subpatternId of these
833	* parentheses and of any nested paretheses. The atomParenthesesEnd() method
834	* is passed the subpatternId of the last capturing subexpression nested within
835	* these paretheses. In the case of a capturing subpattern with no nested
836	* capturing subpatterns, the same subpatternId will be passed to the begin and
837	* end functions. In the case of non-capturing subpatterns the subpatternId
838	* passed to the begin method is also the first possible subpatternId that might
839	* be nested within these paretheses. If a set of non-capturing parentheses does
840	* not contain any capturing subpatterns, then the subpatternId passed to begin
841	* will be greater than the subpatternId passed to end.
842	*/
843
844	template<class Delegate>
845	const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
846	{
847	return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
848	}
849
850	} } // namespace JSC::Yarr
851
852	#endif
853
854	#endif // RegexParser_h
855

source code of qtscript/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h