mod.rs - Codebrowser

1	/!*
2	Defines an abstract syntax for regular expressions.
3	*/
4
5	use std::cmp::Ordering;
6	use std::error;
7	use std::fmt;
8
9	pub use crate::ast::visitor::{visit, Visitor};
10
11	pub mod parse;
12	pub mod print;
13	mod visitor;
14
15	/// An error that occurred while parsing a regular expression into an abstract
16	/// syntax tree.
17	///
18	/// Note that not all ASTs represents a valid regular expression. For example,
19	/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
20	/// valid Unicode property name. That particular error is reported when
21	/// translating an AST to the high-level intermediate representation (`HIR`).
22	#[derive(Clone, Debug, Eq, PartialEq)]
23	pub struct Error {
24	/// The kind of error.
25	kind: ErrorKind,
26	/// The original pattern that the parser generated the error from. Every
27	/// span in an error is a valid range into this string.
28	pattern: String,
29	/// The span of this error.
30	span: Span,
31	}
32
33	impl Error {
34	/// Return the type of this error.
35	pub fn kind(&self) -> &ErrorKind {
36	&self.kind
37	}
38
39	/// The original pattern string in which this error occurred.
40	///
41	/// Every span reported by this error is reported in terms of this string.
42	pub fn pattern(&self) -> &str {
43	&self.pattern
44	}
45
46	/// Return the span at which this error occurred.
47	pub fn span(&self) -> &Span {
48	&self.span
49	}
50
51	/// Return an auxiliary span. This span exists only for some errors that
52	/// benefit from being able to point to two locations in the original
53	/// regular expression. For example, "duplicate" errors will have the
54	/// main error position set to the duplicate occurrence while its
55	/// auxiliary span will be set to the initial occurrence.
56	pub fn auxiliary_span(&self) -> Option<&Span> {
57	use self::ErrorKind::*;
58	match self.kind {
59	FlagDuplicate { ref original } => Some(original),
60	FlagRepeatedNegation { ref original, .. } => Some(original),
61	GroupNameDuplicate { ref original, .. } => Some(original),
62	_ => None,
63	}
64	}
65	}
66
67	/// The type of an error that occurred while building an AST.
68	#[derive(Clone, Debug, Eq, PartialEq)]
69	pub enum ErrorKind {
70	/// The capturing group limit was exceeded.
71	///
72	/// Note that this represents a limit on the total number of capturing
73	/// groups in a regex and not necessarily the number of nested capturing
74	/// groups. That is, the nest limit can be low and it is still possible for
75	/// this error to occur.
76	CaptureLimitExceeded,
77	/// An invalid escape sequence was found in a character class set.
78	ClassEscapeInvalid,
79	/// An invalid character class range was found. An invalid range is any
80	/// range where the start is greater than the end.
81	ClassRangeInvalid,
82	/// An invalid range boundary was found in a character class. Range
83	/// boundaries must be a single literal codepoint, but this error indicates
84	/// that something else was found, such as a nested class.
85	ClassRangeLiteral,
86	/// An opening `[` was found with no corresponding closing `]`.
87	ClassUnclosed,
88	/// Note that this error variant is no longer used. Namely, a decimal
89	/// number can only appear as a repetition quantifier. When the number
90	/// in a repetition quantifier is empty, then it gets its own specialized
91	/// error, `RepetitionCountDecimalEmpty`.
92	DecimalEmpty,
93	/// An invalid decimal number was given where one was expected.
94	DecimalInvalid,
95	/// A bracketed hex literal was empty.
96	EscapeHexEmpty,
97	/// A bracketed hex literal did not correspond to a Unicode scalar value.
98	EscapeHexInvalid,
99	/// An invalid hexadecimal digit was found.
100	EscapeHexInvalidDigit,
101	/// EOF was found before an escape sequence was completed.
102	EscapeUnexpectedEof,
103	/// An unrecognized escape sequence.
104	EscapeUnrecognized,
105	/// A dangling negation was used when setting flags, e.g., `i-`.
106	FlagDanglingNegation,
107	/// A flag was used twice, e.g., `i-i`.
108	FlagDuplicate {
109	/// The position of the original flag. The error position
110	/// points to the duplicate flag.
111	original: Span,
112	},
113	/// The negation operator was used twice, e.g., `-i-s`.
114	FlagRepeatedNegation {
115	/// The position of the original negation operator. The error position
116	/// points to the duplicate negation operator.
117	original: Span,
118	},
119	/// Expected a flag but got EOF, e.g., `(?`.
120	FlagUnexpectedEof,
121	/// Unrecognized flag, e.g., `a`.
122	FlagUnrecognized,
123	/// A duplicate capture name was found.
124	GroupNameDuplicate {
125	/// The position of the initial occurrence of the capture name. The
126	/// error position itself points to the duplicate occurrence.
127	original: Span,
128	},
129	/// A capture group name is empty, e.g., `(?P<>abc)`.
130	GroupNameEmpty,
131	/// An invalid character was seen for a capture group name. This includes
132	/// errors where the first character is a digit (even though subsequent
133	/// characters are allowed to be digits).
134	GroupNameInvalid,
135	/// A closing `>` could not be found for a capture group name.
136	GroupNameUnexpectedEof,
137	/// An unclosed group, e.g., `(ab`.
138	///
139	/// The span of this error corresponds to the unclosed parenthesis.
140	GroupUnclosed,
141	/// An unopened group, e.g., `ab)`.
142	GroupUnopened,
143	/// The nest limit was exceeded. The limit stored here is the limit
144	/// configured in the parser.
145	NestLimitExceeded(u32),
146	/// The range provided in a counted repetition operator is invalid. The
147	/// range is invalid if the start is greater than the end.
148	RepetitionCountInvalid,
149	/// An opening `{` was not followed by a valid decimal value.
150	/// For example, `x{}` or `x{]}` would fail.
151	RepetitionCountDecimalEmpty,
152	/// An opening `{` was found with no corresponding closing `}`.
153	RepetitionCountUnclosed,
154	/// A repetition operator was applied to a missing sub-expression. This
155	/// occurs, for example, in the regex consisting of just a `` or even*
156	/// `(?i)`. It is, however, possible to create a repetition operating on*
157	/// an empty sub-expression. For example, `()` is still considered valid.*
158	RepetitionMissing,
159	/// The Unicode class is not valid. This typically occurs when a `\p` is
160	/// followed by something other than a `{`.
161	UnicodeClassInvalid,
162	/// When octal support is disabled, this error is produced when an octal
163	/// escape is used. The octal escape is assumed to be an invocation of
164	/// a backreference, which is the common case.
165	UnsupportedBackreference,
166	/// When syntax similar to PCRE's look-around is used, this error is
167	/// returned. Some example syntaxes that are rejected include, but are
168	/// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
169	/// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
170	/// error is used to improve the user experience.
171	UnsupportedLookAround,
172	/// Hints that destructuring should not be exhaustive.
173	///
174	/// This enum may grow additional variants, so this makes sure clients
175	/// don't count on exhaustive matching. (Otherwise, adding a new variant
176	/// could break existing code.)
177	#[doc(hidden)]
178	__Nonexhaustive,
179	}
180
181	impl error::Error for Error {
182	// TODO: Remove this method entirely on the next breaking semver release.
183	#[allow(deprecated)]
184	fn description(&self) -> &str {
185	use self::ErrorKind::*;
186	match self.kind {
187	CaptureLimitExceeded => "capture group limit exceeded",
188	ClassEscapeInvalid => "invalid escape sequence in character class",
189	ClassRangeInvalid => "invalid character class range",
190	ClassRangeLiteral => "invalid range boundary, must be a literal",
191	ClassUnclosed => "unclosed character class",
192	DecimalEmpty => "empty decimal literal",
193	DecimalInvalid => "invalid decimal literal",
194	EscapeHexEmpty => "empty hexadecimal literal",
195	EscapeHexInvalid => "invalid hexadecimal literal",
196	EscapeHexInvalidDigit => "invalid hexadecimal digit",
197	EscapeUnexpectedEof => "unexpected eof (escape sequence)",
198	EscapeUnrecognized => "unrecognized escape sequence",
199	FlagDanglingNegation => "dangling flag negation operator",
200	FlagDuplicate { .. } => "duplicate flag",
201	FlagRepeatedNegation { .. } => "repeated negation",
202	FlagUnexpectedEof => "unexpected eof (flag)",
203	FlagUnrecognized => "unrecognized flag",
204	GroupNameDuplicate { .. } => "duplicate capture group name",
205	GroupNameEmpty => "empty capture group name",
206	GroupNameInvalid => "invalid capture group name",
207	GroupNameUnexpectedEof => "unclosed capture group name",
208	GroupUnclosed => "unclosed group",
209	GroupUnopened => "unopened group",
210	NestLimitExceeded(_) => "nest limit exceeded",
211	RepetitionCountInvalid => "invalid repetition count range",
212	RepetitionCountUnclosed => "unclosed counted repetition",
213	RepetitionMissing => "repetition operator missing expression",
214	UnicodeClassInvalid => "invalid Unicode character class",
215	UnsupportedBackreference => "backreferences are not supported",
216	UnsupportedLookAround => "look-around is not supported",
217	_ => unreachable!(),
218	}
219	}
220	}
221
222	impl fmt::Display for Error {
223	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
224	crate::error::Formatter::from(self).fmt(f)
225	}
226	}
227
228	impl fmt::Display for ErrorKind {
229	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230	use self::ErrorKind::*;
231	match *self {
232	CaptureLimitExceeded => write!(
233	f,
234	"exceeded the maximum number of \
235	capturing groups ({})",
236	::std::u32::MAX
237	),
238	ClassEscapeInvalid => {
239	write!(f, "invalid escape sequence found in character class")
240	}
241	ClassRangeInvalid => write!(
242	f,
243	"invalid character class range, \
244	the start must be <= the end"
245	),
246	ClassRangeLiteral => {
247	write!(f, "invalid range boundary, must be a literal")
248	}
249	ClassUnclosed => write!(f, "unclosed character class"),
250	DecimalEmpty => write!(f, "decimal literal empty"),
251	DecimalInvalid => write!(f, "decimal literal invalid"),
252	EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
253	EscapeHexInvalid => {
254	write!(f, "hexadecimal literal is not a Unicode scalar value")
255	}
256	EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
257	EscapeUnexpectedEof => write!(
258	f,
259	"incomplete escape sequence, \
260	reached end of pattern prematurely"
261	),
262	EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
263	FlagDanglingNegation => {
264	write!(f, "dangling flag negation operator")
265	}
266	FlagDuplicate { .. } => write!(f, "duplicate flag"),
267	FlagRepeatedNegation { .. } => {
268	write!(f, "flag negation operator repeated")
269	}
270	FlagUnexpectedEof => {
271	write!(f, "expected flag but got end of regex")
272	}
273	FlagUnrecognized => write!(f, "unrecognized flag"),
274	GroupNameDuplicate { .. } => {
275	write!(f, "duplicate capture group name")
276	}
277	GroupNameEmpty => write!(f, "empty capture group name"),
278	GroupNameInvalid => write!(f, "invalid capture group character"),
279	GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
280	GroupUnclosed => write!(f, "unclosed group"),
281	GroupUnopened => write!(f, "unopened group"),
282	NestLimitExceeded(limit) => write!(
283	f,
284	"exceed the maximum number of \
285	nested parentheses/brackets ({})",
286	limit
287	),
288	RepetitionCountInvalid => write!(
289	f,
290	"invalid repetition count range, \
291	the start must be <= the end"
292	),
293	RepetitionCountDecimalEmpty => {
294	write!(f, "repetition quantifier expects a valid decimal")
295	}
296	RepetitionCountUnclosed => {
297	write!(f, "unclosed counted repetition")
298	}
299	RepetitionMissing => {
300	write!(f, "repetition operator missing expression")
301	}
302	UnicodeClassInvalid => {
303	write!(f, "invalid Unicode character class")
304	}
305	UnsupportedBackreference => {
306	write!(f, "backreferences are not supported")
307	}
308	UnsupportedLookAround => write!(
309	f,
310	"look-around, including look-ahead and look-behind, \
311	is not supported"
312	),
313	_ => unreachable!(),
314	}
315	}
316	}
317
318	/// Span represents the position information of a single AST item.
319	///
320	/// All span positions are absolute byte offsets that can be used on the
321	/// original regular expression that was parsed.
322	#[derive(Clone, Copy, Eq, PartialEq)]
323	pub struct Span {
324	/// The start byte offset.
325	pub start: Position,
326	/// The end byte offset.
327	pub end: Position,
328	}
329
330	impl fmt::Debug for Span {
331	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
332	write!(f, "Span({:?}, {:?})", self.start, self.end)
333	}
334	}
335
336	impl Ord for Span {
337	fn cmp(&self, other: &Span) -> Ordering {
338	(&self.start, &self.end).cmp(&(&other.start, &other.end))
339	}
340	}
341
342	impl PartialOrd for Span {
343	fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
344	Some(self.cmp(other))
345	}
346	}
347
348	/// A single position in a regular expression.
349	///
350	/// A position encodes one half of a span, and include the byte offset, line
351	/// number and column number.
352	#[derive(Clone, Copy, Eq, PartialEq)]
353	pub struct Position {
354	/// The absolute offset of this position, starting at `0` from the
355	/// beginning of the regular expression pattern string.
356	pub offset: usize,
357	/// The line number, starting at `1`.
358	pub line: usize,
359	/// The approximate column number, starting at `1`.
360	pub column: usize,
361	}
362
363	impl fmt::Debug for Position {
364	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
365	write!(
366	f,
367	"Position(o: {:?}, l: {:?}, c: {:?})",
368	self.offset, self.line, self.column
369	)
370	}
371	}
372
373	impl Ord for Position {
374	fn cmp(&self, other: &Position) -> Ordering {
375	self.offset.cmp(&other.offset)
376	}
377	}
378
379	impl PartialOrd for Position {
380	fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
381	Some(self.cmp(other))
382	}
383	}
384
385	impl Span {
386	/// Create a new span with the given positions.
387	pub fn new(start: Position, end: Position) -> Span {
388	Span { start, end }
389	}
390
391	/// Create a new span using the given position as the start and end.
392	pub fn splat(pos: Position) -> Span {
393	Span::new(pos, pos)
394	}
395
396	/// Create a new span by replacing the starting the position with the one
397	/// given.
398	pub fn with_start(self, pos: Position) -> Span {
399	Span { start: pos, ..self }
400	}
401
402	/// Create a new span by replacing the ending the position with the one
403	/// given.
404	pub fn with_end(self, pos: Position) -> Span {
405	Span { end: pos, ..self }
406	}
407
408	/// Returns true if and only if this span occurs on a single line.
409	pub fn is_one_line(&self) -> bool {
410	self.start.line == self.end.line
411	}
412
413	/// Returns true if and only if this span is empty. That is, it points to
414	/// a single position in the concrete syntax of a regular expression.
415	pub fn is_empty(&self) -> bool {
416	self.start.offset == self.end.offset
417	}
418	}
419
420	impl Position {
421	/// Create a new position with the given information.
422	///
423	/// `offset` is the absolute offset of the position, starting at `0` from
424	/// the beginning of the regular expression pattern string.
425	///
426	/// `line` is the line number, starting at `1`.
427	///
428	/// `column` is the approximate column number, starting at `1`.
429	pub fn new(offset: usize, line: usize, column: usize) -> Position {
430	Position { offset, line, column }
431	}
432	}
433
434	/// An abstract syntax tree for a singular expression along with comments
435	/// found.
436	///
437	/// Comments are not stored in the tree itself to avoid complexity. Each
438	/// comment contains a span of precisely where it occurred in the original
439	/// regular expression.
440	#[derive(Clone, Debug, Eq, PartialEq)]
441	pub struct WithComments {
442	/// The actual ast.
443	pub ast: Ast,
444	/// All comments found in the original regular expression.
445	pub comments: Vec<Comment>,
446	}
447
448	/// A comment from a regular expression with an associated span.
449	///
450	/// A regular expression can only contain comments when the `x` flag is
451	/// enabled.
452	#[derive(Clone, Debug, Eq, PartialEq)]
453	pub struct Comment {
454	/// The span of this comment, including the beginning `#` and ending `\n`.
455	pub span: Span,
456	/// The comment text, starting with the first character following the `#`
457	/// and ending with the last character preceding the `\n`.
458	pub comment: String,
459	}
460
461	/// An abstract syntax tree for a single regular expression.
462	///
463	/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
464	/// space proportional to the size of the `Ast`.
465	///
466	/// This type defines its own destructor that uses constant stack space and
467	/// heap space proportional to the size of the `Ast`.
468	#[derive(Clone, Debug, Eq, PartialEq)]
469	pub enum Ast {
470	/// An empty regex that matches everything.
471	Empty(Span),
472	/// A set of flags, e.g., `(?is)`.
473	Flags(SetFlags),
474	/// A single character literal, which includes escape sequences.
475	Literal(Literal),
476	/// The "any character" class.
477	Dot(Span),
478	/// A single zero-width assertion.
479	Assertion(Assertion),
480	/// A single character class. This includes all forms of character classes
481	/// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
482	Class(Class),
483	/// A repetition operator applied to an arbitrary regular expression.
484	Repetition(Repetition),
485	/// A grouped regular expression.
486	Group(Group),
487	/// An alternation of regular expressions.
488	Alternation(Alternation),
489	/// A concatenation of regular expressions.
490	Concat(Concat),
491	}
492
493	impl Ast {
494	/// Return the span of this abstract syntax tree.
495	pub fn span(&self) -> &Span {
496	match *self {
497	Ast::Empty(ref span) => span,
498	Ast::Flags(ref x) => &x.span,
499	Ast::Literal(ref x) => &x.span,
500	Ast::Dot(ref span) => span,
501	Ast::Assertion(ref x) => &x.span,
502	Ast::Class(ref x) => x.span(),
503	Ast::Repetition(ref x) => &x.span,
504	Ast::Group(ref x) => &x.span,
505	Ast::Alternation(ref x) => &x.span,
506	Ast::Concat(ref x) => &x.span,
507	}
508	}
509
510	/// Return true if and only if this Ast is empty.
511	pub fn is_empty(&self) -> bool {
512	match *self {
513	Ast::Empty(_) => `true`,
514	_ => `false`,
515	}
516	}
517
518	/// Returns true if and only if this AST has any (including possibly empty)
519	/// subexpressions.
520	fn has_subexprs(&self) -> bool {
521	match *self {
522	Ast::Empty(_)
523	\| Ast::Flags(_)
524	\| Ast::Literal(_)
525	\| Ast::Dot(_)
526	\| Ast::Assertion(_) => `false`,
527	Ast::Class(_)
528	\| Ast::Repetition(_)
529	\| Ast::Group(_)
530	\| Ast::Alternation(_)
531	\| Ast::Concat(_) => `true`,
532	}
533	}
534	}
535
536	/// Print a display representation of this Ast.
537	///
538	/// This does not preserve any of the original whitespace formatting that may
539	/// have originally been present in the concrete syntax from which this Ast
540	/// was generated.
541	///
542	/// This implementation uses constant stack space and heap space proportional
543	/// to the size of the `Ast`.
544	impl fmt::Display for Ast {
545	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
546	use crate::ast::print::Printer;
547	Printer::new().print(self, f)
548	}
549	}
550
551	/// An alternation of regular expressions.
552	#[derive(Clone, Debug, Eq, PartialEq)]
553	pub struct Alternation {
554	/// The span of this alternation.
555	pub span: Span,
556	/// The alternate regular expressions.
557	pub asts: Vec<Ast>,
558	}
559
560	impl Alternation {
561	/// Return this alternation as an AST.
562	///
563	/// If this alternation contains zero ASTs, then Ast::Empty is
564	/// returned. If this alternation contains exactly 1 AST, then the
565	/// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
566	pub fn into_ast(mut self) -> Ast {
567	match self.asts.len() {
568	`0` => Ast::Empty(self.span),
569	`1` => self.asts.pop().unwrap(),
570	_ => Ast::Alternation(self),
571	}
572	}
573	}
574
575	/// A concatenation of regular expressions.
576	#[derive(Clone, Debug, Eq, PartialEq)]
577	pub struct Concat {
578	/// The span of this concatenation.
579	pub span: Span,
580	/// The concatenation regular expressions.
581	pub asts: Vec<Ast>,
582	}
583
584	impl Concat {
585	/// Return this concatenation as an AST.
586	///
587	/// If this concatenation contains zero ASTs, then Ast::Empty is
588	/// returned. If this concatenation contains exactly 1 AST, then the
589	/// corresponding AST is returned. Otherwise, Ast::Concat is returned.
590	pub fn into_ast(mut self) -> Ast {
591	match self.asts.len() {
592	`0` => Ast::Empty(self.span),
593	`1` => self.asts.pop().unwrap(),
594	_ => Ast::Concat(self),
595	}
596	}
597	}
598
599	/// A single literal expression.
600	///
601	/// A literal corresponds to a single Unicode scalar value. Literals may be
602	/// represented in their literal form, e.g., `a` or in their escaped form,
603	/// e.g., `\x61`.
604	#[derive(Clone, Debug, Eq, PartialEq)]
605	pub struct Literal {
606	/// The span of this literal.
607	pub span: Span,
608	/// The kind of this literal.
609	pub kind: LiteralKind,
610	/// The Unicode scalar value corresponding to this literal.
611	pub c: char,
612	}
613
614	impl Literal {
615	/// If this literal was written as a `\x` hex escape, then this returns
616	/// the corresponding byte value. Otherwise, this returns `None`.
617	pub fn byte(&self) -> Option<u8> {
618	let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
619	if self.c as u32 <= `255` && self.kind == short_hex {
620	Some(self.c as u8)
621	} else {
622	None
623	}
624	}
625	}
626
627	/// The kind of a single literal expression.
628	#[derive(Clone, Debug, Eq, PartialEq)]
629	pub enum LiteralKind {
630	/// The literal is written verbatim, e.g., `a` or `☃`.
631	Verbatim,
632	/// The literal is written as an escape because it is punctuation, e.g.,
633	/// `\` or `\[`.*
634	Punctuation,
635	/// The literal is written as an octal escape, e.g., `\141`.
636	Octal,
637	/// The literal is written as a hex code with a fixed number of digits
638	/// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
639	/// `\U00000061`.
640	HexFixed(HexLiteralKind),
641	/// The literal is written as a hex code with a bracketed number of
642	/// digits. The only restriction is that the bracketed hex code must refer
643	/// to a valid Unicode scalar value.
644	HexBrace(HexLiteralKind),
645	/// The literal is written as a specially recognized escape, e.g., `\f`
646	/// or `\n`.
647	Special(SpecialLiteralKind),
648	}
649
650	/// The type of a special literal.
651	///
652	/// A special literal is a special escape sequence recognized by the regex
653	/// parser, e.g., `\f` or `\n`.
654	#[derive(Clone, Debug, Eq, PartialEq)]
655	pub enum SpecialLiteralKind {
656	/// Bell, spelled `\a` (`\x07`).
657	Bell,
658	/// Form feed, spelled `\f` (`\x0C`).
659	FormFeed,
660	/// Tab, spelled `\t` (`\x09`).
661	Tab,
662	/// Line feed, spelled `\n` (`\x0A`).
663	LineFeed,
664	/// Carriage return, spelled `\r` (`\x0D`).
665	CarriageReturn,
666	/// Vertical tab, spelled `\v` (`\x0B`).
667	VerticalTab,
668	/// Space, spelled `\ ` (`\x20`). Note that this can only appear when
669	/// parsing in verbose mode.
670	Space,
671	}
672
673	/// The type of a Unicode hex literal.
674	///
675	/// Note that all variants behave the same when used with brackets. They only
676	/// differ when used without brackets in the number of hex digits that must
677	/// follow.
678	#[derive(Clone, Debug, Eq, PartialEq)]
679	pub enum HexLiteralKind {
680	/// A `\x` prefix. When used without brackets, this form is limited to
681	/// two digits.
682	X,
683	/// A `\u` prefix. When used without brackets, this form is limited to
684	/// four digits.
685	UnicodeShort,
686	/// A `\U` prefix. When used without brackets, this form is limited to
687	/// eight digits.
688	UnicodeLong,
689	}
690
691	impl HexLiteralKind {
692	/// The number of digits that must be used with this literal form when
693	/// used without brackets. When used with brackets, there is no
694	/// restriction on the number of digits.
695	pub fn digits(&self) -> u32 {
696	match *self {
697	HexLiteralKind::X => `2`,
698	HexLiteralKind::UnicodeShort => `4`,
699	HexLiteralKind::UnicodeLong => `8`,
700	}
701	}
702	}
703
704	/// A single character class expression.
705	#[derive(Clone, Debug, Eq, PartialEq)]
706	pub enum Class {
707	/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
708	Unicode(ClassUnicode),
709	/// A perl character class, e.g., `\d` or `\W`.
710	Perl(ClassPerl),
711	/// A bracketed character class set, which may contain zero or more
712	/// character ranges and/or zero or more nested classes. e.g.,
713	/// `[a-zA-Z\pL]`.
714	Bracketed(ClassBracketed),
715	}
716
717	impl Class {
718	/// Return the span of this character class.
719	pub fn span(&self) -> &Span {
720	match *self {
721	Class::Perl(ref x) => &x.span,
722	Class::Unicode(ref x) => &x.span,
723	Class::Bracketed(ref x) => &x.span,
724	}
725	}
726	}
727
728	/// A Perl character class.
729	#[derive(Clone, Debug, Eq, PartialEq)]
730	pub struct ClassPerl {
731	/// The span of this class.
732	pub span: Span,
733	/// The kind of Perl class.
734	pub kind: ClassPerlKind,
735	/// Whether the class is negated or not. e.g., `\d` is not negated but
736	/// `\D` is.
737	pub negated: bool,
738	}
739
740	/// The available Perl character classes.
741	#[derive(Clone, Debug, Eq, PartialEq)]
742	pub enum ClassPerlKind {
743	/// Decimal numbers.
744	Digit,
745	/// Whitespace.
746	Space,
747	/// Word characters.
748	Word,
749	}
750
751	/// An ASCII character class.
752	#[derive(Clone, Debug, Eq, PartialEq)]
753	pub struct ClassAscii {
754	/// The span of this class.
755	pub span: Span,
756	/// The kind of ASCII class.
757	pub kind: ClassAsciiKind,
758	/// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
759	/// but `[[:^alpha:]]` is.
760	pub negated: bool,
761	}
762
763	/// The available ASCII character classes.
764	#[derive(Clone, Debug, Eq, PartialEq)]
765	pub enum ClassAsciiKind {
766	/// `[0-9A-Za-z]`
767	Alnum,
768	/// `[A-Za-z]`
769	Alpha,
770	/// `[\x00-\x7F]`
771	Ascii,
772	/// `[ \t]`
773	Blank,
774	/// `[\x00-\x1F\x7F]`
775	Cntrl,
776	/// `[0-9]`
777	Digit,
778	/// `[!-~]`
779	Graph,
780	/// `[a-z]`
781	Lower,
782	/// `[ -~]`
783	Print,
784	/// `[!-/:-@\[-`{-~]`
785	Punct,
786	/// `[\t\n\v\f\r ]`
787	Space,
788	/// `[A-Z]`
789	Upper,
790	/// `[0-9A-Za-z_]`
791	Word,
792	/// `[0-9A-Fa-f]`
793	Xdigit,
794	}
795
796	impl ClassAsciiKind {
797	/// Return the corresponding ClassAsciiKind variant for the given name.
798	///
799	/// The name given should correspond to the lowercase version of the
800	/// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
801	///
802	/// If no variant with the corresponding name exists, then `None` is
803	/// returned.
804	pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
805	use self::ClassAsciiKind::*;
806	match name {
807	"alnum" => Some(Alnum),
808	"alpha" => Some(Alpha),
809	"ascii" => Some(Ascii),
810	"blank" => Some(Blank),
811	"cntrl" => Some(Cntrl),
812	"digit" => Some(Digit),
813	"graph" => Some(Graph),
814	"lower" => Some(Lower),
815	"print" => Some(Print),
816	"punct" => Some(Punct),
817	"space" => Some(Space),
818	"upper" => Some(Upper),
819	"word" => Some(Word),
820	"xdigit" => Some(Xdigit),
821	_ => None,
822	}
823	}
824	}
825
826	/// A Unicode character class.
827	#[derive(Clone, Debug, Eq, PartialEq)]
828	pub struct ClassUnicode {
829	/// The span of this class.
830	pub span: Span,
831	/// Whether this class is negated or not.
832	///
833	/// Note: be careful when using this attribute. This specifically refers
834	/// to whether the class is written as `\p` or `\P`, where the latter
835	/// is `negated = true`. However, it also possible to write something like
836	/// `\P{scx!=Katakana}` which is actually equivalent to
837	/// `\p{scx=Katakana}` and is therefore not actually negated even though
838	/// `negated = true` here. To test whether this class is truly negated
839	/// or not, use the `is_negated` method.
840	pub negated: bool,
841	/// The kind of Unicode class.
842	pub kind: ClassUnicodeKind,
843	}
844
845	impl ClassUnicode {
846	/// Returns true if this class has been negated.
847	///
848	/// Note that this takes the Unicode op into account, if it's present.
849	/// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
850	pub fn is_negated(&self) -> bool {
851	match self.kind {
852	ClassUnicodeKind::NamedValue {
853	op: ClassUnicodeOpKind::NotEqual,
854	..
855	} => !self.negated,
856	_ => self.negated,
857	}
858	}
859	}
860
861	/// The available forms of Unicode character classes.
862	#[derive(Clone, Debug, Eq, PartialEq)]
863	pub enum ClassUnicodeKind {
864	/// A one letter abbreviated class, e.g., `\pN`.
865	OneLetter(char),
866	/// A binary property, general category or script. The string may be
867	/// empty.
868	Named(String),
869	/// A property name and an associated value.
870	NamedValue {
871	/// The type of Unicode op used to associate `name` with `value`.
872	op: ClassUnicodeOpKind,
873	/// The property name (which may be empty).
874	name: String,
875	/// The property value (which may be empty).
876	value: String,
877	},
878	}
879
880	/// The type of op used in a Unicode character class.
881	#[derive(Clone, Debug, Eq, PartialEq)]
882	pub enum ClassUnicodeOpKind {
883	/// A property set to a specific value, e.g., `\p{scx=Katakana}`.
884	Equal,
885	/// A property set to a specific value using a colon, e.g.,
886	/// `\p{scx:Katakana}`.
887	Colon,
888	/// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
889	NotEqual,
890	}
891
892	impl ClassUnicodeOpKind {
893	/// Whether the op is an equality op or not.
894	pub fn is_equal(&self) -> bool {
895	match *self {
896	ClassUnicodeOpKind::Equal \| ClassUnicodeOpKind::Colon => `true`,
897	_ => `false`,
898	}
899	}
900	}
901
902	/// A bracketed character class, e.g., `[a-z0-9]`.
903	#[derive(Clone, Debug, Eq, PartialEq)]
904	pub struct ClassBracketed {
905	/// The span of this class.
906	pub span: Span,
907	/// Whether this class is negated or not. e.g., `[a]` is not negated but
908	/// `[^a]` is.
909	pub negated: bool,
910	/// The type of this set. A set is either a normal union of things, e.g.,
911	/// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
912	pub kind: ClassSet,
913	}
914
915	/// A character class set.
916	///
917	/// This type corresponds to the internal structure of a bracketed character
918	/// class. That is, every bracketed character is one of two types: a union of
919	/// items (literals, ranges, other bracketed classes) or a tree of binary set
920	/// operations.
921	#[derive(Clone, Debug, Eq, PartialEq)]
922	pub enum ClassSet {
923	/// An item, which can be a single literal, range, nested character class
924	/// or a union of items.
925	Item(ClassSetItem),
926	/// A single binary operation (i.e., &&, -- or ~~).
927	BinaryOp(ClassSetBinaryOp),
928	}
929
930	impl ClassSet {
931	/// Build a set from a union.
932	pub fn union(ast: ClassSetUnion) -> ClassSet {
933	ClassSet::Item(ClassSetItem::Union(ast))
934	}
935
936	/// Return the span of this character class set.
937	pub fn span(&self) -> &Span {
938	match *self {
939	ClassSet::Item(ref x) => x.span(),
940	ClassSet::BinaryOp(ref x) => &x.span,
941	}
942	}
943
944	/// Return true if and only if this class set is empty.
945	fn is_empty(&self) -> bool {
946	match *self {
947	ClassSet::Item(ClassSetItem::Empty(_)) => `true`,
948	_ => `false`,
949	}
950	}
951	}
952
953	/// A single component of a character class set.
954	#[derive(Clone, Debug, Eq, PartialEq)]
955	pub enum ClassSetItem {
956	/// An empty item.
957	///
958	/// Note that a bracketed character class cannot contain a single empty
959	/// item. Empty items can appear when using one of the binary operators.
960	/// For example, `[&&]` is the intersection of two empty classes.
961	Empty(Span),
962	/// A single literal.
963	Literal(Literal),
964	/// A range between two literals.
965	Range(ClassSetRange),
966	/// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
967	Ascii(ClassAscii),
968	/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
969	Unicode(ClassUnicode),
970	/// A perl character class, e.g., `\d` or `\W`.
971	Perl(ClassPerl),
972	/// A bracketed character class set, which may contain zero or more
973	/// character ranges and/or zero or more nested classes. e.g.,
974	/// `[a-zA-Z\pL]`.
975	Bracketed(Box<ClassBracketed>),
976	/// A union of items.
977	Union(ClassSetUnion),
978	}
979
980	impl ClassSetItem {
981	/// Return the span of this character class set item.
982	pub fn span(&self) -> &Span {
983	match *self {
984	ClassSetItem::Empty(ref span) => span,
985	ClassSetItem::Literal(ref x) => &x.span,
986	ClassSetItem::Range(ref x) => &x.span,
987	ClassSetItem::Ascii(ref x) => &x.span,
988	ClassSetItem::Perl(ref x) => &x.span,
989	ClassSetItem::Unicode(ref x) => &x.span,
990	ClassSetItem::Bracketed(ref x) => &x.span,
991	ClassSetItem::Union(ref x) => &x.span,
992	}
993	}
994	}
995
996	/// A single character class range in a set.
997	#[derive(Clone, Debug, Eq, PartialEq)]
998	pub struct ClassSetRange {
999	/// The span of this range.
1000	pub span: Span,
1001	/// The start of this range.
1002	pub start: Literal,
1003	/// The end of this range.
1004	pub end: Literal,
1005	}
1006
1007	impl ClassSetRange {
1008	/// Returns true if and only if this character class range is valid.
1009	///
1010	/// The only case where a range is invalid is if its start is greater than
1011	/// its end.
1012	pub fn is_valid(&self) -> bool {
1013	self.start.c <= self.end.c
1014	}
1015	}
1016
1017	/// A union of items inside a character class set.
1018	#[derive(Clone, Debug, Eq, PartialEq)]
1019	pub struct ClassSetUnion {
1020	/// The span of the items in this operation. e.g., the `a-z0-9` in
1021	/// `[^a-z0-9]`
1022	pub span: Span,
1023	/// The sequence of items that make up this union.
1024	pub items: Vec<ClassSetItem>,
1025	}
1026
1027	impl ClassSetUnion {
1028	/// Push a new item in this union.
1029	///
1030	/// The ending position of this union's span is updated to the ending
1031	/// position of the span of the item given. If the union is empty, then
1032	/// the starting position of this union is set to the starting position
1033	/// of this item.
1034	///
1035	/// In other words, if you only use this method to add items to a union
1036	/// and you set the spans on each item correctly, then you should never
1037	/// need to adjust the span of the union directly.
1038	pub fn push(&mut self, item: ClassSetItem) {
1039	if self.items.is_empty() {
1040	self.span.start = item.span().start;
1041	}
1042	self.span.end = item.span().end;
1043	self.items.push(item);
1044	}
1045
1046	/// Return this union as a character class set item.
1047	///
1048	/// If this union contains zero items, then an empty union is
1049	/// returned. If this concatenation contains exactly 1 item, then the
1050	/// corresponding item is returned. Otherwise, ClassSetItem::Union is
1051	/// returned.
1052	pub fn into_item(mut self) -> ClassSetItem {
1053	match self.items.len() {
1054	`0` => ClassSetItem::Empty(self.span),
1055	`1` => self.items.pop().unwrap(),
1056	_ => ClassSetItem::Union(self),
1057	}
1058	}
1059	}
1060
1061	/// A Unicode character class set operation.
1062	#[derive(Clone, Debug, Eq, PartialEq)]
1063	pub struct ClassSetBinaryOp {
1064	/// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
1065	pub span: Span,
1066	/// The type of this set operation.
1067	pub kind: ClassSetBinaryOpKind,
1068	/// The left hand side of the operation.
1069	pub lhs: Box<ClassSet>,
1070	/// The right hand side of the operation.
1071	pub rhs: Box<ClassSet>,
1072	}
1073
1074	/// The type of a Unicode character class set operation.
1075	///
1076	/// Note that this doesn't explicitly represent union since there is no
1077	/// explicit union operator. Concatenation inside a character class corresponds
1078	/// to the union operation.
1079	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1080	pub enum ClassSetBinaryOpKind {
1081	/// The intersection of two sets, e.g., `\pN&&[a-z]`.
1082	Intersection,
1083	/// The difference of two sets, e.g., `\pN--[0-9]`.
1084	Difference,
1085	/// The symmetric difference of two sets. The symmetric difference is the
1086	/// set of elements belonging to one but not both sets.
1087	/// e.g., `[\pL~~[:ascii:]]`.
1088	SymmetricDifference,
1089	}
1090
1091	/// A single zero-width assertion.
1092	#[derive(Clone, Debug, Eq, PartialEq)]
1093	pub struct Assertion {
1094	/// The span of this assertion.
1095	pub span: Span,
1096	/// The assertion kind, e.g., `\b` or `^`.
1097	pub kind: AssertionKind,
1098	}
1099
1100	/// An assertion kind.
1101	#[derive(Clone, Debug, Eq, PartialEq)]
1102	pub enum AssertionKind {
1103	/// `^`
1104	StartLine,
1105	/// `$`
1106	EndLine,
1107	/// `\A`
1108	StartText,
1109	/// `\z`
1110	EndText,
1111	/// `\b`
1112	WordBoundary,
1113	/// `\B`
1114	NotWordBoundary,
1115	}
1116
1117	/// A repetition operation applied to a regular expression.
1118	#[derive(Clone, Debug, Eq, PartialEq)]
1119	pub struct Repetition {
1120	/// The span of this operation.
1121	pub span: Span,
1122	/// The actual operation.
1123	pub op: RepetitionOp,
1124	/// Whether this operation was applied greedily or not.
1125	pub greedy: bool,
1126	/// The regular expression under repetition.
1127	pub ast: Box<Ast>,
1128	}
1129
1130	/// The repetition operator itself.
1131	#[derive(Clone, Debug, Eq, PartialEq)]
1132	pub struct RepetitionOp {
1133	/// The span of this operator. This includes things like `+`, `?` and*
1134	/// `{m,n}`.
1135	pub span: Span,
1136	/// The type of operation.
1137	pub kind: RepetitionKind,
1138	}
1139
1140	/// The kind of a repetition operator.
1141	#[derive(Clone, Debug, Eq, PartialEq)]
1142	pub enum RepetitionKind {
1143	/// `?`
1144	ZeroOrOne,
1145	/// ``*
1146	ZeroOrMore,
1147	/// `+`
1148	OneOrMore,
1149	/// `{m,n}`
1150	Range(RepetitionRange),
1151	}
1152
1153	/// A range repetition operator.
1154	#[derive(Clone, Debug, Eq, PartialEq)]
1155	pub enum RepetitionRange {
1156	/// `{m}`
1157	Exactly(u32),
1158	/// `{m,}`
1159	AtLeast(u32),
1160	/// `{m,n}`
1161	Bounded(u32, u32),
1162	}
1163
1164	impl RepetitionRange {
1165	/// Returns true if and only if this repetition range is valid.
1166	///
1167	/// The only case where a repetition range is invalid is if it is bounded
1168	/// and its start is greater than its end.
1169	pub fn is_valid(&self) -> bool {
1170	match *self {
1171	RepetitionRange::Bounded(s, e) if s > e => `false`,
1172	_ => `true`,
1173	}
1174	}
1175	}
1176
1177	/// A grouped regular expression.
1178	///
1179	/// This includes both capturing and non-capturing groups. This does not
1180	/// include flag-only groups like `(?is)`, but does contain any group that
1181	/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
1182	/// `(?is:a)`.
1183	#[derive(Clone, Debug, Eq, PartialEq)]
1184	pub struct Group {
1185	/// The span of this group.
1186	pub span: Span,
1187	/// The kind of this group.
1188	pub kind: GroupKind,
1189	/// The regular expression in this group.
1190	pub ast: Box<Ast>,
1191	}
1192
1193	impl Group {
1194	/// If this group is non-capturing, then this returns the (possibly empty)
1195	/// set of flags. Otherwise, `None` is returned.
1196	pub fn flags(&self) -> Option<&Flags> {
1197	match self.kind {
1198	GroupKind::NonCapturing(ref flags) => Some(flags),
1199	_ => None,
1200	}
1201	}
1202
1203	/// Returns true if and only if this group is capturing.
1204	pub fn is_capturing(&self) -> bool {
1205	match self.kind {
1206	GroupKind::CaptureIndex(_) \| GroupKind::CaptureName(_) => `true`,
1207	GroupKind::NonCapturing(_) => `false`,
1208	}
1209	}
1210
1211	/// Returns the capture index of this group, if this is a capturing group.
1212	///
1213	/// This returns a capture index precisely when `is_capturing` is `true`.
1214	pub fn capture_index(&self) -> Option<u32> {
1215	match self.kind {
1216	GroupKind::CaptureIndex(i) => Some(i),
1217	GroupKind::CaptureName(ref x) => Some(x.index),
1218	GroupKind::NonCapturing(_) => None,
1219	}
1220	}
1221	}
1222
1223	/// The kind of a group.
1224	#[derive(Clone, Debug, Eq, PartialEq)]
1225	pub enum GroupKind {
1226	/// `(a)`
1227	CaptureIndex(u32),
1228	/// `(?P<name>a)`
1229	CaptureName(CaptureName),
1230	/// `(?:a)` and `(?i:a)`
1231	NonCapturing(Flags),
1232	}
1233
1234	/// A capture name.
1235	///
1236	/// This corresponds to the name itself between the angle brackets in, e.g.,
1237	/// `(?P<foo>expr)`.
1238	#[derive(Clone, Debug, Eq, PartialEq)]
1239	pub struct CaptureName {
1240	/// The span of this capture name.
1241	pub span: Span,
1242	/// The capture name.
1243	pub name: String,
1244	/// The capture index.
1245	pub index: u32,
1246	}
1247
1248	/// A group of flags that is not applied to a particular regular expression.
1249	#[derive(Clone, Debug, Eq, PartialEq)]
1250	pub struct SetFlags {
1251	/// The span of these flags, including the grouping parentheses.
1252	pub span: Span,
1253	/// The actual sequence of flags.
1254	pub flags: Flags,
1255	}
1256
1257	/// A group of flags.
1258	///
1259	/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
1260	#[derive(Clone, Debug, Eq, PartialEq)]
1261	pub struct Flags {
1262	/// The span of this group of flags.
1263	pub span: Span,
1264	/// A sequence of flag items. Each item is either a flag or a negation
1265	/// operator.
1266	pub items: Vec<FlagsItem>,
1267	}
1268
1269	impl Flags {
1270	/// Add the given item to this sequence of flags.
1271	///
1272	/// If the item was added successfully, then `None` is returned. If the
1273	/// given item is a duplicate, then `Some(i)` is returned, where
1274	/// `items[i].kind == item.kind`.
1275	pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
1276	for (i, x) in self.items.iter().enumerate() {
1277	if x.kind == item.kind {
1278	return Some(i);
1279	}
1280	}
1281	self.items.push(item);
1282	None
1283	}
1284
1285	/// Returns the state of the given flag in this set.
1286	///
1287	/// If the given flag is in the set but is negated, then `Some(false)` is
1288	/// returned.
1289	///
1290	/// If the given flag is in the set and is not negated, then `Some(true)`
1291	/// is returned.
1292	///
1293	/// Otherwise, `None` is returned.
1294	pub fn flag_state(&self, flag: Flag) -> Option<bool> {
1295	let mut negated = `false`;
1296	for x in &self.items {
1297	match x.kind {
1298	FlagsItemKind::Negation => {
1299	negated = `true`;
1300	}
1301	FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
1302	return Some(!negated);
1303	}
1304	_ => {}
1305	}
1306	}
1307	None
1308	}
1309	}
1310
1311	/// A single item in a group of flags.
1312	#[derive(Clone, Debug, Eq, PartialEq)]
1313	pub struct FlagsItem {
1314	/// The span of this item.
1315	pub span: Span,
1316	/// The kind of this item.
1317	pub kind: FlagsItemKind,
1318	}
1319
1320	/// The kind of an item in a group of flags.
1321	#[derive(Clone, Debug, Eq, PartialEq)]
1322	pub enum FlagsItemKind {
1323	/// A negation operator applied to all subsequent flags in the enclosing
1324	/// group.
1325	Negation,
1326	/// A single flag in a group.
1327	Flag(Flag),
1328	}
1329
1330	impl FlagsItemKind {
1331	/// Returns true if and only if this item is a negation operator.
1332	pub fn is_negation(&self) -> bool {
1333	match *self {
1334	FlagsItemKind::Negation => `true`,
1335	_ => `false`,
1336	}
1337	}
1338	}
1339
1340	/// A single flag.
1341	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1342	pub enum Flag {
1343	/// `i`
1344	CaseInsensitive,
1345	/// `m`
1346	MultiLine,
1347	/// `s`
1348	DotMatchesNewLine,
1349	/// `U`
1350	SwapGreed,
1351	/// `u`
1352	Unicode,
1353	/// `x`
1354	IgnoreWhitespace,
1355	}
1356
1357	/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
1358	/// space but heap space proportional to the depth of the `Ast`.
1359	impl Drop for Ast {
1360	fn drop(&mut self) {
1361	use std::mem;
1362
1363	match *self {
1364	Ast::Empty(_)
1365	\| Ast::Flags(_)
1366	\| Ast::Literal(_)
1367	\| Ast::Dot(_)
1368	\| Ast::Assertion(_)
1369	// Classes are recursive, so they get their own Drop impl.
1370	\| Ast::Class(_) => return,
1371	Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1372	Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1373	Ast::Alternation(ref x) if x.asts.is_empty() => return,
1374	Ast::Concat(ref x) if x.asts.is_empty() => return,
1375	_ => {}
1376	}
1377
1378	let empty_span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1379	let empty_ast = \|\| Ast::Empty(empty_span());
1380	let mut stack = vec![mem::replace(self, empty_ast())];
1381	while let Some(mut ast) = stack.pop() {
1382	match ast {
1383	Ast::Empty(_)
1384	\| Ast::Flags(_)
1385	\| Ast::Literal(_)
1386	\| Ast::Dot(_)
1387	\| Ast::Assertion(_)
1388	// Classes are recursive, so they get their own Drop impl.
1389	\| Ast::Class(_) => {}
1390	Ast::Repetition(ref mut x) => {
1391	stack.push(mem::replace(&mut x.ast, empty_ast()));
1392	}
1393	Ast::Group(ref mut x) => {
1394	stack.push(mem::replace(&mut x.ast, empty_ast()));
1395	}
1396	Ast::Alternation(ref mut x) => {
1397	stack.extend(x.asts.drain(..));
1398	}
1399	Ast::Concat(ref mut x) => {
1400	stack.extend(x.asts.drain(..));
1401	}
1402	}
1403	}
1404	}
1405	}
1406
1407	/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
1408	/// stack space but heap space proportional to the depth of the `ClassSet`.
1409	impl Drop for ClassSet {
1410	fn drop(&mut self) {
1411	use std::mem;
1412
1413	match *self {
1414	ClassSet::Item(ref item) => match *item {
1415	ClassSetItem::Empty(_)
1416	\| ClassSetItem::Literal(_)
1417	\| ClassSetItem::Range(_)
1418	\| ClassSetItem::Ascii(_)
1419	\| ClassSetItem::Unicode(_)
1420	\| ClassSetItem::Perl(_) => return,
1421	ClassSetItem::Bracketed(ref x) => {
1422	if x.kind.is_empty() {
1423	return;
1424	}
1425	}
1426	ClassSetItem::Union(ref x) => {
1427	if x.items.is_empty() {
1428	return;
1429	}
1430	}
1431	},
1432	ClassSet::BinaryOp(ref op) => {
1433	if op.lhs.is_empty() && op.rhs.is_empty() {
1434	return;
1435	}
1436	}
1437	}
1438
1439	let empty_span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1440	let empty_set = \|\| ClassSet::Item(ClassSetItem::Empty(empty_span()));
1441	let mut stack = vec![mem::replace(self, empty_set())];
1442	while let Some(mut set) = stack.pop() {
1443	match set {
1444	ClassSet::Item(ref mut item) => match *item {
1445	ClassSetItem::Empty(_)
1446	\| ClassSetItem::Literal(_)
1447	\| ClassSetItem::Range(_)
1448	\| ClassSetItem::Ascii(_)
1449	\| ClassSetItem::Unicode(_)
1450	\| ClassSetItem::Perl(_) => {}
1451	ClassSetItem::Bracketed(ref mut x) => {
1452	stack.push(mem::replace(&mut x.kind, empty_set()));
1453	}
1454	ClassSetItem::Union(ref mut x) => {
1455	stack.extend(x.items.drain(..).map(ClassSet::Item));
1456	}
1457	},
1458	ClassSet::BinaryOp(ref mut op) => {
1459	stack.push(mem::replace(&mut op.lhs, empty_set()));
1460	stack.push(mem::replace(&mut op.rhs, empty_set()));
1461	}
1462	}
1463	}
1464	}
1465	}
1466
1467	#[cfg(test)]
1468	mod tests {
1469	use super::*;
1470
1471	// We use a thread with an explicit stack size to test that our destructor
1472	// for Ast can handle arbitrarily sized expressions in constant stack
1473	// space. In case we run on a platform without threads (WASM?), we limit
1474	// this test to Windows/Unix.
1475	#[test]
1476	#[cfg(any(unix, windows))]
1477	fn no_stack_overflow_on_drop() {
1478	use std::thread;
1479
1480	let run = \|\| {
1481	let span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1482	let mut ast = Ast::Empty(span());
1483	for i in `0`..`200` {
1484	ast = Ast::Group(Group {
1485	span: span(),
1486	kind: GroupKind::CaptureIndex(i),
1487	ast: Box::new(ast),
1488	});
1489	}
1490	assert!(!ast.is_empty());
1491	};
1492
1493	// We run our test on a thread with a small stack size so we can
1494	// force the issue more easily.
1495	//
1496	// NOTE(2023-03-21): It turns out that some platforms (like FreeBSD)
1497	// will just barf with very small stack sizes. So we bump this up a bit
1498	// to give more room to breath. When I did this, I confirmed that if
1499	// I remove the custom `Drop` impl for `Ast`, then this test does
1500	// indeed still fail with a stack overflow. (At the time of writing, I
1501	// had to bump it all the way up to 32K before the test would pass even
1502	// without the custom `Drop` impl. So 16K seems like a safe number
1503	// here.)
1504	//
1505	// See: https://github.com/rust-lang/regex/issues/967
1506	thread::Builder::new()
1507	.stack_size(`16` << `10`)
1508	.spawn(run)
1509	.unwrap()
1510	.join()
1511	.unwrap();
1512	}
1513	}
1514

Provided by KDAB

Definitions