mod.rs source code [crates/regex-syntax-0.7.2/src/ast/mod.rs]

1	/!*
2	Defines an abstract syntax for regular expressions.
3	*/
4
5	use core::cmp::Ordering;
6
7	use alloc::{boxed::Box, string::String, vec, vec::Vec};
8
9	pub use crate::ast::visitor::{visit, Visitor};
10
11	pub mod parse;
12	pub mod print;
13	mod visitor;
14
15	/// An error that occurred while parsing a regular expression into an abstract
16	/// syntax tree.
17	///
18	/// Note that not all ASTs represents a valid regular expression. For example,
19	/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
20	/// valid Unicode property name. That particular error is reported when
21	/// translating an AST to the high-level intermediate representation (`HIR`).
22	#[derive(Clone, Debug, Eq, PartialEq)]
23	pub struct Error {
24	/// The kind of error.
25	kind: ErrorKind,
26	/// The original pattern that the parser generated the error from. Every
27	/// span in an error is a valid range into this string.
28	pattern: String,
29	/// The span of this error.
30	span: Span,
31	}
32
33	impl Error {
34	/// Return the type of this error.
35	pub fn kind(&self) -> &ErrorKind {
36	&self.kind
37	}
38
39	/// The original pattern string in which this error occurred.
40	///
41	/// Every span reported by this error is reported in terms of this string.
42	pub fn pattern(&self) -> &str {
43	&self.pattern
44	}
45
46	/// Return the span at which this error occurred.
47	pub fn span(&self) -> &Span {
48	&self.span
49	}
50
51	/// Return an auxiliary span. This span exists only for some errors that
52	/// benefit from being able to point to two locations in the original
53	/// regular expression. For example, "duplicate" errors will have the
54	/// main error position set to the duplicate occurrence while its
55	/// auxiliary span will be set to the initial occurrence.
56	pub fn auxiliary_span(&self) -> Option<&Span> {
57	use self::ErrorKind::*;
58	match self.kind {
59	FlagDuplicate { ref original } => Some(original),
60	FlagRepeatedNegation { ref original, .. } => Some(original),
61	GroupNameDuplicate { ref original, .. } => Some(original),
62	_ => None,
63	}
64	}
65	}
66
67	/// The type of an error that occurred while building an AST.
68	///
69	/// This error type is marked as `non_exhaustive`. This means that adding a
70	/// new variant is not considered a breaking change.
71	#[non_exhaustive]
72	#[derive(Clone, Debug, Eq, PartialEq)]
73	pub enum ErrorKind {
74	/// The capturing group limit was exceeded.
75	///
76	/// Note that this represents a limit on the total number of capturing
77	/// groups in a regex and not necessarily the number of nested capturing
78	/// groups. That is, the nest limit can be low and it is still possible for
79	/// this error to occur.
80	CaptureLimitExceeded,
81	/// An invalid escape sequence was found in a character class set.
82	ClassEscapeInvalid,
83	/// An invalid character class range was found. An invalid range is any
84	/// range where the start is greater than the end.
85	ClassRangeInvalid,
86	/// An invalid range boundary was found in a character class. Range
87	/// boundaries must be a single literal codepoint, but this error indicates
88	/// that something else was found, such as a nested class.
89	ClassRangeLiteral,
90	/// An opening `[` was found with no corresponding closing `]`.
91	ClassUnclosed,
92	/// Note that this error variant is no longer used. Namely, a decimal
93	/// number can only appear as a repetition quantifier. When the number
94	/// in a repetition quantifier is empty, then it gets its own specialized
95	/// error, `RepetitionCountDecimalEmpty`.
96	DecimalEmpty,
97	/// An invalid decimal number was given where one was expected.
98	DecimalInvalid,
99	/// A bracketed hex literal was empty.
100	EscapeHexEmpty,
101	/// A bracketed hex literal did not correspond to a Unicode scalar value.
102	EscapeHexInvalid,
103	/// An invalid hexadecimal digit was found.
104	EscapeHexInvalidDigit,
105	/// EOF was found before an escape sequence was completed.
106	EscapeUnexpectedEof,
107	/// An unrecognized escape sequence.
108	EscapeUnrecognized,
109	/// A dangling negation was used when setting flags, e.g., `i-`.
110	FlagDanglingNegation,
111	/// A flag was used twice, e.g., `i-i`.
112	FlagDuplicate {
113	/// The position of the original flag. The error position
114	/// points to the duplicate flag.
115	original: Span,
116	},
117	/// The negation operator was used twice, e.g., `-i-s`.
118	FlagRepeatedNegation {
119	/// The position of the original negation operator. The error position
120	/// points to the duplicate negation operator.
121	original: Span,
122	},
123	/// Expected a flag but got EOF, e.g., `(?`.
124	FlagUnexpectedEof,
125	/// Unrecognized flag, e.g., `a`.
126	FlagUnrecognized,
127	/// A duplicate capture name was found.
128	GroupNameDuplicate {
129	/// The position of the initial occurrence of the capture name. The
130	/// error position itself points to the duplicate occurrence.
131	original: Span,
132	},
133	/// A capture group name is empty, e.g., `(?P<>abc)`.
134	GroupNameEmpty,
135	/// An invalid character was seen for a capture group name. This includes
136	/// errors where the first character is a digit (even though subsequent
137	/// characters are allowed to be digits).
138	GroupNameInvalid,
139	/// A closing `>` could not be found for a capture group name.
140	GroupNameUnexpectedEof,
141	/// An unclosed group, e.g., `(ab`.
142	///
143	/// The span of this error corresponds to the unclosed parenthesis.
144	GroupUnclosed,
145	/// An unopened group, e.g., `ab)`.
146	GroupUnopened,
147	/// The nest limit was exceeded. The limit stored here is the limit
148	/// configured in the parser.
149	NestLimitExceeded(u32),
150	/// The range provided in a counted repetition operator is invalid. The
151	/// range is invalid if the start is greater than the end.
152	RepetitionCountInvalid,
153	/// An opening `{` was not followed by a valid decimal value.
154	/// For example, `x{}` or `x{]}` would fail.
155	RepetitionCountDecimalEmpty,
156	/// An opening `{` was found with no corresponding closing `}`.
157	RepetitionCountUnclosed,
158	/// A repetition operator was applied to a missing sub-expression. This
159	/// occurs, for example, in the regex consisting of just a `` or even*
160	/// `(?i)`. It is, however, possible to create a repetition operating on*
161	/// an empty sub-expression. For example, `()` is still considered valid.*
162	RepetitionMissing,
163	/// The Unicode class is not valid. This typically occurs when a `\p` is
164	/// followed by something other than a `{`.
165	UnicodeClassInvalid,
166	/// When octal support is disabled, this error is produced when an octal
167	/// escape is used. The octal escape is assumed to be an invocation of
168	/// a backreference, which is the common case.
169	UnsupportedBackreference,
170	/// When syntax similar to PCRE's look-around is used, this error is
171	/// returned. Some example syntaxes that are rejected include, but are
172	/// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
173	/// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
174	/// error is used to improve the user experience.
175	UnsupportedLookAround,
176	}
177
178	#[cfg(feature = "std")]
179	impl std::error::Error for Error {}
180
181	impl core::fmt::Display for Error {
182	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
183	crate::error::Formatter::from(self).fmt(f)
184	}
185	}
186
187	impl core::fmt::Display for ErrorKind {
188	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
189	use self::ErrorKind::*;
190	match *self {
191	CaptureLimitExceeded => write!(
192	f,
193	"exceeded the maximum number of \
194	capturing groups ({})",
195	u32::MAX
196	),
197	ClassEscapeInvalid => {
198	write!(f, "invalid escape sequence found in character class")
199	}
200	ClassRangeInvalid => write!(
201	f,
202	"invalid character class range, \
203	the start must be <= the end"
204	),
205	ClassRangeLiteral => {
206	write!(f, "invalid range boundary, must be a literal")
207	}
208	ClassUnclosed => write!(f, "unclosed character class"),
209	DecimalEmpty => write!(f, "decimal literal empty"),
210	DecimalInvalid => write!(f, "decimal literal invalid"),
211	EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
212	EscapeHexInvalid => {
213	write!(f, "hexadecimal literal is not a Unicode scalar value")
214	}
215	EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
216	EscapeUnexpectedEof => write!(
217	f,
218	"incomplete escape sequence, \
219	reached end of pattern prematurely"
220	),
221	EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
222	FlagDanglingNegation => {
223	write!(f, "dangling flag negation operator")
224	}
225	FlagDuplicate { .. } => write!(f, "duplicate flag"),
226	FlagRepeatedNegation { .. } => {
227	write!(f, "flag negation operator repeated")
228	}
229	FlagUnexpectedEof => {
230	write!(f, "expected flag but got end of regex")
231	}
232	FlagUnrecognized => write!(f, "unrecognized flag"),
233	GroupNameDuplicate { .. } => {
234	write!(f, "duplicate capture group name")
235	}
236	GroupNameEmpty => write!(f, "empty capture group name"),
237	GroupNameInvalid => write!(f, "invalid capture group character"),
238	GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
239	GroupUnclosed => write!(f, "unclosed group"),
240	GroupUnopened => write!(f, "unopened group"),
241	NestLimitExceeded(limit) => write!(
242	f,
243	"exceed the maximum number of \
244	nested parentheses/brackets ({})",
245	limit
246	),
247	RepetitionCountInvalid => write!(
248	f,
249	"invalid repetition count range, \
250	the start must be <= the end"
251	),
252	RepetitionCountDecimalEmpty => {
253	write!(f, "repetition quantifier expects a valid decimal")
254	}
255	RepetitionCountUnclosed => {
256	write!(f, "unclosed counted repetition")
257	}
258	RepetitionMissing => {
259	write!(f, "repetition operator missing expression")
260	}
261	UnicodeClassInvalid => {
262	write!(f, "invalid Unicode character class")
263	}
264	UnsupportedBackreference => {
265	write!(f, "backreferences are not supported")
266	}
267	UnsupportedLookAround => write!(
268	f,
269	"look-around, including look-ahead and look-behind, \
270	is not supported"
271	),
272	}
273	}
274	}
275
276	/// Span represents the position information of a single AST item.
277	///
278	/// All span positions are absolute byte offsets that can be used on the
279	/// original regular expression that was parsed.
280	#[derive(Clone, Copy, Eq, PartialEq)]
281	pub struct Span {
282	/// The start byte offset.
283	pub start: Position,
284	/// The end byte offset.
285	pub end: Position,
286	}
287
288	impl core::fmt::Debug for Span {
289	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
290	write!(f, "Span({:?}, {:?})", self.start, self.end)
291	}
292	}
293
294	impl Ord for Span {
295	fn cmp(&self, other: &Span) -> Ordering {
296	(&self.start, &self.end).cmp(&(&other.start, &other.end))
297	}
298	}
299
300	impl PartialOrd for Span {
301	fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
302	Some(self.cmp(other))
303	}
304	}
305
306	/// A single position in a regular expression.
307	///
308	/// A position encodes one half of a span, and include the byte offset, line
309	/// number and column number.
310	#[derive(Clone, Copy, Eq, PartialEq)]
311	pub struct Position {
312	/// The absolute offset of this position, starting at `0` from the
313	/// beginning of the regular expression pattern string.
314	pub offset: usize,
315	/// The line number, starting at `1`.
316	pub line: usize,
317	/// The approximate column number, starting at `1`.
318	pub column: usize,
319	}
320
321	impl core::fmt::Debug for Position {
322	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
323	write!(
324	f,
325	"Position(o: {:?}, l: {:?}, c: {:?})",
326	self.offset, self.line, self.column
327	)
328	}
329	}
330
331	impl Ord for Position {
332	fn cmp(&self, other: &Position) -> Ordering {
333	self.offset.cmp(&other.offset)
334	}
335	}
336
337	impl PartialOrd for Position {
338	fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
339	Some(self.cmp(other))
340	}
341	}
342
343	impl Span {
344	/// Create a new span with the given positions.
345	pub fn new(start: Position, end: Position) -> Span {
346	Span { start, end }
347	}
348
349	/// Create a new span using the given position as the start and end.
350	pub fn splat(pos: Position) -> Span {
351	Span::new(pos, pos)
352	}
353
354	/// Create a new span by replacing the starting the position with the one
355	/// given.
356	pub fn with_start(self, pos: Position) -> Span {
357	Span { start: pos, ..self }
358	}
359
360	/// Create a new span by replacing the ending the position with the one
361	/// given.
362	pub fn with_end(self, pos: Position) -> Span {
363	Span { end: pos, ..self }
364	}
365
366	/// Returns true if and only if this span occurs on a single line.
367	pub fn is_one_line(&self) -> bool {
368	self.start.line == self.end.line
369	}
370
371	/// Returns true if and only if this span is empty. That is, it points to
372	/// a single position in the concrete syntax of a regular expression.
373	pub fn is_empty(&self) -> bool {
374	self.start.offset == self.end.offset
375	}
376	}
377
378	impl Position {
379	/// Create a new position with the given information.
380	///
381	/// `offset` is the absolute offset of the position, starting at `0` from
382	/// the beginning of the regular expression pattern string.
383	///
384	/// `line` is the line number, starting at `1`.
385	///
386	/// `column` is the approximate column number, starting at `1`.
387	pub fn new(offset: usize, line: usize, column: usize) -> Position {
388	Position { offset, line, column }
389	}
390	}
391
392	/// An abstract syntax tree for a singular expression along with comments
393	/// found.
394	///
395	/// Comments are not stored in the tree itself to avoid complexity. Each
396	/// comment contains a span of precisely where it occurred in the original
397	/// regular expression.
398	#[derive(Clone, Debug, Eq, PartialEq)]
399	pub struct WithComments {
400	/// The actual ast.
401	pub ast: Ast,
402	/// All comments found in the original regular expression.
403	pub comments: Vec<Comment>,
404	}
405
406	/// A comment from a regular expression with an associated span.
407	///
408	/// A regular expression can only contain comments when the `x` flag is
409	/// enabled.
410	#[derive(Clone, Debug, Eq, PartialEq)]
411	pub struct Comment {
412	/// The span of this comment, including the beginning `#` and ending `\n`.
413	pub span: Span,
414	/// The comment text, starting with the first character following the `#`
415	/// and ending with the last character preceding the `\n`.
416	pub comment: String,
417	}
418
419	/// An abstract syntax tree for a single regular expression.
420	///
421	/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
422	/// space proportional to the size of the `Ast`.
423	///
424	/// This type defines its own destructor that uses constant stack space and
425	/// heap space proportional to the size of the `Ast`.
426	#[derive(Clone, Debug, Eq, PartialEq)]
427	pub enum Ast {
428	/// An empty regex that matches everything.
429	Empty(Span),
430	/// A set of flags, e.g., `(?is)`.
431	Flags(SetFlags),
432	/// A single character literal, which includes escape sequences.
433	Literal(Literal),
434	/// The "any character" class.
435	Dot(Span),
436	/// A single zero-width assertion.
437	Assertion(Assertion),
438	/// A single character class. This includes all forms of character classes
439	/// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
440	Class(Class),
441	/// A repetition operator applied to an arbitrary regular expression.
442	Repetition(Repetition),
443	/// A grouped regular expression.
444	Group(Group),
445	/// An alternation of regular expressions.
446	Alternation(Alternation),
447	/// A concatenation of regular expressions.
448	Concat(Concat),
449	}
450
451	impl Ast {
452	/// Return the span of this abstract syntax tree.
453	pub fn span(&self) -> &Span {
454	match *self {
455	Ast::Empty(ref span) => span,
456	Ast::Flags(ref x) => &x.span,
457	Ast::Literal(ref x) => &x.span,
458	Ast::Dot(ref span) => span,
459	Ast::Assertion(ref x) => &x.span,
460	Ast::Class(ref x) => x.span(),
461	Ast::Repetition(ref x) => &x.span,
462	Ast::Group(ref x) => &x.span,
463	Ast::Alternation(ref x) => &x.span,
464	Ast::Concat(ref x) => &x.span,
465	}
466	}
467
468	/// Return true if and only if this Ast is empty.
469	pub fn is_empty(&self) -> bool {
470	match *self {
471	Ast::Empty(_) => `true`,
472	_ => `false`,
473	}
474	}
475
476	/// Returns true if and only if this AST has any (including possibly empty)
477	/// subexpressions.
478	fn has_subexprs(&self) -> bool {
479	match *self {
480	Ast::Empty(_)
481	\| Ast::Flags(_)
482	\| Ast::Literal(_)
483	\| Ast::Dot(_)
484	\| Ast::Assertion(_) => `false`,
485	Ast::Class(_)
486	\| Ast::Repetition(_)
487	\| Ast::Group(_)
488	\| Ast::Alternation(_)
489	\| Ast::Concat(_) => `true`,
490	}
491	}
492	}
493
494	/// Print a display representation of this Ast.
495	///
496	/// This does not preserve any of the original whitespace formatting that may
497	/// have originally been present in the concrete syntax from which this Ast
498	/// was generated.
499	///
500	/// This implementation uses constant stack space and heap space proportional
501	/// to the size of the `Ast`.
502	impl core::fmt::Display for Ast {
503	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
504	use crate::ast::print::Printer;
505	Printer::new().print(self, wtr:f)
506	}
507	}
508
509	/// An alternation of regular expressions.
510	#[derive(Clone, Debug, Eq, PartialEq)]
511	pub struct Alternation {
512	/// The span of this alternation.
513	pub span: Span,
514	/// The alternate regular expressions.
515	pub asts: Vec<Ast>,
516	}
517
518	impl Alternation {
519	/// Return this alternation as an AST.
520	///
521	/// If this alternation contains zero ASTs, then Ast::Empty is
522	/// returned. If this alternation contains exactly 1 AST, then the
523	/// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
524	pub fn into_ast(mut self) -> Ast {
525	match self.asts.len() {
526	`0` => Ast::Empty(self.span),
527	`1` => self.asts.pop().unwrap(),
528	_ => Ast::Alternation(self),
529	}
530	}
531	}
532
533	/// A concatenation of regular expressions.
534	#[derive(Clone, Debug, Eq, PartialEq)]
535	pub struct Concat {
536	/// The span of this concatenation.
537	pub span: Span,
538	/// The concatenation regular expressions.
539	pub asts: Vec<Ast>,
540	}
541
542	impl Concat {
543	/// Return this concatenation as an AST.
544	///
545	/// If this concatenation contains zero ASTs, then Ast::Empty is
546	/// returned. If this concatenation contains exactly 1 AST, then the
547	/// corresponding AST is returned. Otherwise, Ast::Concat is returned.
548	pub fn into_ast(mut self) -> Ast {
549	match self.asts.len() {
550	`0` => Ast::Empty(self.span),
551	`1` => self.asts.pop().unwrap(),
552	_ => Ast::Concat(self),
553	}
554	}
555	}
556
557	/// A single literal expression.
558	///
559	/// A literal corresponds to a single Unicode scalar value. Literals may be
560	/// represented in their literal form, e.g., `a` or in their escaped form,
561	/// e.g., `\x61`.
562	#[derive(Clone, Debug, Eq, PartialEq)]
563	pub struct Literal {
564	/// The span of this literal.
565	pub span: Span,
566	/// The kind of this literal.
567	pub kind: LiteralKind,
568	/// The Unicode scalar value corresponding to this literal.
569	pub c: char,
570	}
571
572	impl Literal {
573	/// If this literal was written as a `\x` hex escape, then this returns
574	/// the corresponding byte value. Otherwise, this returns `None`.
575	pub fn byte(&self) -> Option<u8> {
576	match self.kind {
577	LiteralKind::HexFixed(HexLiteralKind::X) => {
578	u8::try_from(self.c).ok()
579	}
580	_ => None,
581	}
582	}
583	}
584
585	/// The kind of a single literal expression.
586	#[derive(Clone, Debug, Eq, PartialEq)]
587	pub enum LiteralKind {
588	/// The literal is written verbatim, e.g., `a` or `☃`.
589	Verbatim,
590	/// The literal is written as an escape because it is otherwise a special
591	/// regex meta character, e.g., `\` or `\[`.*
592	Meta,
593	/// The literal is written as an escape despite the fact that the escape is
594	/// unnecessary, e.g., `\%` or `\/`.
595	Superfluous,
596	/// The literal is written as an octal escape, e.g., `\141`.
597	Octal,
598	/// The literal is written as a hex code with a fixed number of digits
599	/// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
600	/// `\U00000061`.
601	HexFixed(HexLiteralKind),
602	/// The literal is written as a hex code with a bracketed number of
603	/// digits. The only restriction is that the bracketed hex code must refer
604	/// to a valid Unicode scalar value.
605	HexBrace(HexLiteralKind),
606	/// The literal is written as a specially recognized escape, e.g., `\f`
607	/// or `\n`.
608	Special(SpecialLiteralKind),
609	}
610
611	/// The type of a special literal.
612	///
613	/// A special literal is a special escape sequence recognized by the regex
614	/// parser, e.g., `\f` or `\n`.
615	#[derive(Clone, Debug, Eq, PartialEq)]
616	pub enum SpecialLiteralKind {
617	/// Bell, spelled `\a` (`\x07`).
618	Bell,
619	/// Form feed, spelled `\f` (`\x0C`).
620	FormFeed,
621	/// Tab, spelled `\t` (`\x09`).
622	Tab,
623	/// Line feed, spelled `\n` (`\x0A`).
624	LineFeed,
625	/// Carriage return, spelled `\r` (`\x0D`).
626	CarriageReturn,
627	/// Vertical tab, spelled `\v` (`\x0B`).
628	VerticalTab,
629	/// Space, spelled `\ ` (`\x20`). Note that this can only appear when
630	/// parsing in verbose mode.
631	Space,
632	}
633
634	/// The type of a Unicode hex literal.
635	///
636	/// Note that all variants behave the same when used with brackets. They only
637	/// differ when used without brackets in the number of hex digits that must
638	/// follow.
639	#[derive(Clone, Debug, Eq, PartialEq)]
640	pub enum HexLiteralKind {
641	/// A `\x` prefix. When used without brackets, this form is limited to
642	/// two digits.
643	X,
644	/// A `\u` prefix. When used without brackets, this form is limited to
645	/// four digits.
646	UnicodeShort,
647	/// A `\U` prefix. When used without brackets, this form is limited to
648	/// eight digits.
649	UnicodeLong,
650	}
651
652	impl HexLiteralKind {
653	/// The number of digits that must be used with this literal form when
654	/// used without brackets. When used with brackets, there is no
655	/// restriction on the number of digits.
656	pub fn digits(&self) -> u32 {
657	match *self {
658	HexLiteralKind::X => `2`,
659	HexLiteralKind::UnicodeShort => `4`,
660	HexLiteralKind::UnicodeLong => `8`,
661	}
662	}
663	}
664
665	/// A single character class expression.
666	#[derive(Clone, Debug, Eq, PartialEq)]
667	pub enum Class {
668	/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
669	Unicode(ClassUnicode),
670	/// A perl character class, e.g., `\d` or `\W`.
671	Perl(ClassPerl),
672	/// A bracketed character class set, which may contain zero or more
673	/// character ranges and/or zero or more nested classes. e.g.,
674	/// `[a-zA-Z\pL]`.
675	Bracketed(ClassBracketed),
676	}
677
678	impl Class {
679	/// Return the span of this character class.
680	pub fn span(&self) -> &Span {
681	match *self {
682	Class::Perl(ref x: &ClassPerl) => &x.span,
683	Class::Unicode(ref x: &ClassUnicode) => &x.span,
684	Class::Bracketed(ref x: &ClassBracketed) => &x.span,
685	}
686	}
687	}
688
689	/// A Perl character class.
690	#[derive(Clone, Debug, Eq, PartialEq)]
691	pub struct ClassPerl {
692	/// The span of this class.
693	pub span: Span,
694	/// The kind of Perl class.
695	pub kind: ClassPerlKind,
696	/// Whether the class is negated or not. e.g., `\d` is not negated but
697	/// `\D` is.
698	pub negated: bool,
699	}
700
701	/// The available Perl character classes.
702	#[derive(Clone, Debug, Eq, PartialEq)]
703	pub enum ClassPerlKind {
704	/// Decimal numbers.
705	Digit,
706	/// Whitespace.
707	Space,
708	/// Word characters.
709	Word,
710	}
711
712	/// An ASCII character class.
713	#[derive(Clone, Debug, Eq, PartialEq)]
714	pub struct ClassAscii {
715	/// The span of this class.
716	pub span: Span,
717	/// The kind of ASCII class.
718	pub kind: ClassAsciiKind,
719	/// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
720	/// but `[[:^alpha:]]` is.
721	pub negated: bool,
722	}
723
724	/// The available ASCII character classes.
725	#[derive(Clone, Debug, Eq, PartialEq)]
726	pub enum ClassAsciiKind {
727	/// `[0-9A-Za-z]`
728	Alnum,
729	/// `[A-Za-z]`
730	Alpha,
731	/// `[\x00-\x7F]`
732	Ascii,
733	/// `[ \t]`
734	Blank,
735	/// `[\x00-\x1F\x7F]`
736	Cntrl,
737	/// `[0-9]`
738	Digit,
739	/// `[!-~]`
740	Graph,
741	/// `[a-z]`
742	Lower,
743	/// `[ -~]`
744	Print,
745	/// `[!-/:-@\[-`{-~]`
746	Punct,
747	/// `[\t\n\v\f\r ]`
748	Space,
749	/// `[A-Z]`
750	Upper,
751	/// `[0-9A-Za-z_]`
752	Word,
753	/// `[0-9A-Fa-f]`
754	Xdigit,
755	}
756
757	impl ClassAsciiKind {
758	/// Return the corresponding ClassAsciiKind variant for the given name.
759	///
760	/// The name given should correspond to the lowercase version of the
761	/// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
762	///
763	/// If no variant with the corresponding name exists, then `None` is
764	/// returned.
765	pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
766	use self::ClassAsciiKind::*;
767	match name {
768	"alnum" => Some(Alnum),
769	"alpha" => Some(Alpha),
770	"ascii" => Some(Ascii),
771	"blank" => Some(Blank),
772	"cntrl" => Some(Cntrl),
773	"digit" => Some(Digit),
774	"graph" => Some(Graph),
775	"lower" => Some(Lower),
776	"print" => Some(Print),
777	"punct" => Some(Punct),
778	"space" => Some(Space),
779	"upper" => Some(Upper),
780	"word" => Some(Word),
781	"xdigit" => Some(Xdigit),
782	_ => None,
783	}
784	}
785	}
786
787	/// A Unicode character class.
788	#[derive(Clone, Debug, Eq, PartialEq)]
789	pub struct ClassUnicode {
790	/// The span of this class.
791	pub span: Span,
792	/// Whether this class is negated or not.
793	///
794	/// Note: be careful when using this attribute. This specifically refers
795	/// to whether the class is written as `\p` or `\P`, where the latter
796	/// is `negated = true`. However, it also possible to write something like
797	/// `\P{scx!=Katakana}` which is actually equivalent to
798	/// `\p{scx=Katakana}` and is therefore not actually negated even though
799	/// `negated = true` here. To test whether this class is truly negated
800	/// or not, use the `is_negated` method.
801	pub negated: bool,
802	/// The kind of Unicode class.
803	pub kind: ClassUnicodeKind,
804	}
805
806	impl ClassUnicode {
807	/// Returns true if this class has been negated.
808	///
809	/// Note that this takes the Unicode op into account, if it's present.
810	/// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
811	pub fn is_negated(&self) -> bool {
812	match self.kind {
813	ClassUnicodeKind::NamedValue {
814	op: ClassUnicodeOpKind::NotEqual,
815	..
816	} => !self.negated,
817	_ => self.negated,
818	}
819	}
820	}
821
822	/// The available forms of Unicode character classes.
823	#[derive(Clone, Debug, Eq, PartialEq)]
824	pub enum ClassUnicodeKind {
825	/// A one letter abbreviated class, e.g., `\pN`.
826	OneLetter(char),
827	/// A binary property, general category or script. The string may be
828	/// empty.
829	Named(String),
830	/// A property name and an associated value.
831	NamedValue {
832	/// The type of Unicode op used to associate `name` with `value`.
833	op: ClassUnicodeOpKind,
834	/// The property name (which may be empty).
835	name: String,
836	/// The property value (which may be empty).
837	value: String,
838	},
839	}
840
841	/// The type of op used in a Unicode character class.
842	#[derive(Clone, Debug, Eq, PartialEq)]
843	pub enum ClassUnicodeOpKind {
844	/// A property set to a specific value, e.g., `\p{scx=Katakana}`.
845	Equal,
846	/// A property set to a specific value using a colon, e.g.,
847	/// `\p{scx:Katakana}`.
848	Colon,
849	/// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
850	NotEqual,
851	}
852
853	impl ClassUnicodeOpKind {
854	/// Whether the op is an equality op or not.
855	pub fn is_equal(&self) -> bool {
856	match *self {
857	ClassUnicodeOpKind::Equal \| ClassUnicodeOpKind::Colon => `true`,
858	_ => `false`,
859	}
860	}
861	}
862
863	/// A bracketed character class, e.g., `[a-z0-9]`.
864	#[derive(Clone, Debug, Eq, PartialEq)]
865	pub struct ClassBracketed {
866	/// The span of this class.
867	pub span: Span,
868	/// Whether this class is negated or not. e.g., `[a]` is not negated but
869	/// `[^a]` is.
870	pub negated: bool,
871	/// The type of this set. A set is either a normal union of things, e.g.,
872	/// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
873	pub kind: ClassSet,
874	}
875
876	/// A character class set.
877	///
878	/// This type corresponds to the internal structure of a bracketed character
879	/// class. That is, every bracketed character is one of two types: a union of
880	/// items (literals, ranges, other bracketed classes) or a tree of binary set
881	/// operations.
882	#[derive(Clone, Debug, Eq, PartialEq)]
883	pub enum ClassSet {
884	/// An item, which can be a single literal, range, nested character class
885	/// or a union of items.
886	Item(ClassSetItem),
887	/// A single binary operation (i.e., &&, -- or ~~).
888	BinaryOp(ClassSetBinaryOp),
889	}
890
891	impl ClassSet {
892	/// Build a set from a union.
893	pub fn union(ast: ClassSetUnion) -> ClassSet {
894	ClassSet::Item(ClassSetItem::Union(ast))
895	}
896
897	/// Return the span of this character class set.
898	pub fn span(&self) -> &Span {
899	match *self {
900	ClassSet::Item(ref x: &ClassSetItem) => x.span(),
901	ClassSet::BinaryOp(ref x: &ClassSetBinaryOp) => &x.span,
902	}
903	}
904
905	/// Return true if and only if this class set is empty.
906	fn is_empty(&self) -> bool {
907	match *self {
908	ClassSet::Item(ClassSetItem::Empty(_)) => `true`,
909	_ => `false`,
910	}
911	}
912	}
913
914	/// A single component of a character class set.
915	#[derive(Clone, Debug, Eq, PartialEq)]
916	pub enum ClassSetItem {
917	/// An empty item.
918	///
919	/// Note that a bracketed character class cannot contain a single empty
920	/// item. Empty items can appear when using one of the binary operators.
921	/// For example, `[&&]` is the intersection of two empty classes.
922	Empty(Span),
923	/// A single literal.
924	Literal(Literal),
925	/// A range between two literals.
926	Range(ClassSetRange),
927	/// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
928	Ascii(ClassAscii),
929	/// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
930	Unicode(ClassUnicode),
931	/// A perl character class, e.g., `\d` or `\W`.
932	Perl(ClassPerl),
933	/// A bracketed character class set, which may contain zero or more
934	/// character ranges and/or zero or more nested classes. e.g.,
935	/// `[a-zA-Z\pL]`.
936	Bracketed(Box<ClassBracketed>),
937	/// A union of items.
938	Union(ClassSetUnion),
939	}
940
941	impl ClassSetItem {
942	/// Return the span of this character class set item.
943	pub fn span(&self) -> &Span {
944	match *self {
945	ClassSetItem::Empty(ref span: &Span) => span,
946	ClassSetItem::Literal(ref x: &Literal) => &x.span,
947	ClassSetItem::Range(ref x: &ClassSetRange) => &x.span,
948	ClassSetItem::Ascii(ref x: &ClassAscii) => &x.span,
949	ClassSetItem::Perl(ref x: &ClassPerl) => &x.span,
950	ClassSetItem::Unicode(ref x: &ClassUnicode) => &x.span,
951	ClassSetItem::Bracketed(ref x: &Box) => &x.span,
952	ClassSetItem::Union(ref x: &ClassSetUnion) => &x.span,
953	}
954	}
955	}
956
957	/// A single character class range in a set.
958	#[derive(Clone, Debug, Eq, PartialEq)]
959	pub struct ClassSetRange {
960	/// The span of this range.
961	pub span: Span,
962	/// The start of this range.
963	pub start: Literal,
964	/// The end of this range.
965	pub end: Literal,
966	}
967
968	impl ClassSetRange {
969	/// Returns true if and only if this character class range is valid.
970	///
971	/// The only case where a range is invalid is if its start is greater than
972	/// its end.
973	pub fn is_valid(&self) -> bool {
974	self.start.c <= self.end.c
975	}
976	}
977
978	/// A union of items inside a character class set.
979	#[derive(Clone, Debug, Eq, PartialEq)]
980	pub struct ClassSetUnion {
981	/// The span of the items in this operation. e.g., the `a-z0-9` in
982	/// `[^a-z0-9]`
983	pub span: Span,
984	/// The sequence of items that make up this union.
985	pub items: Vec<ClassSetItem>,
986	}
987
988	impl ClassSetUnion {
989	/// Push a new item in this union.
990	///
991	/// The ending position of this union's span is updated to the ending
992	/// position of the span of the item given. If the union is empty, then
993	/// the starting position of this union is set to the starting position
994	/// of this item.
995	///
996	/// In other words, if you only use this method to add items to a union
997	/// and you set the spans on each item correctly, then you should never
998	/// need to adjust the span of the union directly.
999	pub fn push(&mut self, item: ClassSetItem) {
1000	if self.items.is_empty() {
1001	self.span.start = item.span().start;
1002	}
1003	self.span.end = item.span().end;
1004	self.items.push(item);
1005	}
1006
1007	/// Return this union as a character class set item.
1008	///
1009	/// If this union contains zero items, then an empty union is
1010	/// returned. If this concatenation contains exactly 1 item, then the
1011	/// corresponding item is returned. Otherwise, ClassSetItem::Union is
1012	/// returned.
1013	pub fn into_item(mut self) -> ClassSetItem {
1014	match self.items.len() {
1015	`0` => ClassSetItem::Empty(self.span),
1016	`1` => self.items.pop().unwrap(),
1017	_ => ClassSetItem::Union(self),
1018	}
1019	}
1020	}
1021
1022	/// A Unicode character class set operation.
1023	#[derive(Clone, Debug, Eq, PartialEq)]
1024	pub struct ClassSetBinaryOp {
1025	/// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
1026	pub span: Span,
1027	/// The type of this set operation.
1028	pub kind: ClassSetBinaryOpKind,
1029	/// The left hand side of the operation.
1030	pub lhs: Box<ClassSet>,
1031	/// The right hand side of the operation.
1032	pub rhs: Box<ClassSet>,
1033	}
1034
1035	/// The type of a Unicode character class set operation.
1036	///
1037	/// Note that this doesn't explicitly represent union since there is no
1038	/// explicit union operator. Concatenation inside a character class corresponds
1039	/// to the union operation.
1040	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1041	pub enum ClassSetBinaryOpKind {
1042	/// The intersection of two sets, e.g., `\pN&&[a-z]`.
1043	Intersection,
1044	/// The difference of two sets, e.g., `\pN--[0-9]`.
1045	Difference,
1046	/// The symmetric difference of two sets. The symmetric difference is the
1047	/// set of elements belonging to one but not both sets.
1048	/// e.g., `[\pL~~[:ascii:]]`.
1049	SymmetricDifference,
1050	}
1051
1052	/// A single zero-width assertion.
1053	#[derive(Clone, Debug, Eq, PartialEq)]
1054	pub struct Assertion {
1055	/// The span of this assertion.
1056	pub span: Span,
1057	/// The assertion kind, e.g., `\b` or `^`.
1058	pub kind: AssertionKind,
1059	}
1060
1061	/// An assertion kind.
1062	#[derive(Clone, Debug, Eq, PartialEq)]
1063	pub enum AssertionKind {
1064	/// `^`
1065	StartLine,
1066	/// `$`
1067	EndLine,
1068	/// `\A`
1069	StartText,
1070	/// `\z`
1071	EndText,
1072	/// `\b`
1073	WordBoundary,
1074	/// `\B`
1075	NotWordBoundary,
1076	}
1077
1078	/// A repetition operation applied to a regular expression.
1079	#[derive(Clone, Debug, Eq, PartialEq)]
1080	pub struct Repetition {
1081	/// The span of this operation.
1082	pub span: Span,
1083	/// The actual operation.
1084	pub op: RepetitionOp,
1085	/// Whether this operation was applied greedily or not.
1086	pub greedy: bool,
1087	/// The regular expression under repetition.
1088	pub ast: Box<Ast>,
1089	}
1090
1091	/// The repetition operator itself.
1092	#[derive(Clone, Debug, Eq, PartialEq)]
1093	pub struct RepetitionOp {
1094	/// The span of this operator. This includes things like `+`, `?` and*
1095	/// `{m,n}`.
1096	pub span: Span,
1097	/// The type of operation.
1098	pub kind: RepetitionKind,
1099	}
1100
1101	/// The kind of a repetition operator.
1102	#[derive(Clone, Debug, Eq, PartialEq)]
1103	pub enum RepetitionKind {
1104	/// `?`
1105	ZeroOrOne,
1106	/// ``*
1107	ZeroOrMore,
1108	/// `+`
1109	OneOrMore,
1110	/// `{m,n}`
1111	Range(RepetitionRange),
1112	}
1113
1114	/// A range repetition operator.
1115	#[derive(Clone, Debug, Eq, PartialEq)]
1116	pub enum RepetitionRange {
1117	/// `{m}`
1118	Exactly(u32),
1119	/// `{m,}`
1120	AtLeast(u32),
1121	/// `{m,n}`
1122	Bounded(u32, u32),
1123	}
1124
1125	impl RepetitionRange {
1126	/// Returns true if and only if this repetition range is valid.
1127	///
1128	/// The only case where a repetition range is invalid is if it is bounded
1129	/// and its start is greater than its end.
1130	pub fn is_valid(&self) -> bool {
1131	match *self {
1132	RepetitionRange::Bounded(s: u32, e: u32) if s > e => `false`,
1133	_ => `true`,
1134	}
1135	}
1136	}
1137
1138	/// A grouped regular expression.
1139	///
1140	/// This includes both capturing and non-capturing groups. This does not
1141	/// include flag-only groups like `(?is)`, but does contain any group that
1142	/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
1143	/// `(?is:a)`.
1144	#[derive(Clone, Debug, Eq, PartialEq)]
1145	pub struct Group {
1146	/// The span of this group.
1147	pub span: Span,
1148	/// The kind of this group.
1149	pub kind: GroupKind,
1150	/// The regular expression in this group.
1151	pub ast: Box<Ast>,
1152	}
1153
1154	impl Group {
1155	/// If this group is non-capturing, then this returns the (possibly empty)
1156	/// set of flags. Otherwise, `None` is returned.
1157	pub fn flags(&self) -> Option<&Flags> {
1158	match self.kind {
1159	GroupKind::NonCapturing(ref flags) => Some(flags),
1160	_ => None,
1161	}
1162	}
1163
1164	/// Returns true if and only if this group is capturing.
1165	pub fn is_capturing(&self) -> bool {
1166	match self.kind {
1167	GroupKind::CaptureIndex(_) \| GroupKind::CaptureName { .. } => `true`,
1168	GroupKind::NonCapturing(_) => `false`,
1169	}
1170	}
1171
1172	/// Returns the capture index of this group, if this is a capturing group.
1173	///
1174	/// This returns a capture index precisely when `is_capturing` is `true`.
1175	pub fn capture_index(&self) -> Option<u32> {
1176	match self.kind {
1177	GroupKind::CaptureIndex(i) => Some(i),
1178	GroupKind::CaptureName { ref name, .. } => Some(name.index),
1179	GroupKind::NonCapturing(_) => None,
1180	}
1181	}
1182	}
1183
1184	/// The kind of a group.
1185	#[derive(Clone, Debug, Eq, PartialEq)]
1186	pub enum GroupKind {
1187	/// `(a)`
1188	CaptureIndex(u32),
1189	/// `(?<name>a)` or `(?P<name>a)`
1190	CaptureName {
1191	/// True if the `?P<` syntax is used and false if the `?<` syntax is used.
1192	starts_with_p: bool,
1193	/// The capture name.
1194	name: CaptureName,
1195	},
1196	/// `(?:a)` and `(?i:a)`
1197	NonCapturing(Flags),
1198	}
1199
1200	/// A capture name.
1201	///
1202	/// This corresponds to the name itself between the angle brackets in, e.g.,
1203	/// `(?P<foo>expr)`.
1204	#[derive(Clone, Debug, Eq, PartialEq)]
1205	pub struct CaptureName {
1206	/// The span of this capture name.
1207	pub span: Span,
1208	/// The capture name.
1209	pub name: String,
1210	/// The capture index.
1211	pub index: u32,
1212	}
1213
1214	/// A group of flags that is not applied to a particular regular expression.
1215	#[derive(Clone, Debug, Eq, PartialEq)]
1216	pub struct SetFlags {
1217	/// The span of these flags, including the grouping parentheses.
1218	pub span: Span,
1219	/// The actual sequence of flags.
1220	pub flags: Flags,
1221	}
1222
1223	/// A group of flags.
1224	///
1225	/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
1226	#[derive(Clone, Debug, Eq, PartialEq)]
1227	pub struct Flags {
1228	/// The span of this group of flags.
1229	pub span: Span,
1230	/// A sequence of flag items. Each item is either a flag or a negation
1231	/// operator.
1232	pub items: Vec<FlagsItem>,
1233	}
1234
1235	impl Flags {
1236	/// Add the given item to this sequence of flags.
1237	///
1238	/// If the item was added successfully, then `None` is returned. If the
1239	/// given item is a duplicate, then `Some(i)` is returned, where
1240	/// `items[i].kind == item.kind`.
1241	pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
1242	for (i, x) in self.items.iter().enumerate() {
1243	if x.kind == item.kind {
1244	return Some(i);
1245	}
1246	}
1247	self.items.push(item);
1248	None
1249	}
1250
1251	/// Returns the state of the given flag in this set.
1252	///
1253	/// If the given flag is in the set but is negated, then `Some(false)` is
1254	/// returned.
1255	///
1256	/// If the given flag is in the set and is not negated, then `Some(true)`
1257	/// is returned.
1258	///
1259	/// Otherwise, `None` is returned.
1260	pub fn flag_state(&self, flag: Flag) -> Option<bool> {
1261	let mut negated = `false`;
1262	for x in &self.items {
1263	match x.kind {
1264	FlagsItemKind::Negation => {
1265	negated = `true`;
1266	}
1267	FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
1268	return Some(!negated);
1269	}
1270	_ => {}
1271	}
1272	}
1273	None
1274	}
1275	}
1276
1277	/// A single item in a group of flags.
1278	#[derive(Clone, Debug, Eq, PartialEq)]
1279	pub struct FlagsItem {
1280	/// The span of this item.
1281	pub span: Span,
1282	/// The kind of this item.
1283	pub kind: FlagsItemKind,
1284	}
1285
1286	/// The kind of an item in a group of flags.
1287	#[derive(Clone, Debug, Eq, PartialEq)]
1288	pub enum FlagsItemKind {
1289	/// A negation operator applied to all subsequent flags in the enclosing
1290	/// group.
1291	Negation,
1292	/// A single flag in a group.
1293	Flag(Flag),
1294	}
1295
1296	impl FlagsItemKind {
1297	/// Returns true if and only if this item is a negation operator.
1298	pub fn is_negation(&self) -> bool {
1299	match *self {
1300	FlagsItemKind::Negation => `true`,
1301	_ => `false`,
1302	}
1303	}
1304	}
1305
1306	/// A single flag.
1307	#[derive(Clone, Copy, Debug, Eq, PartialEq)]
1308	pub enum Flag {
1309	/// `i`
1310	CaseInsensitive,
1311	/// `m`
1312	MultiLine,
1313	/// `s`
1314	DotMatchesNewLine,
1315	/// `U`
1316	SwapGreed,
1317	/// `u`
1318	Unicode,
1319	/// `R`
1320	CRLF,
1321	/// `x`
1322	IgnoreWhitespace,
1323	}
1324
1325	/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
1326	/// space but heap space proportional to the depth of the `Ast`.
1327	impl Drop for Ast {
1328	fn drop(&mut self) {
1329	use core::mem;
1330
1331	match *self {
1332	Ast::Empty(_)
1333	\| Ast::Flags(_)
1334	\| Ast::Literal(_)
1335	\| Ast::Dot(_)
1336	\| Ast::Assertion(_)
1337	// Classes are recursive, so they get their own Drop impl.
1338	\| Ast::Class(_) => return,
1339	Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1340	Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1341	Ast::Alternation(ref x) if x.asts.is_empty() => return,
1342	Ast::Concat(ref x) if x.asts.is_empty() => return,
1343	_ => {}
1344	}
1345
1346	let empty_span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1347	let empty_ast = \|\| Ast::Empty(empty_span());
1348	let mut stack = vec![mem::replace(self, empty_ast())];
1349	while let Some(mut ast) = stack.pop() {
1350	match ast {
1351	Ast::Empty(_)
1352	\| Ast::Flags(_)
1353	\| Ast::Literal(_)
1354	\| Ast::Dot(_)
1355	\| Ast::Assertion(_)
1356	// Classes are recursive, so they get their own Drop impl.
1357	\| Ast::Class(_) => {}
1358	Ast::Repetition(ref mut x) => {
1359	stack.push(mem::replace(&mut x.ast, empty_ast()));
1360	}
1361	Ast::Group(ref mut x) => {
1362	stack.push(mem::replace(&mut x.ast, empty_ast()));
1363	}
1364	Ast::Alternation(ref mut x) => {
1365	stack.extend(x.asts.drain(..));
1366	}
1367	Ast::Concat(ref mut x) => {
1368	stack.extend(x.asts.drain(..));
1369	}
1370	}
1371	}
1372	}
1373	}
1374
1375	/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
1376	/// stack space but heap space proportional to the depth of the `ClassSet`.
1377	impl Drop for ClassSet {
1378	fn drop(&mut self) {
1379	use core::mem;
1380
1381	match *self {
1382	ClassSet::Item(ref item) => match *item {
1383	ClassSetItem::Empty(_)
1384	\| ClassSetItem::Literal(_)
1385	\| ClassSetItem::Range(_)
1386	\| ClassSetItem::Ascii(_)
1387	\| ClassSetItem::Unicode(_)
1388	\| ClassSetItem::Perl(_) => return,
1389	ClassSetItem::Bracketed(ref x) => {
1390	if x.kind.is_empty() {
1391	return;
1392	}
1393	}
1394	ClassSetItem::Union(ref x) => {
1395	if x.items.is_empty() {
1396	return;
1397	}
1398	}
1399	},
1400	ClassSet::BinaryOp(ref op) => {
1401	if op.lhs.is_empty() && op.rhs.is_empty() {
1402	return;
1403	}
1404	}
1405	}
1406
1407	let empty_span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1408	let empty_set = \|\| ClassSet::Item(ClassSetItem::Empty(empty_span()));
1409	let mut stack = vec![mem::replace(self, empty_set())];
1410	while let Some(mut set) = stack.pop() {
1411	match set {
1412	ClassSet::Item(ref mut item) => match *item {
1413	ClassSetItem::Empty(_)
1414	\| ClassSetItem::Literal(_)
1415	\| ClassSetItem::Range(_)
1416	\| ClassSetItem::Ascii(_)
1417	\| ClassSetItem::Unicode(_)
1418	\| ClassSetItem::Perl(_) => {}
1419	ClassSetItem::Bracketed(ref mut x) => {
1420	stack.push(mem::replace(&mut x.kind, empty_set()));
1421	}
1422	ClassSetItem::Union(ref mut x) => {
1423	stack.extend(x.items.drain(..).map(ClassSet::Item));
1424	}
1425	},
1426	ClassSet::BinaryOp(ref mut op) => {
1427	stack.push(mem::replace(&mut op.lhs, empty_set()));
1428	stack.push(mem::replace(&mut op.rhs, empty_set()));
1429	}
1430	}
1431	}
1432	}
1433	}
1434
1435	#[cfg(test)]
1436	mod tests {
1437	use super::*;
1438
1439	// We use a thread with an explicit stack size to test that our destructor
1440	// for Ast can handle arbitrarily sized expressions in constant stack
1441	// space. In case we run on a platform without threads (WASM?), we limit
1442	// this test to Windows/Unix.
1443	#[test]
1444	#[cfg(any(unix, windows))]
1445	fn no_stack_overflow_on_drop() {
1446	use std::thread;
1447
1448	let run = \|\| {
1449	let span = \|\| Span::splat(Position::new(`0`, `0`, `0`));
1450	let mut ast = Ast::Empty(span());
1451	for i in `0`..`200` {
1452	ast = Ast::Group(Group {
1453	span: span(),
1454	kind: GroupKind::CaptureIndex(i),
1455	ast: Box::new(ast),
1456	});
1457	}
1458	assert!(!ast.is_empty());
1459	};
1460
1461	// We run our test on a thread with a small stack size so we can
1462	// force the issue more easily.
1463	//
1464	// NOTE(2023-03-21): It turns out that some platforms (like FreeBSD)
1465	// will just barf with very small stack sizes. So we bump this up a bit
1466	// to give more room to breath. When I did this, I confirmed that if
1467	// I remove the custom `Drop` impl for `Ast`, then this test does
1468	// indeed still fail with a stack overflow. (At the time of writing, I
1469	// had to bump it all the way up to 32K before the test would pass even
1470	// without the custom `Drop` impl. So 16K seems like a safe number
1471	// here.)
1472	//
1473	// See: https://github.com/rust-lang/regex/issues/967
1474	thread::Builder::new()
1475	.stack_size(`16` << `10`)
1476	.spawn(run)
1477	.unwrap()
1478	.join()
1479	.unwrap();
1480	}
1481	}
1482