parse.rs source code [crates/regex-syntax/src/ast/parse.rs]

/*!
This module provides a regular expression parser.
*/
4
5	use core::{
6	borrow::Borrow,
7	cell::{Cell, RefCell},
8	mem,
9	};
10
11	use alloc::{
12	boxed::Box,
13	string::{String, ToString},
14	vec,
15	vec::Vec,
16	};
17
18	use crate::{
19	ast::{self, Ast, Position, Span},
20	either::Either,
21	is_escapeable_character, is_meta_character,
22	};
23
24	type Result<T> = core::result::Result<T, ast::Error>;
25
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal; the only primitive accepted as an endpoint of a class
    /// range (see `into_class_literal`).
    Literal(ast::Literal),
    /// An assertion. Not permitted inside a character class.
    Assertion(ast::Assertion),
    /// The dot primitive, carrying only its span. Not permitted inside a
    /// character class.
    Dot(Span),
    /// A class described by `ast::ClassPerl`.
    Perl(ast::ClassPerl),
    /// A class described by `ast::ClassUnicode`.
    Unicode(ast::ClassUnicode),
}
40
41	impl Primitive {
42	/// Return the span of this primitive.
43	fn span(&self) -> &Span {
44	match *self {
45	Primitive::Literal(ref x) => &x.span,
46	Primitive::Assertion(ref x) => &x.span,
47	Primitive::Dot(ref span) => span,
48	Primitive::Perl(ref x) => &x.span,
49	Primitive::Unicode(ref x) => &x.span,
50	}
51	}
52
53	/// Convert this primitive into a proper AST.
54	fn into_ast(self) -> Ast {
55	match self {
56	Primitive::Literal(lit) => Ast::literal(lit),
57	Primitive::Assertion(assert) => Ast::assertion(assert),
58	Primitive::Dot(span) => Ast::dot(span),
59	Primitive::Perl(cls) => Ast::class_perl(cls),
60	Primitive::Unicode(cls) => Ast::class_unicode(cls),
61	}
62	}
63
64	/// Convert this primitive into an item in a character class.
65	///
66	/// If this primitive is not a legal item (i.e., an assertion or a dot),
67	/// then return an error.
68	fn into_class_set_item<P: Borrow<Parser>>(
69	self,
70	p: &ParserI<'_, P>,
71	) -> Result<ast::ClassSetItem> {
72	use self::Primitive::*;
73	use crate::ast::ClassSetItem;
74
75	match self {
76	Literal(lit) => Ok(ClassSetItem::Literal(lit)),
77	Perl(cls) => Ok(ClassSetItem::Perl(cls)),
78	Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
79	x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
80	}
81	}
82
83	/// Convert this primitive into a literal in a character class. In
84	/// particular, literals are the only valid items that can appear in
85	/// ranges.
86	///
87	/// If this primitive is not a legal item (i.e., a class, assertion or a
88	/// dot), then return an error.
89	fn into_class_literal<P: Borrow<Parser>>(
90	self,
91	p: &ParserI<'_, P>,
92	) -> Result<ast::Literal> {
93	use self::Primitive::*;
94
95	match self {
96	Literal(lit) => Ok(lit),
97	x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
98	}
99	}
100	}
101
/// Returns true if the given character is an ASCII hexadecimal digit,
/// i.e., one of `0-9`, `a-f` or `A-F`.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
106
/// Returns true if the given character is valid in a capture group name.
///
/// When `first` is true, `c` is judged as the leading character of the
/// name, which must be an underscore or alphabetic. Otherwise `c` may be
/// an underscore, `.`, `[`, `]` or any alphanumeric character.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        // Punctuation is only tolerated after the first character.
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
118
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether verbose mode (insignificant whitespace and comments) is
    /// enabled from the start of the pattern. Defaults to `false`.
    ignore_whitespace: bool,
    /// The nesting limit handed to the built parser. Defaults to `250`.
    nest_limit: u32,
    /// Whether octal syntax is supported. Defaults to `false`.
    octal: bool,
}
128
129	impl Default for ParserBuilder {
130	fn default() -> ParserBuilder {
131	ParserBuilder::new()
132	}
133	}
134
135	impl ParserBuilder {
136	/// Create a new parser builder with a default configuration.
137	pub fn new() -> ParserBuilder {
138	ParserBuilder {
139	ignore_whitespace: `false`,
140	nest_limit: `250`,
141	octal: `false`,
142	}
143	}
144
145	/// Build a parser from this configuration with the given pattern.
146	pub fn build(&self) -> Parser {
147	Parser {
148	pos: Cell::new(Position { offset: `0`, line: `1`, column: `1` }),
149	capture_index: Cell::new(`0`),
150	nest_limit: self.nest_limit,
151	octal: self.octal,
152	initial_ignore_whitespace: self.ignore_whitespace,
153	ignore_whitespace: Cell::new(self.ignore_whitespace),
154	comments: RefCell::new(vec![]),
155	stack_group: RefCell::new(vec![]),
156	stack_class: RefCell::new(vec![]),
157	capture_names: RefCell::new(vec![]),
158	scratch: RefCell::new(String::new()),
159	}
160	}
161
162	/// Set the nesting limit for this parser.
163	///
164	/// The nesting limit controls how deep the abstract syntax tree is allowed
165	/// to be. If the AST exceeds the given limit (e.g., with too many nested
166	/// groups), then an error is returned by the parser.
167	///
168	/// The purpose of this limit is to act as a heuristic to prevent stack
169	/// overflow for consumers that do structural induction on an `Ast` using
170	/// explicit recursion. While this crate never does this (instead using
171	/// constant stack space and moving the call stack to the heap), other
172	/// crates may.
173	///
174	/// This limit is not checked until the entire AST is parsed. Therefore,
175	/// if callers want to put a limit on the amount of heap space used, then
176	/// they should impose a limit on the length, in bytes, of the concrete
177	/// pattern string. In particular, this is viable since this parser
178	/// implementation will limit itself to heap space proportional to the
179	/// length of the pattern string.
180	///
181	/// Note that a nest limit of `0` will return a nest limit error for most
182	/// patterns but not all. For example, a nest limit of `0` permits `a` but
183	/// not `ab`, since `ab` requires a concatenation, which results in a nest
184	/// depth of `1`. In general, a nest limit is not something that manifests
185	/// in an obvious way in the concrete syntax, therefore, it should not be
186	/// used in a granular way.
187	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
188	self.nest_limit = limit;
189	self
190	}
191
192	/// Whether to support octal syntax or not.
193	///
194	/// Octal syntax is a little-known way of uttering Unicode codepoints in
195	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
196	/// `\141` are all equivalent regular expressions, where the last example
197	/// shows octal syntax.
198	///
199	/// While supporting octal syntax isn't in and of itself a problem, it does
200	/// make good error messages harder. That is, in PCRE based regex engines,
201	/// syntax like `\0` invokes a backreference, which is explicitly
202	/// unsupported in Rust's regex engine. However, many users expect it to
203	/// be supported. Therefore, when octal support is disabled, the error
204	/// message will explicitly mention that backreferences aren't supported.
205	///
206	/// Octal syntax is disabled by default.
207	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
208	self.octal = yes;
209	self
210	}
211
212	/// Enable verbose mode in the regular expression.
213	///
214	/// When enabled, verbose mode permits insignificant whitespace in many
215	/// places in the regular expression, as well as comments. Comments are
216	/// started using `#` and continue until the end of the line.
217	///
218	/// By default, this is disabled. It may be selectively enabled in the
219	/// regular expression by using the `x` flag regardless of this setting.
220	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
221	self.ignore_whitespace = yes;
222	self
223	}
224	}
225
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
///
/// All mutable parsing state lives in `Cell`/`RefCell` fields, so the
/// internal parser can update it through a shared reference.
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. It is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sorted sequence of capture names. This is used to detect duplicate
    /// capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
266
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration. The methods on this type require
    /// `P: Borrow<Parser>`.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
283
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// The setting of the `x` flag in effect just before this group
        /// was opened. It is restored when the group is popped.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
304
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the
    /// stored set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
331
332	impl Parser {
333	/// Create a new parser with a default configuration.
334	///
335	/// The parser can be run with either the `parse` or `parse_with_comments`
336	/// methods. The parse methods return an abstract syntax tree.
337	///
338	/// To set configuration options on the parser, use [`ParserBuilder`].
339	pub fn new() -> Parser {
340	ParserBuilder::new().build()
341	}
342
343	/// Parse the regular expression into an abstract syntax tree.
344	pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
345	ParserI::new(self, pattern).parse()
346	}
347
348	/// Parse the regular expression and return an abstract syntax tree with
349	/// all of the comments found in the pattern.
350	pub fn parse_with_comments(
351	&mut self,
352	pattern: &str,
353	) -> Result<ast::WithComments> {
354	ParserI::new(self, pattern).parse_with_comments()
355	}
356
357	/// Reset the internal state of a parser.
358	///
359	/// This is called at the beginning of every parse. This prevents the
360	/// parser from running with inconsistent state (say, if a previous
361	/// invocation returned an error and the parser is reused).
362	fn reset(&self) {
363	// These settings should be in line with the construction
364	// in `ParserBuilder::build`.
365	self.pos.set(Position { offset: `0`, line: `1`, column: `1` });
366	self.ignore_whitespace.set(self.initial_ignore_whitespace);
367	self.comments.borrow_mut().clear();
368	self.stack_group.borrow_mut().clear();
369	self.stack_class.borrow_mut().clear();
370	}
371	}
372
373	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
374	/// Build an internal parser from a parser configuration and a pattern.
375	fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
376	ParserI { parser, pattern }
377	}
378
379	/// Return a reference to the parser state.
380	fn parser(&self) -> &Parser {
381	self.parser.borrow()
382	}
383
384	/// Return a reference to the pattern being parsed.
385	fn pattern(&self) -> &str {
386	self.pattern
387	}
388
389	/// Create a new error with the given span and error type.
390	fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
391	ast::Error { kind, pattern: self.pattern().to_string(), span }
392	}
393
394	/// Return the current offset of the parser.
395	///
396	/// The offset starts at `0` from the beginning of the regular expression
397	/// pattern string.
398	fn offset(&self) -> usize {
399	self.parser().pos.get().offset
400	}
401
402	/// Return the current line number of the parser.
403	///
404	/// The line number starts at `1`.
405	fn line(&self) -> usize {
406	self.parser().pos.get().line
407	}
408
409	/// Return the current column of the parser.
410	///
411	/// The column number starts at `1` and is reset whenever a `\n` is seen.
412	fn column(&self) -> usize {
413	self.parser().pos.get().column
414	}
415
416	/// Return the next capturing index. Each subsequent call increments the
417	/// internal index.
418	///
419	/// The span given should correspond to the location of the opening
420	/// parenthesis.
421	///
422	/// If the capture limit is exceeded, then an error is returned.
423	fn next_capture_index(&self, span: Span) -> Result<u32> {
424	let current = self.parser().capture_index.get();
425	let i = current.checked_add(`1`).ok_or_else(\|\| {
426	self.error(span, ast::ErrorKind::CaptureLimitExceeded)
427	})?;
428	self.parser().capture_index.set(i);
429	Ok(i)
430	}
431
432	/// Adds the given capture name to this parser. If this capture name has
433	/// already been used, then an error is returned.
434	fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
435	let mut names = self.parser().capture_names.borrow_mut();
436	match names
437	.binary_search_by_key(&cap.name.as_str(), \|c\| c.name.as_str())
438	{
439	Err(i) => {
440	names.insert(i, cap.clone());
441	Ok(())
442	}
443	Ok(i) => Err(self.error(
444	cap.span,
445	ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
446	)),
447	}
448	}
449
450	/// Return whether the parser should ignore whitespace or not.
451	fn ignore_whitespace(&self) -> bool {
452	self.parser().ignore_whitespace.get()
453	}
454
455	/// Return the character at the current position of the parser.
456	///
457	/// This panics if the current position does not point to a valid char.
458	fn char(&self) -> char {
459	self.char_at(self.offset())
460	}
461
462	/// Return the character at the given position.
463	///
464	/// This panics if the given position does not point to a valid char.
465	fn char_at(&self, i: usize) -> char {
466	self.pattern()[i..]
467	.chars()
468	.next()
469	.unwrap_or_else(\|\| panic!("expected char at offset {}", i))
470	}
471
472	/// Bump the parser to the next Unicode scalar value.
473	///
474	/// If the end of the input has been reached, then `false` is returned.
475	fn bump(&self) -> bool {
476	if self.is_eof() {
477	return `false`;
478	}
479	let Position { mut offset, mut line, mut column } = self.pos();
480	if self.char() == '`\n`' {
481	line = line.checked_add(`1`).unwrap();
482	column = `1`;
483	} else {
484	column = column.checked_add(`1`).unwrap();
485	}
486	offset += self.char().len_utf8();
487	self.parser().pos.set(Position { offset, line, column });
488	self.pattern()[self.offset()..].chars().next().is_some()
489	}
490
491	/// If the substring starting at the current position of the parser has
492	/// the given prefix, then bump the parser to the character immediately
493	/// following the prefix and return true. Otherwise, don't bump the parser
494	/// and return false.
495	fn bump_if(&self, prefix: &str) -> bool {
496	if self.pattern()[self.offset()..].starts_with(prefix) {
497	for _ in `0`..prefix.chars().count() {
498	self.bump();
499	}
500	`true`
501	} else {
502	`false`
503	}
504	}
505
506	/// Returns true if and only if the parser is positioned at a look-around
507	/// prefix. The conditions under which this returns true must always
508	/// correspond to a regular expression that would otherwise be consider
509	/// invalid.
510	///
511	/// This should only be called immediately after parsing the opening of
512	/// a group or a set of flags.
513	fn is_lookaround_prefix(&self) -> bool {
514	self.bump_if("?=")
515	\|\| self.bump_if("?!")
516	\|\| self.bump_if("?<=")
517	\|\| self.bump_if("?<!")
518	}
519
520	/// Bump the parser, and if the `x` flag is enabled, bump through any
521	/// subsequent spaces. Return true if and only if the parser is not at
522	/// EOF.
523	fn bump_and_bump_space(&self) -> bool {
524	if !self.bump() {
525	return `false`;
526	}
527	self.bump_space();
528	!self.is_eof()
529	}
530
531	/// If the `x` flag is enabled (i.e., whitespace insensitivity with
532	/// comments), then this will advance the parser through all whitespace
533	/// and comments to the next non-whitespace non-comment byte.
534	///
535	/// If the `x` flag is disabled, then this is a no-op.
536	///
537	/// This should be used selectively throughout the parser where
538	/// arbitrary whitespace is permitted when the `x` flag is enabled. For
539	/// example, `{ 5 , 6}` is equivalent to `{5,6}`.
540	fn bump_space(&self) {
541	if !self.ignore_whitespace() {
542	return;
543	}
544	while !self.is_eof() {
545	if self.char().is_whitespace() {
546	self.bump();
547	} else if self.char() == '#' {
548	let start = self.pos();
549	let mut comment_text = String::new();
550	self.bump();
551	while !self.is_eof() {
552	let c = self.char();
553	self.bump();
554	if c == '`\n`' {
555	break;
556	}
557	comment_text.push(c);
558	}
559	let comment = ast::Comment {
560	span: Span::new(start, self.pos()),
561	comment: comment_text,
562	};
563	self.parser().comments.borrow_mut().push(comment);
564	} else {
565	break;
566	}
567	}
568	}
569
570	/// Peek at the next character in the input without advancing the parser.
571	///
572	/// If the input has been exhausted, then this returns `None`.
573	fn peek(&self) -> Option<char> {
574	if self.is_eof() {
575	return None;
576	}
577	self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
578	}
579
580	/// Like peek, but will ignore spaces when the parser is in whitespace
581	/// insensitive mode.
582	fn peek_space(&self) -> Option<char> {
583	if !self.ignore_whitespace() {
584	return self.peek();
585	}
586	if self.is_eof() {
587	return None;
588	}
589	let mut start = self.offset() + self.char().len_utf8();
590	let mut in_comment = `false`;
591	for (i, c) in self.pattern()[start..].char_indices() {
592	if c.is_whitespace() {
593	continue;
594	} else if !in_comment && c == '#' {
595	in_comment = `true`;
596	} else if in_comment && c == '`\n`' {
597	in_comment = `false`;
598	} else {
599	start += i;
600	break;
601	}
602	}
603	self.pattern()[start..].chars().next()
604	}
605
606	/// Returns true if the next call to `bump` would return false.
607	fn is_eof(&self) -> bool {
608	self.offset() == self.pattern().len()
609	}
610
611	/// Return the current position of the parser, which includes the offset,
612	/// line and column.
613	fn pos(&self) -> Position {
614	self.parser().pos.get()
615	}
616
617	/// Create a span at the current position of the parser. Both the start
618	/// and end of the span are set.
619	fn span(&self) -> Span {
620	Span::splat(self.pos())
621	}
622
623	/// Create a span that covers the current character.
624	fn span_char(&self) -> Span {
625	let mut next = Position {
626	offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
627	line: self.line(),
628	column: self.column().checked_add(`1`).unwrap(),
629	};
630	if self.char() == '`\n`' {
631	next.line += `1`;
632	next.column = `1`;
633	}
634	Span::new(self.pos(), next)
635	}
636
637	/// Parse and push a single alternation on to the parser's internal stack.
638	/// If the top of the stack already has an alternation, then add to that
639	/// instead of pushing a new one.
640	///
641	/// The concatenation given corresponds to a single alternation branch.
642	/// The concatenation returned starts the next branch and is empty.
643	///
644	/// This assumes the parser is currently positioned at `\|` and will advance
645	/// the parser to the character following `\|`.
646	#[inline(never)]
647	fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
648	assert_eq!(self.char(), '\|');
649	concat.span.end = self.pos();
650	self.push_or_add_alternation(concat);
651	self.bump();
652	Ok(ast::Concat { span: self.span(), asts: vec![] })
653	}
654
655	/// Pushes or adds the given branch of an alternation to the parser's
656	/// internal stack of state.
657	fn push_or_add_alternation(&self, concat: ast::Concat) {
658	use self::GroupState::*;
659
660	let mut stack = self.parser().stack_group.borrow_mut();
661	if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
662	alts.asts.push(concat.into_ast());
663	return;
664	}
665	stack.push(Alternation(ast::Alternation {
666	span: Span::new(concat.span.start, self.pos()),
667	asts: vec![concat.into_ast()],
668	}));
669	}
670
671	/// Parse and push a group AST (and its parent concatenation) on to the
672	/// parser's internal stack. Return a fresh concatenation corresponding
673	/// to the group's sub-AST.
674	///
675	/// If a set of flags was found (with no group), then the concatenation
676	/// is returned with that set of flags added.
677	///
678	/// This assumes that the parser is currently positioned on the opening
679	/// parenthesis. It advances the parser to the character at the start
680	/// of the sub-expression (or adjoining expression).
681	///
682	/// If there was a problem parsing the start of the group, then an error
683	/// is returned.
684	#[inline(never)]
685	fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
686	assert_eq!(self.char(), '(');
687	match self.parse_group()? {
688	Either::Left(set) => {
689	let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
690	if let Some(v) = ignore {
691	self.parser().ignore_whitespace.set(v);
692	}
693
694	concat.asts.push(Ast::flags(set));
695	Ok(concat)
696	}
697	Either::Right(group) => {
698	let old_ignore_whitespace = self.ignore_whitespace();
699	let new_ignore_whitespace = group
700	.flags()
701	.and_then(\|f\| f.flag_state(ast::Flag::IgnoreWhitespace))
702	.unwrap_or(old_ignore_whitespace);
703	self.parser().stack_group.borrow_mut().push(
704	GroupState::Group {
705	concat,
706	group,
707	ignore_whitespace: old_ignore_whitespace,
708	},
709	);
710	self.parser().ignore_whitespace.set(new_ignore_whitespace);
711	Ok(ast::Concat { span: self.span(), asts: vec![] })
712	}
713	}
714	}
715
716	/// Pop a group AST from the parser's internal stack and set the group's
717	/// AST to the given concatenation. Return the concatenation containing
718	/// the group.
719	///
720	/// This assumes that the parser is currently positioned on the closing
721	/// parenthesis and advances the parser to the character following the `)`.
722	///
723	/// If no such group could be popped, then an unopened group error is
724	/// returned.
725	#[inline(never)]
726	fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
727	use self::GroupState::*;
728
729	assert_eq!(self.char(), ')');
730	let mut stack = self.parser().stack_group.borrow_mut();
731	let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
732	.pop()
733	{
734	Some(Group { concat, group, ignore_whitespace }) => {
735	(concat, group, ignore_whitespace, None)
736	}
737	Some(Alternation(alt)) => match stack.pop() {
738	Some(Group { concat, group, ignore_whitespace }) => {
739	(concat, group, ignore_whitespace, Some(alt))
740	}
741	None \| Some(Alternation(_)) => {
742	return Err(self.error(
743	self.span_char(),
744	ast::ErrorKind::GroupUnopened,
745	));
746	}
747	},
748	None => {
749	return Err(self
750	.error(self.span_char(), ast::ErrorKind::GroupUnopened));
751	}
752	};
753	self.parser().ignore_whitespace.set(ignore_whitespace);
754	group_concat.span.end = self.pos();
755	self.bump();
756	group.span.end = self.pos();
757	match alt {
758	Some(mut alt) => {
759	alt.span.end = group_concat.span.end;
760	alt.asts.push(group_concat.into_ast());
761	group.ast = Box::new(alt.into_ast());
762	}
763	None => {
764	group.ast = Box::new(group_concat.into_ast());
765	}
766	}
767	prior_concat.asts.push(Ast::group(group));
768	Ok(prior_concat)
769	}
770
771	/// Pop the last state from the parser's internal stack, if it exists, and
772	/// add the given concatenation to it. There either must be no state or a
773	/// single alternation item on the stack. Any other scenario produces an
774	/// error.
775	///
776	/// This assumes that the parser has advanced to the end.
777	#[inline(never)]
778	fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
779	concat.span.end = self.pos();
780	let mut stack = self.parser().stack_group.borrow_mut();
781	let ast = match stack.pop() {
782	None => Ok(concat.into_ast()),
783	Some(GroupState::Alternation(mut alt)) => {
784	alt.span.end = self.pos();
785	alt.asts.push(concat.into_ast());
786	Ok(Ast::alternation(alt))
787	}
788	Some(GroupState::Group { group, .. }) => {
789	return Err(
790	self.error(group.span, ast::ErrorKind::GroupUnclosed)
791	);
792	}
793	};
794	// If we try to pop again, there should be nothing.
795	match stack.pop() {
796	None => ast,
797	Some(GroupState::Alternation(_)) => {
798	// This unreachable is unfortunate. This case can't happen
799	// because the only way we can be here is if there were two
800	// `GroupState::Alternation`s adjacent in the parser's stack,
801	// which we guarantee to never happen because we never push a
802	// `GroupState::Alternation` if one is already at the top of
803	// the stack.
804	unreachable!()
805	}
806	Some(GroupState::Group { group, .. }) => {
807	Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
808	}
809	}
810	}
811
812	/// Parse the opening of a character class and push the current class
813	/// parsing context onto the parser's stack. This assumes that the parser
814	/// is positioned at an opening `[`. The given union should correspond to
815	/// the union of set items built up before seeing the `[`.
816	///
817	/// If there was a problem parsing the opening of the class, then an error
818	/// is returned. Otherwise, a new union of set items for the class is
819	/// returned (which may be populated with either a `]` or a `-`).
820	#[inline(never)]
821	fn push_class_open(
822	&self,
823	parent_union: ast::ClassSetUnion,
824	) -> Result<ast::ClassSetUnion> {
825	assert_eq!(self.char(), '[');
826
827	let (nested_set, nested_union) = self.parse_set_class_open()?;
828	self.parser()
829	.stack_class
830	.borrow_mut()
831	.push(ClassState::Open { union: parent_union, set: nested_set });
832	Ok(nested_union)
833	}
834
835	/// Parse the end of a character class set and pop the character class
836	/// parser stack. The union given corresponds to the last union built
837	/// before seeing the closing `]`. The union returned corresponds to the
838	/// parent character class set with the nested class added to it.
839	///
840	/// This assumes that the parser is positioned at a `]` and will advance
841	/// the parser to the byte immediately following the `]`.
842	///
843	/// If the stack is empty after popping, then this returns the final
844	/// "top-level" character class AST (where a "top-level" character class
845	/// is one that is not nested inside any other character class).
846	///
847	/// If there is no corresponding opening bracket on the parser's stack,
848	/// then an error is returned.
849	#[inline(never)]
850	fn pop_class(
851	&self,
852	nested_union: ast::ClassSetUnion,
853	) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
854	assert_eq!(self.char(), ']');
855
856	let item = ast::ClassSet::Item(nested_union.into_item());
857	let prevset = self.pop_class_op(item);
858	let mut stack = self.parser().stack_class.borrow_mut();
859	match stack.pop() {
860	None => {
861	// We can never observe an empty stack:
862	//
863	// 1) We are guaranteed to start with a non-empty stack since
864	// the character class parser is only initiated when it sees
865	// a `[`.
866	// 2) If we ever observe an empty stack while popping after
867	// seeing a `]`, then we signal the character class parser
868	// to terminate.
869	panic!("unexpected empty character class stack")
870	}
871	Some(ClassState::Op { .. }) => {
872	// This panic is unfortunate, but this case is impossible
873	// since we already popped the Op state if one exists above.
874	// Namely, every push to the class parser stack is guarded by
875	// whether an existing Op is already on the top of the stack.
876	// If it is, the existing Op is modified. That is, the stack
877	// can never have consecutive Op states.
878	panic!("unexpected ClassState::Op")
879	}
880	Some(ClassState::Open { mut union, mut set }) => {
881	self.bump();
882	set.span.end = self.pos();
883	set.kind = prevset;
884	if stack.is_empty() {
885	Ok(Either::Right(set))
886	} else {
887	union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
888	Ok(Either::Left(union))
889	}
890	}
891	}
892	}
893
894	/// Return an "unclosed class" error whose span points to the most
895	/// recently opened class.
896	///
897	/// This should only be called while parsing a character class.
898	#[inline(never)]
899	fn unclosed_class_error(&self) -> ast::Error {
900	for state in self.parser().stack_class.borrow().iter().rev() {
901	if let ClassState::Open { ref set, .. } = *state {
902	return self.error(set.span, ast::ErrorKind::ClassUnclosed);
903	}
904	}
905	// We are guaranteed to have a non-empty stack with at least
906	// one open bracket, so we should never get here.
907	panic!("no open character class found")
908	}
909
910	/// Push the current set of class items on to the class parser's stack as
911	/// the left hand side of the given operator.
912	///
913	/// A fresh set union is returned, which should be used to build the right
914	/// hand side of this operator.
915	#[inline(never)]
916	fn push_class_op(
917	&self,
918	next_kind: ast::ClassSetBinaryOpKind,
919	next_union: ast::ClassSetUnion,
920	) -> ast::ClassSetUnion {
921	let item = ast::ClassSet::Item(next_union.into_item());
922	let new_lhs = self.pop_class_op(item);
923	self.parser()
924	.stack_class
925	.borrow_mut()
926	.push(ClassState::Op { kind: next_kind, lhs: new_lhs });
927	ast::ClassSetUnion { span: self.span(), items: vec![] }
928	}
929
930	/// Pop a character class set from the character class parser stack. If the
931	/// top of the stack is just an item (not an operation), then return the
932	/// given set unchanged. If the top of the stack is an operation, then the
933	/// given set will be used as the rhs of the operation on the top of the
934	/// stack. In that case, the binary operation is returned as a set.
935	#[inline(never)]
936	fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
937	let mut stack = self.parser().stack_class.borrow_mut();
938	let (kind, lhs) = match stack.pop() {
939	Some(ClassState::Op { kind, lhs }) => (kind, lhs),
940	Some(state @ ClassState::Open { .. }) => {
941	stack.push(state);
942	return rhs;
943	}
944	None => unreachable!(),
945	};
946	let span = Span::new(lhs.span().start, rhs.span().end);
947	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
948	span,
949	kind,
950	lhs: Box::new(lhs),
951	rhs: Box::new(rhs),
952	})
953	}
954	}
955
956	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
957	/// Parse the regular expression into an abstract syntax tree.
958	fn parse(&self) -> Result<Ast> {
959	self.parse_with_comments().map(\|astc\| astc.ast)
960	}
961
962	/// Parse the regular expression and return an abstract syntax tree with
963	/// all of the comments found in the pattern.
964	fn parse_with_comments(&self) -> Result<ast::WithComments> {
965	assert_eq!(self.offset(), `0`, "parser can only be used once");
966	self.parser().reset();
967	let mut concat = ast::Concat { span: self.span(), asts: vec![] };
968	loop {
969	self.bump_space();
970	if self.is_eof() {
971	break;
972	}
973	match self.char() {
974	'(' => concat = self.push_group(concat)?,
975	')' => concat = self.pop_group(concat)?,
976	'\|' => concat = self.push_alternate(concat)?,
977	'[' => {
978	let class = self.parse_set_class()?;
979	concat.asts.push(Ast::class_bracketed(class));
980	}
981	'?' => {
982	concat = self.parse_uncounted_repetition(
983	concat,
984	ast::RepetitionKind::ZeroOrOne,
985	)?;
986	}
987	'*' => {
988	concat = self.parse_uncounted_repetition(
989	concat,
990	ast::RepetitionKind::ZeroOrMore,
991	)?;
992	}
993	'+' => {
994	concat = self.parse_uncounted_repetition(
995	concat,
996	ast::RepetitionKind::OneOrMore,
997	)?;
998	}
999	'{' => {
1000	concat = self.parse_counted_repetition(concat)?;
1001	}
1002	_ => concat.asts.push(self.parse_primitive()?.into_ast()),
1003	}
1004	}
1005	let ast = self.pop_group_end(concat)?;
1006	NestLimiter::new(self).check(&ast)?;
1007	Ok(ast::WithComments {
1008	ast,
1009	comments: mem::replace(
1010	&mut *self.parser().comments.borrow_mut(),
1011	vec![],
1012	),
1013	})
1014	}
1015
1016	/// Parses an uncounted repetition operation. An uncounted repetition
1017	/// operator includes ?, and +, but does not include the {m,n} syntax.*
1018	/// The given `kind` should correspond to the operator observed by the
1019	/// caller.
1020	///
1021	/// This assumes that the parser is currently positioned at the repetition
1022	/// operator and advances the parser to the first character after the
1023	/// operator. (Note that the operator may include a single additional `?`,
1024	/// which makes the operator ungreedy.)
1025	///
1026	/// The caller should include the concatenation that is being built. The
1027	/// concatenation returned includes the repetition operator applied to the
1028	/// last expression in the given concatenation.
1029	#[inline(never)]
1030	fn parse_uncounted_repetition(
1031	&self,
1032	mut concat: ast::Concat,
1033	kind: ast::RepetitionKind,
1034	) -> Result<ast::Concat> {
1035	assert!(
1036	self.char() == '?' \|\| self.char() == '*' \|\| self.char() == '+'
1037	);
1038	let op_start = self.pos();
1039	let ast = match concat.asts.pop() {
1040	Some(ast) => ast,
1041	None => {
1042	return Err(
1043	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044	)
1045	}
1046	};
1047	match ast {
1048	Ast::Empty(_) \| Ast::Flags(_) => {
1049	return Err(
1050	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1051	)
1052	}
1053	_ => {}
1054	}
1055	let mut greedy = `true`;
1056	if self.bump() && self.char() == '?' {
1057	greedy = `false`;
1058	self.bump();
1059	}
1060	concat.asts.push(Ast::repetition(ast::Repetition {
1061	span: ast.span().with_end(self.pos()),
1062	op: ast::RepetitionOp {
1063	span: Span::new(op_start, self.pos()),
1064	kind,
1065	},
1066	greedy,
1067	ast: Box::new(ast),
1068	}));
1069	Ok(concat)
1070	}
1071
1072	/// Parses a counted repetition operation. A counted repetition operator
1073	/// corresponds to the {m,n} syntax, and does not include the ?, or +*
1074	/// operators.
1075	///
1076	/// This assumes that the parser is currently positioned at the opening `{`
1077	/// and advances the parser to the first character after the operator.
1078	/// (Note that the operator may include a single additional `?`, which
1079	/// makes the operator ungreedy.)
1080	///
1081	/// The caller should include the concatenation that is being built. The
1082	/// concatenation returned includes the repetition operator applied to the
1083	/// last expression in the given concatenation.
1084	#[inline(never)]
1085	fn parse_counted_repetition(
1086	&self,
1087	mut concat: ast::Concat,
1088	) -> Result<ast::Concat> {
1089	assert!(self.char() == '{');
1090	let start = self.pos();
1091	let ast = match concat.asts.pop() {
1092	Some(ast) => ast,
1093	None => {
1094	return Err(
1095	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096	)
1097	}
1098	};
1099	match ast {
1100	Ast::Empty(_) \| Ast::Flags(_) => {
1101	return Err(
1102	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1103	)
1104	}
1105	_ => {}
1106	}
1107	if !self.bump_and_bump_space() {
1108	return Err(self.error(
1109	Span::new(start, self.pos()),
1110	ast::ErrorKind::RepetitionCountUnclosed,
1111	));
1112	}
1113	let count_start = specialize_err(
1114	self.parse_decimal(),
1115	ast::ErrorKind::DecimalEmpty,
1116	ast::ErrorKind::RepetitionCountDecimalEmpty,
1117	)?;
1118	let mut range = ast::RepetitionRange::Exactly(count_start);
1119	if self.is_eof() {
1120	return Err(self.error(
1121	Span::new(start, self.pos()),
1122	ast::ErrorKind::RepetitionCountUnclosed,
1123	));
1124	}
1125	if self.char() == ',' {
1126	if !self.bump_and_bump_space() {
1127	return Err(self.error(
1128	Span::new(start, self.pos()),
1129	ast::ErrorKind::RepetitionCountUnclosed,
1130	));
1131	}
1132	if self.char() != '}' {
1133	let count_end = specialize_err(
1134	self.parse_decimal(),
1135	ast::ErrorKind::DecimalEmpty,
1136	ast::ErrorKind::RepetitionCountDecimalEmpty,
1137	)?;
1138	range = ast::RepetitionRange::Bounded(count_start, count_end);
1139	} else {
1140	range = ast::RepetitionRange::AtLeast(count_start);
1141	}
1142	}
1143	if self.is_eof() \|\| self.char() != '}' {
1144	return Err(self.error(
1145	Span::new(start, self.pos()),
1146	ast::ErrorKind::RepetitionCountUnclosed,
1147	));
1148	}
1149
1150	let mut greedy = `true`;
1151	if self.bump_and_bump_space() && self.char() == '?' {
1152	greedy = `false`;
1153	self.bump();
1154	}
1155
1156	let op_span = Span::new(start, self.pos());
1157	if !range.is_valid() {
1158	return Err(
1159	self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1160	);
1161	}
1162	concat.asts.push(Ast::repetition(ast::Repetition {
1163	span: ast.span().with_end(self.pos()),
1164	op: ast::RepetitionOp {
1165	span: op_span,
1166	kind: ast::RepetitionKind::Range(range),
1167	},
1168	greedy,
1169	ast: Box::new(ast),
1170	}));
1171	Ok(concat)
1172	}
1173
1174	/// Parse a group (which contains a sub-expression) or a set of flags.
1175	///
1176	/// If a group was found, then it is returned with an empty AST. If a set
1177	/// of flags is found, then that set is returned.
1178	///
1179	/// The parser should be positioned at the opening parenthesis.
1180	///
1181	/// This advances the parser to the character before the start of the
1182	/// sub-expression (in the case of a group) or to the closing parenthesis
1183	/// immediately following the set of flags.
1184	///
1185	/// # Errors
1186	///
1187	/// If flags are given and incorrectly specified, then a corresponding
1188	/// error is returned.
1189	///
1190	/// If a capture name is given and it is incorrectly specified, then a
1191	/// corresponding error is returned.
1192	#[inline(never)]
1193	fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1194	assert_eq!(self.char(), '(');
1195	let open_span = self.span_char();
1196	self.bump();
1197	self.bump_space();
1198	if self.is_lookaround_prefix() {
1199	return Err(self.error(
1200	Span::new(open_span.start, self.span().end),
1201	ast::ErrorKind::UnsupportedLookAround,
1202	));
1203	}
1204	let inner_span = self.span();
1205	let mut starts_with_p = `true`;
1206	if self.bump_if("?P<") \|\| {
1207	starts_with_p = `false`;
1208	self.bump_if("?<")
1209	} {
1210	let capture_index = self.next_capture_index(open_span)?;
1211	let name = self.parse_capture_name(capture_index)?;
1212	Ok(Either::Right(ast::Group {
1213	span: open_span,
1214	kind: ast::GroupKind::CaptureName { starts_with_p, name },
1215	ast: Box::new(Ast::empty(self.span())),
1216	}))
1217	} else if self.bump_if("?") {
1218	if self.is_eof() {
1219	return Err(
1220	self.error(open_span, ast::ErrorKind::GroupUnclosed)
1221	);
1222	}
1223	let flags = self.parse_flags()?;
1224	let char_end = self.char();
1225	self.bump();
1226	if char_end == ')' {
1227	// We don't allow empty flags, e.g., `(?)`. We instead
1228	// interpret it as a repetition operator missing its argument.
1229	if flags.items.is_empty() {
1230	return Err(self.error(
1231	inner_span,
1232	ast::ErrorKind::RepetitionMissing,
1233	));
1234	}
1235	Ok(Either::Left(ast::SetFlags {
1236	span: Span { end: self.pos(), ..open_span },
1237	flags,
1238	}))
1239	} else {
1240	assert_eq!(char_end, ':');
1241	Ok(Either::Right(ast::Group {
1242	span: open_span,
1243	kind: ast::GroupKind::NonCapturing(flags),
1244	ast: Box::new(Ast::empty(self.span())),
1245	}))
1246	}
1247	} else {
1248	let capture_index = self.next_capture_index(open_span)?;
1249	Ok(Either::Right(ast::Group {
1250	span: open_span,
1251	kind: ast::GroupKind::CaptureIndex(capture_index),
1252	ast: Box::new(Ast::empty(self.span())),
1253	}))
1254	}
1255	}
1256
1257	/// Parses a capture group name. Assumes that the parser is positioned at
1258	/// the first character in the name following the opening `<` (and may
1259	/// possibly be EOF). This advances the parser to the first character
1260	/// following the closing `>`.
1261	///
1262	/// The caller must provide the capture index of the group for this name.
1263	#[inline(never)]
1264	fn parse_capture_name(
1265	&self,
1266	capture_index: u32,
1267	) -> Result<ast::CaptureName> {
1268	if self.is_eof() {
1269	return Err(self
1270	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1271	}
1272	let start = self.pos();
1273	loop {
1274	if self.char() == '>' {
1275	break;
1276	}
1277	if !is_capture_char(self.char(), self.pos() == start) {
1278	return Err(self.error(
1279	self.span_char(),
1280	ast::ErrorKind::GroupNameInvalid,
1281	));
1282	}
1283	if !self.bump() {
1284	break;
1285	}
1286	}
1287	let end = self.pos();
1288	if self.is_eof() {
1289	return Err(self
1290	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1291	}
1292	assert_eq!(self.char(), '>');
1293	self.bump();
1294	let name = &self.pattern()[start.offset..end.offset];
1295	if name.is_empty() {
1296	return Err(self.error(
1297	Span::new(start, start),
1298	ast::ErrorKind::GroupNameEmpty,
1299	));
1300	}
1301	let capname = ast::CaptureName {
1302	span: Span::new(start, end),
1303	name: name.to_string(),
1304	index: capture_index,
1305	};
1306	self.add_capture_name(&capname)?;
1307	Ok(capname)
1308	}
1309
1310	/// Parse a sequence of flags starting at the current character.
1311	///
1312	/// This advances the parser to the character immediately following the
1313	/// flags, which is guaranteed to be either `:` or `)`.
1314	///
1315	/// # Errors
1316	///
1317	/// If any flags are duplicated, then an error is returned.
1318	///
1319	/// If the negation operator is used more than once, then an error is
1320	/// returned.
1321	///
1322	/// If no flags could be found or if the negation operation is not followed
1323	/// by any flags, then an error is returned.
1324	#[inline(never)]
1325	fn parse_flags(&self) -> Result<ast::Flags> {
1326	let mut flags = ast::Flags { span: self.span(), items: vec![] };
1327	let mut last_was_negation = None;
1328	while self.char() != ':' && self.char() != ')' {
1329	if self.char() == '-' {
1330	last_was_negation = Some(self.span_char());
1331	let item = ast::FlagsItem {
1332	span: self.span_char(),
1333	kind: ast::FlagsItemKind::Negation,
1334	};
1335	if let Some(i) = flags.add_item(item) {
1336	return Err(self.error(
1337	self.span_char(),
1338	ast::ErrorKind::FlagRepeatedNegation {
1339	original: flags.items[i].span,
1340	},
1341	));
1342	}
1343	} else {
1344	last_was_negation = None;
1345	let item = ast::FlagsItem {
1346	span: self.span_char(),
1347	kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1348	};
1349	if let Some(i) = flags.add_item(item) {
1350	return Err(self.error(
1351	self.span_char(),
1352	ast::ErrorKind::FlagDuplicate {
1353	original: flags.items[i].span,
1354	},
1355	));
1356	}
1357	}
1358	if !self.bump() {
1359	return Err(
1360	self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1361	);
1362	}
1363	}
1364	if let Some(span) = last_was_negation {
1365	return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1366	}
1367	flags.span.end = self.pos();
1368	Ok(flags)
1369	}
1370
1371	/// Parse the current character as a flag. Do not advance the parser.
1372	///
1373	/// # Errors
1374	///
1375	/// If the flag is not recognized, then an error is returned.
1376	#[inline(never)]
1377	fn parse_flag(&self) -> Result<ast::Flag> {
1378	match self.char() {
1379	'i' => Ok(ast::Flag::CaseInsensitive),
1380	'm' => Ok(ast::Flag::MultiLine),
1381	's' => Ok(ast::Flag::DotMatchesNewLine),
1382	'U' => Ok(ast::Flag::SwapGreed),
1383	'u' => Ok(ast::Flag::Unicode),
1384	'R' => Ok(ast::Flag::CRLF),
1385	'x' => Ok(ast::Flag::IgnoreWhitespace),
1386	_ => {
1387	Err(self
1388	.error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1389	}
1390	}
1391	}
1392
1393	/// Parse a primitive AST. e.g., A literal, non-set character class or
1394	/// assertion.
1395	///
1396	/// This assumes that the parser expects a primitive at the current
1397	/// location. i.e., All other non-primitive cases have been handled.
1398	/// For example, if the parser's position is at `\|`, then `\|` will be
1399	/// treated as a literal (e.g., inside a character class).
1400	///
1401	/// This advances the parser to the first character immediately following
1402	/// the primitive.
1403	fn parse_primitive(&self) -> Result<Primitive> {
1404	match self.char() {
1405	'`\\`' => self.parse_escape(),
1406	'.' => {
1407	let ast = Primitive::Dot(self.span_char());
1408	self.bump();
1409	Ok(ast)
1410	}
1411	'^' => {
1412	let ast = Primitive::Assertion(ast::Assertion {
1413	span: self.span_char(),
1414	kind: ast::AssertionKind::StartLine,
1415	});
1416	self.bump();
1417	Ok(ast)
1418	}
1419	'$' => {
1420	let ast = Primitive::Assertion(ast::Assertion {
1421	span: self.span_char(),
1422	kind: ast::AssertionKind::EndLine,
1423	});
1424	self.bump();
1425	Ok(ast)
1426	}
1427	c => {
1428	let ast = Primitive::Literal(ast::Literal {
1429	span: self.span_char(),
1430	kind: ast::LiteralKind::Verbatim,
1431	c,
1432	});
1433	self.bump();
1434	Ok(ast)
1435	}
1436	}
1437	}
1438
1439	/// Parse an escape sequence as a primitive AST.
1440	///
1441	/// This assumes the parser is positioned at the start of the escape
1442	/// sequence, i.e., `\`. It advances the parser to the first position
1443	/// immediately following the escape sequence.
1444	#[inline(never)]
1445	fn parse_escape(&self) -> Result<Primitive> {
1446	assert_eq!(self.char(), '`\\`');
1447	let start = self.pos();
1448	if !self.bump() {
1449	return Err(self.error(
1450	Span::new(start, self.pos()),
1451	ast::ErrorKind::EscapeUnexpectedEof,
1452	));
1453	}
1454	let c = self.char();
1455	// Put some of the more complicated routines into helpers.
1456	match c {
1457	'0'..='7' => {
1458	if !self.parser().octal {
1459	return Err(self.error(
1460	Span::new(start, self.span_char().end),
1461	ast::ErrorKind::UnsupportedBackreference,
1462	));
1463	}
1464	let mut lit = self.parse_octal();
1465	lit.span.start = start;
1466	return Ok(Primitive::Literal(lit));
1467	}
1468	'8'..='9' if !self.parser().octal => {
1469	return Err(self.error(
1470	Span::new(start, self.span_char().end),
1471	ast::ErrorKind::UnsupportedBackreference,
1472	));
1473	}
1474	'x' \| 'u' \| 'U' => {
1475	let mut lit = self.parse_hex()?;
1476	lit.span.start = start;
1477	return Ok(Primitive::Literal(lit));
1478	}
1479	'p' \| 'P' => {
1480	let mut cls = self.parse_unicode_class()?;
1481	cls.span.start = start;
1482	return Ok(Primitive::Unicode(cls));
1483	}
1484	'd' \| 's' \| 'w' \| 'D' \| 'S' \| 'W' => {
1485	let mut cls = self.parse_perl_class();
1486	cls.span.start = start;
1487	return Ok(Primitive::Perl(cls));
1488	}
1489	_ => {}
1490	}
1491
1492	// Handle all of the one letter sequences inline.
1493	self.bump();
1494	let span = Span::new(start, self.pos());
1495	if is_meta_character(c) {
1496	return Ok(Primitive::Literal(ast::Literal {
1497	span,
1498	kind: ast::LiteralKind::Meta,
1499	c,
1500	}));
1501	}
1502	if is_escapeable_character(c) {
1503	return Ok(Primitive::Literal(ast::Literal {
1504	span,
1505	kind: ast::LiteralKind::Superfluous,
1506	c,
1507	}));
1508	}
1509	let special = \|kind, c\| {
1510	Ok(Primitive::Literal(ast::Literal {
1511	span,
1512	kind: ast::LiteralKind::Special(kind),
1513	c,
1514	}))
1515	};
1516	match c {
1517	'a' => special(ast::SpecialLiteralKind::Bell, '`\x07`'),
1518	'f' => special(ast::SpecialLiteralKind::FormFeed, '`\x0C`'),
1519	't' => special(ast::SpecialLiteralKind::Tab, '`\t`'),
1520	'n' => special(ast::SpecialLiteralKind::LineFeed, '`\n`'),
1521	'r' => special(ast::SpecialLiteralKind::CarriageReturn, '`\r`'),
1522	'v' => special(ast::SpecialLiteralKind::VerticalTab, '`\x0B`'),
1523	'A' => Ok(Primitive::Assertion(ast::Assertion {
1524	span,
1525	kind: ast::AssertionKind::StartText,
1526	})),
1527	'z' => Ok(Primitive::Assertion(ast::Assertion {
1528	span,
1529	kind: ast::AssertionKind::EndText,
1530	})),
1531	'b' => {
1532	let mut wb = ast::Assertion {
1533	span,
1534	kind: ast::AssertionKind::WordBoundary,
1535	};
1536	// After a \b, we "try" to parse things like \b{start} for
1537	// special word boundary assertions.
1538	if !self.is_eof() && self.char() == '{' {
1539	if let Some(kind) =
1540	self.maybe_parse_special_word_boundary(start)?
1541	{
1542	wb.kind = kind;
1543	wb.span.end = self.pos();
1544	}
1545	}
1546	Ok(Primitive::Assertion(wb))
1547	}
1548	'B' => Ok(Primitive::Assertion(ast::Assertion {
1549	span,
1550	kind: ast::AssertionKind::NotWordBoundary,
1551	})),
1552	'<' => Ok(Primitive::Assertion(ast::Assertion {
1553	span,
1554	kind: ast::AssertionKind::WordBoundaryStartAngle,
1555	})),
1556	'>' => Ok(Primitive::Assertion(ast::Assertion {
1557	span,
1558	kind: ast::AssertionKind::WordBoundaryEndAngle,
1559	})),
1560	_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1561	}
1562	}
1563
1564	/// Attempt to parse a specialty word boundary. That is, `\b{start}`,
1565	/// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
1566	///
1567	/// This is similar to `maybe_parse_ascii_class` in that, in most cases,
1568	/// if it fails it will just return `None` with no error. This is done
1569	/// because `\b{5}` is a valid expression and we want to let that be parsed
1570	/// by the existing counted repetition parsing code. (I thought about just
1571	/// invoking the counted repetition code from here, but it seemed a little
1572	/// ham-fisted.)
1573	///
1574	/// Unlike `maybe_parse_ascii_class` though, this can return an error.
1575	/// Namely, if we definitely know it isn't a counted repetition, then we
1576	/// return an error specific to the specialty word boundaries.
1577	///
1578	/// This assumes the parser is positioned at a `{` immediately following
1579	/// a `\b`. When `None` is returned, the parser is returned to the position
1580	/// at which it started: pointing at a `{`.
1581	///
1582	/// The position given should correspond to the start of the `\b`.
1583	fn maybe_parse_special_word_boundary(
1584	&self,
1585	wb_start: Position,
1586	) -> Result<Option<ast::AssertionKind>> {
1587	assert_eq!(self.char(), '{');
1588
1589	let is_valid_char = \|c\| match c {
1590	'A'..='Z' \| 'a'..='z' \| '-' => `true`,
1591	_ => `false`,
1592	};
1593	let start = self.pos();
1594	if !self.bump_and_bump_space() {
1595	return Err(self.error(
1596	Span::new(wb_start, self.pos()),
1597	ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
1598	));
1599	}
1600	let start_contents = self.pos();
1601	// This is one of the critical bits: if the first non-whitespace
1602	// character isn't in [-A-Za-z] (i.e., this can't be a special word
1603	// boundary), then we bail and let the counted repetition parser deal
1604	// with this.
1605	if !is_valid_char(self.char()) {
1606	self.parser().pos.set(start);
1607	return Ok(None);
1608	}
1609
1610	// Now collect up our chars until we see a '}'.
1611	let mut scratch = self.parser().scratch.borrow_mut();
1612	scratch.clear();
1613	while !self.is_eof() && is_valid_char(self.char()) {
1614	scratch.push(self.char());
1615	self.bump_and_bump_space();
1616	}
1617	if self.is_eof() \|\| self.char() != '}' {
1618	return Err(self.error(
1619	Span::new(start, self.pos()),
1620	ast::ErrorKind::SpecialWordBoundaryUnclosed,
1621	));
1622	}
1623	let end = self.pos();
1624	self.bump();
1625	let kind = match scratch.as_str() {
1626	"start" => ast::AssertionKind::WordBoundaryStart,
1627	"end" => ast::AssertionKind::WordBoundaryEnd,
1628	"start-half" => ast::AssertionKind::WordBoundaryStartHalf,
1629	"end-half" => ast::AssertionKind::WordBoundaryEndHalf,
1630	_ => {
1631	return Err(self.error(
1632	Span::new(start_contents, end),
1633	ast::ErrorKind::SpecialWordBoundaryUnrecognized,
1634	))
1635	}
1636	};
1637	Ok(Some(kind))
1638	}
1639
1640	/// Parse an octal representation of a Unicode codepoint up to 3 digits
1641	/// long. This expects the parser to be positioned at the first octal
1642	/// digit and advances the parser to the first character immediately
1643	/// following the octal number. This also assumes that parsing octal
1644	/// escapes is enabled.
1645	///
1646	/// Assuming the preconditions are met, this routine can never fail.
1647	#[inline(never)]
1648	fn parse_octal(&self) -> ast::Literal {
1649	assert!(self.parser().octal);
1650	assert!('0' <= self.char() && self.char() <= '7');
1651	let start = self.pos();
1652	// Parse up to two more digits.
1653	while self.bump()
1654	&& '0' <= self.char()
1655	&& self.char() <= '7'
1656	&& self.pos().offset - start.offset <= `2`
1657	{}
1658	let end = self.pos();
1659	let octal = &self.pattern()[start.offset..end.offset];
1660	// Parsing the octal should never fail since the above guarantees a
1661	// valid number.
1662	let codepoint =
1663	u32::from_str_radix(octal, `8`).expect("valid octal number");
1664	// The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1665	// invalid Unicode scalar values.
1666	let c = char::from_u32(codepoint).expect("Unicode scalar value");
1667	ast::Literal {
1668	span: Span::new(start, end),
1669	kind: ast::LiteralKind::Octal,
1670	c,
1671	}
1672	}
1673
1674	/// Parse a hex representation of a Unicode codepoint. This handles both
1675	/// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1676	/// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1677	/// the first character immediately following the hexadecimal literal.
1678	#[inline(never)]
1679	fn parse_hex(&self) -> Result<ast::Literal> {
1680	assert!(
1681	self.char() == 'x' \|\| self.char() == 'u' \|\| self.char() == 'U'
1682	);
1683
1684	let hex_kind = match self.char() {
1685	'x' => ast::HexLiteralKind::X,
1686	'u' => ast::HexLiteralKind::UnicodeShort,
1687	_ => ast::HexLiteralKind::UnicodeLong,
1688	};
1689	if !self.bump_and_bump_space() {
1690	return Err(
1691	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1692	);
1693	}
1694	if self.char() == '{' {
1695	self.parse_hex_brace(hex_kind)
1696	} else {
1697	self.parse_hex_digits(hex_kind)
1698	}
1699	}
1700
1701	/// Parse an N-digit hex representation of a Unicode codepoint. This
1702	/// expects the parser to be positioned at the first digit and will advance
1703	/// the parser to the first character immediately following the escape
1704	/// sequence.
1705	///
1706	/// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1707	/// or 8 (for `\UNNNNNNNN`).
1708	#[inline(never)]
1709	fn parse_hex_digits(
1710	&self,
1711	kind: ast::HexLiteralKind,
1712	) -> Result<ast::Literal> {
1713	let mut scratch = self.parser().scratch.borrow_mut();
1714	scratch.clear();
1715
1716	let start = self.pos();
1717	for i in `0`..kind.digits() {
1718	if i > `0` && !self.bump_and_bump_space() {
1719	return Err(self
1720	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1721	}
1722	if !is_hex(self.char()) {
1723	return Err(self.error(
1724	self.span_char(),
1725	ast::ErrorKind::EscapeHexInvalidDigit,
1726	));
1727	}
1728	scratch.push(self.char());
1729	}
1730	// The final bump just moves the parser past the literal, which may
1731	// be EOF.
1732	self.bump_and_bump_space();
1733	let end = self.pos();
1734	let hex = scratch.as_str();
1735	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1736	None => Err(self.error(
1737	Span::new(start, end),
1738	ast::ErrorKind::EscapeHexInvalid,
1739	)),
1740	Some(c) => Ok(ast::Literal {
1741	span: Span::new(start, end),
1742	kind: ast::LiteralKind::HexFixed(kind),
1743	c,
1744	}),
1745	}
1746	}
1747
1748	/// Parse a hex representation of any Unicode scalar value. This expects
1749	/// the parser to be positioned at the opening brace `{` and will advance
1750	/// the parser to the first character following the closing brace `}`.
1751	#[inline(never)]
1752	fn parse_hex_brace(
1753	&self,
1754	kind: ast::HexLiteralKind,
1755	) -> Result<ast::Literal> {
1756	let mut scratch = self.parser().scratch.borrow_mut();
1757	scratch.clear();
1758
1759	let brace_pos = self.pos();
1760	let start = self.span_char().end;
1761	while self.bump_and_bump_space() && self.char() != '}' {
1762	if !is_hex(self.char()) {
1763	return Err(self.error(
1764	self.span_char(),
1765	ast::ErrorKind::EscapeHexInvalidDigit,
1766	));
1767	}
1768	scratch.push(self.char());
1769	}
1770	if self.is_eof() {
1771	return Err(self.error(
1772	Span::new(brace_pos, self.pos()),
1773	ast::ErrorKind::EscapeUnexpectedEof,
1774	));
1775	}
1776	let end = self.pos();
1777	let hex = scratch.as_str();
1778	assert_eq!(self.char(), '}');
1779	self.bump_and_bump_space();
1780
1781	if hex.is_empty() {
1782	return Err(self.error(
1783	Span::new(brace_pos, self.pos()),
1784	ast::ErrorKind::EscapeHexEmpty,
1785	));
1786	}
1787	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1788	None => Err(self.error(
1789	Span::new(start, end),
1790	ast::ErrorKind::EscapeHexInvalid,
1791	)),
1792	Some(c) => Ok(ast::Literal {
1793	span: Span::new(start, self.pos()),
1794	kind: ast::LiteralKind::HexBrace(kind),
1795	c,
1796	}),
1797	}
1798	}
1799
1800	/// Parse a decimal number into a u32 while trimming leading and trailing
1801	/// whitespace.
1802	///
1803	/// This expects the parser to be positioned at the first position where
1804	/// a decimal digit could occur. This will advance the parser to the byte
1805	/// immediately following the last contiguous decimal digit.
1806	///
1807	/// If no decimal digit could be found or if there was a problem parsing
1808	/// the complete set of digits into a u32, then an error is returned.
1809	fn parse_decimal(&self) -> Result<u32> {
1810	let mut scratch = self.parser().scratch.borrow_mut();
1811	scratch.clear();
1812
1813	while !self.is_eof() && self.char().is_whitespace() {
1814	self.bump();
1815	}
1816	let start = self.pos();
1817	while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1818	scratch.push(self.char());
1819	self.bump_and_bump_space();
1820	}
1821	let span = Span::new(start, self.pos());
1822	while !self.is_eof() && self.char().is_whitespace() {
1823	self.bump_and_bump_space();
1824	}
1825	let digits = scratch.as_str();
1826	if digits.is_empty() {
1827	return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1828	}
1829	match u32::from_str_radix(digits, `10`).ok() {
1830	Some(n) => Ok(n),
1831	None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1832	}
1833	}
1834
1835	/// Parse a standard character class consisting primarily of characters or
1836	/// character ranges, but can also contain nested character classes of
1837	/// any type (sans `.`).
1838	///
1839	/// This assumes the parser is positioned at the opening `[`. If parsing
1840	/// is successful, then the parser is advanced to the position immediately
1841	/// following the closing `]`.
1842	#[inline(never)]
1843	fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
1844	assert_eq!(self.char(), '[');
1845
1846	let mut union =
1847	ast::ClassSetUnion { span: self.span(), items: vec![] };
1848	loop {
1849	self.bump_space();
1850	if self.is_eof() {
1851	return Err(self.unclosed_class_error());
1852	}
1853	match self.char() {
1854	'[' => {
1855	// If we've already parsed the opening bracket, then
1856	// attempt to treat this as the beginning of an ASCII
1857	// class. If ASCII class parsing fails, then the parser
1858	// backs up to `[`.
1859	if !self.parser().stack_class.borrow().is_empty() {
1860	if let Some(cls) = self.maybe_parse_ascii_class() {
1861	union.push(ast::ClassSetItem::Ascii(cls));
1862	continue;
1863	}
1864	}
1865	union = self.push_class_open(union)?;
1866	}
1867	']' => match self.pop_class(union)? {
1868	Either::Left(nested_union) => {
1869	union = nested_union;
1870	}
1871	Either::Right(class) => return Ok(class),
1872	},
1873	'&' if self.peek() == Some('&') => {
1874	assert!(self.bump_if("&&"));
1875	union = self.push_class_op(
1876	ast::ClassSetBinaryOpKind::Intersection,
1877	union,
1878	);
1879	}
1880	'-' if self.peek() == Some('-') => {
1881	assert!(self.bump_if("--"));
1882	union = self.push_class_op(
1883	ast::ClassSetBinaryOpKind::Difference,
1884	union,
1885	);
1886	}
1887	'~' if self.peek() == Some('~') => {
1888	assert!(self.bump_if("~~"));
1889	union = self.push_class_op(
1890	ast::ClassSetBinaryOpKind::SymmetricDifference,
1891	union,
1892	);
1893	}
1894	_ => {
1895	union.push(self.parse_set_class_range()?);
1896	}
1897	}
1898	}
1899	}
1900
1901	/// Parse a single primitive item in a character class set. The item to
1902	/// be parsed can either be one of a simple literal character, a range
1903	/// between two simple literal characters or a "primitive" character
1904	/// class like \w or \p{Greek}.
1905	///
1906	/// If an invalid escape is found, or if a character class is found where
1907	/// a simple literal is expected (e.g., in a range), then an error is
1908	/// returned.
1909	#[inline(never)]
1910	fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1911	let prim1 = self.parse_set_class_item()?;
1912	self.bump_space();
1913	if self.is_eof() {
1914	return Err(self.unclosed_class_error());
1915	}
1916	// If the next char isn't a `-`, then we don't have a range.
1917	// There are two exceptions. If the char after a `-` is a `]`, then
1918	// `-` is interpreted as a literal `-`. Alternatively, if the char
1919	// after a `-` is a `-`, then `--` corresponds to a "difference"
1920	// operation.
1921	if self.char() != '-'
1922	\|\| self.peek_space() == Some(']')
1923	\|\| self.peek_space() == Some('-')
1924	{
1925	return prim1.into_class_set_item(self);
1926	}
1927	// OK, now we're parsing a range, so bump past the `-` and parse the
1928	// second half of the range.
1929	if !self.bump_and_bump_space() {
1930	return Err(self.unclosed_class_error());
1931	}
1932	let prim2 = self.parse_set_class_item()?;
1933	let range = ast::ClassSetRange {
1934	span: Span::new(prim1.span().start, prim2.span().end),
1935	start: prim1.into_class_literal(self)?,
1936	end: prim2.into_class_literal(self)?,
1937	};
1938	if !range.is_valid() {
1939	return Err(
1940	self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1941	);
1942	}
1943	Ok(ast::ClassSetItem::Range(range))
1944	}
1945
1946	/// Parse a single item in a character class as a primitive, where the
1947	/// primitive either consists of a verbatim literal or a single escape
1948	/// sequence.
1949	///
1950	/// This assumes the parser is positioned at the beginning of a primitive,
1951	/// and advances the parser to the first position after the primitive if
1952	/// successful.
1953	///
1954	/// Note that it is the caller's responsibility to report an error if an
1955	/// illegal primitive was parsed.
1956	#[inline(never)]
1957	fn parse_set_class_item(&self) -> Result<Primitive> {
1958	if self.char() == '`\\`' {
1959	self.parse_escape()
1960	} else {
1961	let x = Primitive::Literal(ast::Literal {
1962	span: self.span_char(),
1963	kind: ast::LiteralKind::Verbatim,
1964	c: self.char(),
1965	});
1966	self.bump();
1967	Ok(x)
1968	}
1969	}
1970
1971	/// Parses the opening of a character class set. This includes the opening
1972	/// bracket along with `^` if present to indicate negation. This also
1973	/// starts parsing the opening set of unioned items if applicable, since
1974	/// there are special rules applied to certain characters in the opening
1975	/// of a character class. For example, `[^]]` is the class of all
1976	/// characters not equal to `]`. (`]` would need to be escaped in any other
1977	/// position.) Similarly for `-`.
1978	///
1979	/// In all cases, the op inside the returned `ast::ClassBracketed` is an
1980	/// empty union. This empty union should be replaced with the actual item
1981	/// when it is popped from the parser's stack.
1982	///
1983	/// This assumes the parser is positioned at the opening `[` and advances
1984	/// the parser to the first non-special byte of the character class.
1985	///
1986	/// An error is returned if EOF is found.
1987	#[inline(never)]
1988	fn parse_set_class_open(
1989	&self,
1990	) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1991	assert_eq!(self.char(), '[');
1992	let start = self.pos();
1993	if !self.bump_and_bump_space() {
1994	return Err(self.error(
1995	Span::new(start, self.pos()),
1996	ast::ErrorKind::ClassUnclosed,
1997	));
1998	}
1999
2000	let negated = if self.char() != '^' {
2001	`false`
2002	} else {
2003	if !self.bump_and_bump_space() {
2004	return Err(self.error(
2005	Span::new(start, self.pos()),
2006	ast::ErrorKind::ClassUnclosed,
2007	));
2008	}
2009	`true`
2010	};
2011	// Accept any number of `-` as literal `-`.
2012	let mut union =
2013	ast::ClassSetUnion { span: self.span(), items: vec![] };
2014	while self.char() == '-' {
2015	union.push(ast::ClassSetItem::Literal(ast::Literal {
2016	span: self.span_char(),
2017	kind: ast::LiteralKind::Verbatim,
2018	c: '-',
2019	}));
2020	if !self.bump_and_bump_space() {
2021	return Err(self.error(
2022	Span::new(start, start),
2023	ast::ErrorKind::ClassUnclosed,
2024	));
2025	}
2026	}
2027	// If `]` is the first* char in a set, then interpret it as a literal*
2028	// `]`. That is, an empty class is impossible to write.
2029	if union.items.is_empty() && self.char() == ']' {
2030	union.push(ast::ClassSetItem::Literal(ast::Literal {
2031	span: self.span_char(),
2032	kind: ast::LiteralKind::Verbatim,
2033	c: ']',
2034	}));
2035	if !self.bump_and_bump_space() {
2036	return Err(self.error(
2037	Span::new(start, self.pos()),
2038	ast::ErrorKind::ClassUnclosed,
2039	));
2040	}
2041	}
2042	let set = ast::ClassBracketed {
2043	span: Span::new(start, self.pos()),
2044	negated,
2045	kind: ast::ClassSet::union(ast::ClassSetUnion {
2046	span: Span::new(union.span.start, union.span.start),
2047	items: vec![],
2048	}),
2049	};
2050	Ok((set, union))
2051	}
2052
2053	/// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
2054	///
2055	/// This assumes the parser is positioned at the opening `[`.
2056	///
2057	/// If no valid ASCII character class could be found, then this does not
2058	/// advance the parser and `None` is returned. Otherwise, the parser is
2059	/// advanced to the first byte following the closing `]` and the
2060	/// corresponding ASCII class is returned.
2061	#[inline(never)]
2062	fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
2063	// ASCII character classes are interesting from a parsing perspective
2064	// because parsing cannot fail with any interesting error. For example,
2065	// in order to use an ASCII character class, it must be enclosed in
2066	// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
2067	// of it as "ASCII character classes have the syntax `[:NAME:]` which
2068	// can only appear within character brackets." This means that things
2069	// like `[[:lower:]A]` are legal constructs.
2070	//
2071	// However, if one types an incorrect ASCII character class, e.g.,
2072	// `[[:loower:]]`, then we treat that as a normal nested character
2073	// class containing the characters `:elorw`. One might argue that we
2074	// should return an error instead since the repeated colons give away
2075	// the intent to write an ASCII class. But what if the user typed
2076	// `[[:lower]]` instead? How can we tell that was intended to be an
2077	// ASCII class and not just a normal nested class?
2078	//
2079	// Reasonable people can probably disagree over this, but for better
2080	// or worse, we implement semantics that never fails at the expense
2081	// of better failure modes.
2082	assert_eq!(self.char(), '[');
2083	// If parsing fails, then we back up the parser to this starting point.
2084	let start = self.pos();
2085	let mut negated = `false`;
2086	if !self.bump() \|\| self.char() != ':' {
2087	self.parser().pos.set(start);
2088	return None;
2089	}
2090	if !self.bump() {
2091	self.parser().pos.set(start);
2092	return None;
2093	}
2094	if self.char() == '^' {
2095	negated = `true`;
2096	if !self.bump() {
2097	self.parser().pos.set(start);
2098	return None;
2099	}
2100	}
2101	let name_start = self.offset();
2102	while self.char() != ':' && self.bump() {}
2103	if self.is_eof() {
2104	self.parser().pos.set(start);
2105	return None;
2106	}
2107	let name = &self.pattern()[name_start..self.offset()];
2108	if !self.bump_if(":]") {
2109	self.parser().pos.set(start);
2110	return None;
2111	}
2112	let kind = match ast::ClassAsciiKind::from_name(name) {
2113	Some(kind) => kind,
2114	None => {
2115	self.parser().pos.set(start);
2116	return None;
2117	}
2118	};
2119	Some(ast::ClassAscii {
2120	span: Span::new(start, self.pos()),
2121	kind,
2122	negated,
2123	})
2124	}
2125
2126	/// Parse a Unicode class in either the single character notation, `\pN`
2127	/// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2128	/// the parser is positioned at the `p` (or `P` for negation) and will
2129	/// advance the parser to the character immediately following the class.
2130	///
2131	/// Note that this does not check whether the class name is valid or not.
2132	#[inline(never)]
2133	fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2134	assert!(self.char() == 'p' \|\| self.char() == 'P');
2135
2136	let mut scratch = self.parser().scratch.borrow_mut();
2137	scratch.clear();
2138
2139	let negated = self.char() == 'P';
2140	if !self.bump_and_bump_space() {
2141	return Err(
2142	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2143	);
2144	}
2145	let (start, kind) = if self.char() == '{' {
2146	let start = self.span_char().end;
2147	while self.bump_and_bump_space() && self.char() != '}' {
2148	scratch.push(self.char());
2149	}
2150	if self.is_eof() {
2151	return Err(self
2152	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2153	}
2154	assert_eq!(self.char(), '}');
2155	self.bump();
2156
2157	let name = scratch.as_str();
2158	if let Some(i) = name.find("!=") {
2159	(
2160	start,
2161	ast::ClassUnicodeKind::NamedValue {
2162	op: ast::ClassUnicodeOpKind::NotEqual,
2163	name: name[..i].to_string(),
2164	value: name[i + `2`..].to_string(),
2165	},
2166	)
2167	} else if let Some(i) = name.find(':') {
2168	(
2169	start,
2170	ast::ClassUnicodeKind::NamedValue {
2171	op: ast::ClassUnicodeOpKind::Colon,
2172	name: name[..i].to_string(),
2173	value: name[i + `1`..].to_string(),
2174	},
2175	)
2176	} else if let Some(i) = name.find('=') {
2177	(
2178	start,
2179	ast::ClassUnicodeKind::NamedValue {
2180	op: ast::ClassUnicodeOpKind::Equal,
2181	name: name[..i].to_string(),
2182	value: name[i + `1`..].to_string(),
2183	},
2184	)
2185	} else {
2186	(start, ast::ClassUnicodeKind::Named(name.to_string()))
2187	}
2188	} else {
2189	let start = self.pos();
2190	let c = self.char();
2191	if c == '`\\`' {
2192	return Err(self.error(
2193	self.span_char(),
2194	ast::ErrorKind::UnicodeClassInvalid,
2195	));
2196	}
2197	self.bump_and_bump_space();
2198	let kind = ast::ClassUnicodeKind::OneLetter(c);
2199	(start, kind)
2200	};
2201	Ok(ast::ClassUnicode {
2202	span: Span::new(start, self.pos()),
2203	negated,
2204	kind,
2205	})
2206	}
2207
2208	/// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2209	/// parser is currently at a valid character class name and will be
2210	/// advanced to the character immediately following the class.
2211	#[inline(never)]
2212	fn parse_perl_class(&self) -> ast::ClassPerl {
2213	let c = self.char();
2214	let span = self.span_char();
2215	self.bump();
2216	let (negated, kind) = match c {
2217	'd' => (`false`, ast::ClassPerlKind::Digit),
2218	'D' => (`true`, ast::ClassPerlKind::Digit),
2219	's' => (`false`, ast::ClassPerlKind::Space),
2220	'S' => (`true`, ast::ClassPerlKind::Space),
2221	'w' => (`false`, ast::ClassPerlKind::Word),
2222	'W' => (`true`, ast::ClassPerlKind::Word),
2223	c => panic!("expected valid Perl class but got '{}'", c),
2224	};
2225	ast::ClassPerl { span, kind, negated }
2226	}
2227	}
2228
/// A type that traverses a fully parsed Ast and checks whether its depth
/// exceeds the specified nesting limit. If it does, then an error is returned.
///
/// The limit itself is not stored here; it is read from the parser
/// (`nest_limit`) each time the depth is incremented.
#[derive(Debug)]
struct NestLimiter<'p, 's, P> {
    /// The parser that is checking the nest limit. It is also used to
    /// build the error reported when the limit is exceeded.
    p: &'p ParserI<'s, P>,
    /// The current depth while walking an Ast. Starts at zero.
    depth: u32,
}
2238
2239	impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2240	fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2241	NestLimiter { p, depth: `0` }
2242	}
2243
2244	#[inline(never)]
2245	fn check(self, ast: &Ast) -> Result<()> {
2246	ast::visit(ast, self)
2247	}
2248
2249	fn increment_depth(&mut self, span: &Span) -> Result<()> {
2250	let new = self.depth.checked_add(`1`).ok_or_else(\|\| {
2251	self.p.error(
2252	span.clone(),
2253	ast::ErrorKind::NestLimitExceeded(u32::MAX),
2254	)
2255	})?;
2256	let limit = self.p.parser().nest_limit;
2257	if new > limit {
2258	return Err(self.p.error(
2259	span.clone(),
2260	ast::ErrorKind::NestLimitExceeded(limit),
2261	));
2262	}
2263	self.depth = new;
2264	Ok(())
2265	}
2266
2267	fn decrement_depth(&mut self) {
2268	// Assuming the correctness of the visitor, this should never drop
2269	// below 0.
2270	self.depth = self.depth.checked_sub(`1`).unwrap();
2271	}
2272	}
2273
2274	impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2275	type Output = ();
2276	type Err = ast::Error;
2277
2278	fn finish(self) -> Result<()> {
2279	Ok(())
2280	}
2281
2282	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2283	let span = match *ast {
2284	Ast::Empty(_)
2285	\| Ast::Flags(_)
2286	\| Ast::Literal(_)
2287	\| Ast::Dot(_)
2288	\| Ast::Assertion(_)
2289	\| Ast::ClassUnicode(_)
2290	\| Ast::ClassPerl(_) => {
2291	// These are all base cases, so we don't increment depth.
2292	return Ok(());
2293	}
2294	Ast::ClassBracketed(ref x) => &x.span,
2295	Ast::Repetition(ref x) => &x.span,
2296	Ast::Group(ref x) => &x.span,
2297	Ast::Alternation(ref x) => &x.span,
2298	Ast::Concat(ref x) => &x.span,
2299	};
2300	self.increment_depth(span)
2301	}
2302
2303	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2304	match *ast {
2305	Ast::Empty(_)
2306	\| Ast::Flags(_)
2307	\| Ast::Literal(_)
2308	\| Ast::Dot(_)
2309	\| Ast::Assertion(_)
2310	\| Ast::ClassUnicode(_)
2311	\| Ast::ClassPerl(_) => {
2312	// These are all base cases, so we don't decrement depth.
2313	Ok(())
2314	}
2315	Ast::ClassBracketed(_)
2316	\| Ast::Repetition(_)
2317	\| Ast::Group(_)
2318	\| Ast::Alternation(_)
2319	\| Ast::Concat(_) => {
2320	self.decrement_depth();
2321	Ok(())
2322	}
2323	}
2324	}
2325
2326	fn visit_class_set_item_pre(
2327	&mut self,
2328	ast: &ast::ClassSetItem,
2329	) -> Result<()> {
2330	let span = match *ast {
2331	ast::ClassSetItem::Empty(_)
2332	\| ast::ClassSetItem::Literal(_)
2333	\| ast::ClassSetItem::Range(_)
2334	\| ast::ClassSetItem::Ascii(_)
2335	\| ast::ClassSetItem::Unicode(_)
2336	\| ast::ClassSetItem::Perl(_) => {
2337	// These are all base cases, so we don't increment depth.
2338	return Ok(());
2339	}
2340	ast::ClassSetItem::Bracketed(ref x) => &x.span,
2341	ast::ClassSetItem::Union(ref x) => &x.span,
2342	};
2343	self.increment_depth(span)
2344	}
2345
2346	fn visit_class_set_item_post(
2347	&mut self,
2348	ast: &ast::ClassSetItem,
2349	) -> Result<()> {
2350	match *ast {
2351	ast::ClassSetItem::Empty(_)
2352	\| ast::ClassSetItem::Literal(_)
2353	\| ast::ClassSetItem::Range(_)
2354	\| ast::ClassSetItem::Ascii(_)
2355	\| ast::ClassSetItem::Unicode(_)
2356	\| ast::ClassSetItem::Perl(_) => {
2357	// These are all base cases, so we don't decrement depth.
2358	Ok(())
2359	}
2360	ast::ClassSetItem::Bracketed(_) \| ast::ClassSetItem::Union(_) => {
2361	self.decrement_depth();
2362	Ok(())
2363	}
2364	}
2365	}
2366
2367	fn visit_class_set_binary_op_pre(
2368	&mut self,
2369	ast: &ast::ClassSetBinaryOp,
2370	) -> Result<()> {
2371	self.increment_depth(&ast.span)
2372	}
2373
2374	fn visit_class_set_binary_op_post(
2375	&mut self,
2376	_ast: &ast::ClassSetBinaryOp,
2377	) -> Result<()> {
2378	self.decrement_depth();
2379	Ok(())
2380	}
2381	}
2382
2383	/// When the result is an error, transforms the ast::ErrorKind from the source
2384	/// Result into another one. This function is used to return clearer error
2385	/// messages when possible.
2386	fn specialize_err<T>(
2387	result: Result<T>,
2388	from: ast::ErrorKind,
2389	to: ast::ErrorKind,
2390	) -> Result<T> {
2391	if let Err(e: Error) = result {
2392	if e.kind == from {
2393	Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2394	} else {
2395	Err(e)
2396	}
2397	} else {
2398	result
2399	}
2400	}
2401
2402	#[cfg(test)]
2403	mod tests {
2404	use core::ops::Range;
2405
2406	use alloc::format;
2407
2408	use crate::ast::{self, Ast, Position, Span};
2409
2410	use super::*;
2411
// Our own assert_eq, which has slightly better formatting (but honestly
// still kind of crappy).
//
// It shadows the standard macro for the rest of this module and accepts
// exactly two expressions (no custom message). The operands are compared
// by reference, so neither is moved.
macro_rules! assert_eq {
    ($left:expr, $right:expr) => {{
        match (&$left, &$right) {
            (left_val, right_val) => {
                if !(left_val == right_val) {
                    panic!(
                        "assertion failed: `(left == right)`\n\n\
                         left:  `{:?}`\nright: `{:?}`\n\n",
                        left_val, right_val
                    )
                }
            }
        }
    }};
}
2429
// We create these errors to compare with real ast::Errors in the tests.
// We define equality between TestError and ast::Error to disregard the
// pattern string in ast::Error, which is annoying to provide in tests.
#[derive(Clone, Debug)]
struct TestError {
    // The span the parser is expected to blame.
    span: Span,
    // The expected kind of error.
    kind: ast::ErrorKind,
}
2438
2439	impl PartialEq<ast::Error> for TestError {
2440	fn eq(&self, other: &ast::Error) -> bool {
2441	self.span == other.span && self.kind == other.kind
2442	}
2443	}
2444
2445	impl PartialEq<TestError> for ast::Error {
2446	fn eq(&self, other: &TestError) -> bool {
2447	self.span == other.span && self.kind == other.kind
2448	}
2449	}
2450
/// Short alias for turning a string slice into an owned `String`.
fn s(str: &str) -> String {
    String::from(str)
}
2454
2455	fn parser(pattern: &str) -> ParserI<'_, Parser> {
2456	ParserI::new(Parser::new(), pattern)
2457	}
2458
2459	fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2460	let parser = ParserBuilder::new().octal(`true`).build();
2461	ParserI::new(parser, pattern)
2462	}
2463
2464	fn parser_nest_limit(
2465	pattern: &str,
2466	nest_limit: u32,
2467	) -> ParserI<'_, Parser> {
2468	let p = ParserBuilder::new().nest_limit(nest_limit).build();
2469	ParserI::new(p, pattern)
2470	}
2471
2472	fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2473	let p = ParserBuilder::new().ignore_whitespace(`true`).build();
2474	ParserI::new(p, pattern)
2475	}
2476
/// Short alias for creating a new span.
///
/// Forwards `start` and `end` to `Span::new` unchanged.
fn nspan(start: Position, end: Position) -> Span {
    Span::new(start, end)
}
2481
/// Short alias for creating a new position.
///
/// Forwards `offset`, `line` and `column` to `Position::new` unchanged.
fn npos(offset: usize, line: usize, column: usize) -> Position {
    Position::new(offset, line, column)
}
2486
2487	/// Create a new span from the given offset range. This assumes a single
2488	/// line and sets the columns based on the offsets. i.e., This only works
2489	/// out of the box for ASCII, which is fine for most tests.
2490	fn span(range: Range<usize>) -> Span {
2491	let start = Position::new(range.start, `1`, range.start + `1`);
2492	let end = Position::new(range.end, `1`, range.end + `1`);
2493	Span::new(start, end)
2494	}
2495
2496	/// Create a new span for the corresponding byte range in the given string.
2497	fn span_range(subject: &str, range: Range<usize>) -> Span {
2498	let start = Position {
2499	offset: range.start,
2500	line: `1` + subject[..range.start].matches('`\n`').count(),
2501	column: `1` + subject[..range.start]
2502	.chars()
2503	.rev()
2504	.position(\|c\| c == '`\n`')
2505	.unwrap_or(subject[..range.start].chars().count()),
2506	};
2507	let end = Position {
2508	offset: range.end,
2509	line: `1` + subject[..range.end].matches('`\n`').count(),
2510	column: `1` + subject[..range.end]
2511	.chars()
2512	.rev()
2513	.position(\|c\| c == '`\n`')
2514	.unwrap_or(subject[..range.end].chars().count()),
2515	};
2516	Span::new(start, end)
2517	}
2518
2519	/// Create a verbatim literal starting at the given position.
2520	fn lit(c: char, start: usize) -> Ast {
2521	lit_with(c, span(start..start + c.len_utf8()))
2522	}
2523
2524	/// Create a meta literal starting at the given position.
2525	fn meta_lit(c: char, span: Span) -> Ast {
2526	Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
2527	}
2528
2529	/// Create a verbatim literal with the given span.
2530	fn lit_with(c: char, span: Span) -> Ast {
2531	Ast::literal(ast::Literal {
2532	span,
2533	kind: ast::LiteralKind::Verbatim,
2534	c,
2535	})
2536	}
2537
2538	/// Create a concatenation with the given range.
2539	fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2540	concat_with(span(range), asts)
2541	}
2542
2543	/// Create a concatenation with the given span.
2544	fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2545	Ast::concat(ast::Concat { span, asts })
2546	}
2547
2548	/// Create an alternation with the given span.
2549	fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2550	Ast::alternation(ast::Alternation { span: span(range), asts })
2551	}
2552
2553	/// Create a capturing group with the given span.
2554	fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2555	Ast::group(ast::Group {
2556	span: span(range),
2557	kind: ast::GroupKind::CaptureIndex(index),
2558	ast: Box::new(ast),
2559	})
2560	}
2561
2562	/// Create an ast::SetFlags.
2563	///
2564	/// The given pattern should be the full pattern string. The range given
2565	/// should correspond to the byte offsets where the flag set occurs.
2566	///
2567	/// If negated is true, then the set is interpreted as beginning with a
2568	/// negation.
2569	fn flag_set(
2570	pat: &str,
2571	range: Range<usize>,
2572	flag: ast::Flag,
2573	negated: bool,
2574	) -> Ast {
2575	let mut items = vec![ast::FlagsItem {
2576	span: span_range(pat, (range.end - `2`)..(range.end - `1`)),
2577	kind: ast::FlagsItemKind::Flag(flag),
2578	}];
2579	if negated {
2580	items.insert(
2581	`0`,
2582	ast::FlagsItem {
2583	span: span_range(pat, (range.start + `2`)..(range.end - `2`)),
2584	kind: ast::FlagsItemKind::Negation,
2585	},
2586	);
2587	}
2588	Ast::flags(ast::SetFlags {
2589	span: span_range(pat, range.clone()),
2590	flags: ast::Flags {
2591	span: span_range(pat, (range.start + `2`)..(range.end - `1`)),
2592	items,
2593	},
2594	})
2595	}
2596
#[test]
fn parse_nest_limit() {
    /// Assert that parsing `pat` under `limit` fails with
    /// `NestLimitExceeded(limit)` reported at `range`.
    #[track_caller]
    fn assert_exceeded(pat: &str, limit: u32, range: Range<usize>) {
        assert_eq!(
            parser_nest_limit(pat, limit).parse().unwrap_err(),
            TestError {
                span: span(range),
                kind: ast::ErrorKind::NestLimitExceeded(limit),
            }
        );
    }

    // A nest limit of 0 still allows some types of regexes.
    assert_eq!(
        parser_nest_limit("", 0).parse(),
        Ok(Ast::empty(span(0..0)))
    );
    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

    // Test repetition operations, which require one level of nesting.
    assert_exceeded("a+", 0, 0..2);
    assert_eq!(
        parser_nest_limit("a+", 1).parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..2),
            op: ast::RepetitionOp {
                span: span(1..2),
                kind: ast::RepetitionKind::OneOrMore,
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_exceeded("(a)+", 1, 0..3);
    assert_exceeded("a+*", 1, 0..2);
    assert_eq!(
        parser_nest_limit("a+*", 2).parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..3),
            op: ast::RepetitionOp {
                span: span(2..3),
                kind: ast::RepetitionKind::ZeroOrMore,
            },
            greedy: true,
            ast: Box::new(Ast::repetition(ast::Repetition {
                span: span(0..2),
                op: ast::RepetitionOp {
                    span: span(1..2),
                    kind: ast::RepetitionKind::OneOrMore,
                },
                greedy: true,
                ast: Box::new(lit('a', 0)),
            })),
        }))
    );

    // Test concatenations. A concatenation requires one level of nesting.
    assert_exceeded("ab", 0, 0..2);
    assert_eq!(
        parser_nest_limit("ab", 1).parse(),
        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
    );
    assert_eq!(
        parser_nest_limit("abc", 1).parse(),
        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
    );

    // Test alternations. An alternation requires one level of nesting.
    assert_exceeded("a|b", 0, 0..3);
    assert_eq!(
        parser_nest_limit("a|b", 1).parse(),
        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser_nest_limit("a|b|c", 1).parse(),
        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );

    // Test character classes. Classes form their own mini-recursive
    // syntax!
    assert_exceeded("[a]", 0, 0..3);
    assert_eq!(
        parser_nest_limit("[a]", 1).parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
                ast::Literal {
                    span: span(1..2),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'a',
                }
            )),
        }))
    );
    assert_exceeded("[ab]", 1, 1..3);
    assert_exceeded("[ab[cd]]", 2, 3..7);
    assert_exceeded("[ab[cd]]", 3, 4..6);
    assert_exceeded("[a--b]", 1, 1..5);
    assert_exceeded("[a--bc]", 2, 4..6);
}
2754
#[test]
fn parse_comments() {
    // The pattern is spelled out line by line with explicit newlines so
    // that the byte offsets asserted below cannot drift when the source is
    // re-indented. Note the two spaces in front of comment 3.
    let pat = concat!(
        "(?x)\n",
        "# This is comment 1.\n",
        "foo # This is comment 2.\n",
        "  # This is comment 3.\n",
        "bar\n",
        "# This is comment 4.",
    );
    let astc = parser(pat).parse_with_comments().unwrap();
    assert_eq!(
        astc.ast,
        concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('f', span_range(pat, 26..27)),
                lit_with('o', span_range(pat, 27..28)),
                lit_with('o', span_range(pat, 28..29)),
                lit_with('b', span_range(pat, 74..75)),
                lit_with('a', span_range(pat, 75..76)),
                lit_with('r', span_range(pat, 76..77)),
            ]
        )
    );
    // Each comment span starts at its `#` and includes the trailing
    // newline when there is one; the comment text excludes both.
    assert_eq!(
        astc.comments,
        vec![
            ast::Comment {
                span: span_range(pat, 5..26),
                comment: s(" This is comment 1."),
            },
            ast::Comment {
                span: span_range(pat, 30..51),
                comment: s(" This is comment 2."),
            },
            ast::Comment {
                span: span_range(pat, 53..74),
                comment: s(" This is comment 3."),
            },
            ast::Comment {
                span: span_range(pat, 78..98),
                comment: s(" This is comment 4."),
            },
        ]
    );
}
2801
#[test]
fn parse_holistic() {
    // A lone `]` outside of a class is an ordinary literal.
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
    // Every escaped meta character parses to a two-byte meta literal. The
    // pattern holds 18 escapes (36 bytes), one per expected literal.
    assert_eq!(
        parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
        Ok(concat(
            0..36,
            vec![
                meta_lit('\\', span(0..2)),
                meta_lit('.', span(2..4)),
                meta_lit('+', span(4..6)),
                meta_lit('*', span(6..8)),
                meta_lit('?', span(8..10)),
                meta_lit('(', span(10..12)),
                meta_lit(')', span(12..14)),
                meta_lit('|', span(14..16)),
                meta_lit('[', span(16..18)),
                meta_lit(']', span(18..20)),
                meta_lit('{', span(20..22)),
                meta_lit('}', span(22..24)),
                meta_lit('^', span(24..26)),
                meta_lit('$', span(26..28)),
                meta_lit('#', span(28..30)),
                meta_lit('&', span(30..32)),
                meta_lit('-', span(32..34)),
                meta_lit('~', span(34..36)),
            ]
        ))
    );
}
2832
#[test]
fn parse_ignore_whitespace() {
    // Test that basic whitespace insensitivity works.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
            ]
        ))
    );

    // Test that we can toggle whitespace insensitivity.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
            ]
        ))
    );

    // Test that nesting whitespace insensitive flags works.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                lit_with('a', span_range(pat, 0..1)),
                lit_with(' ', span_range(pat, 1..2)),
                Ast::group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        },],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                }),
                lit_with('a', span_range(pat, 9..10)),
                lit_with(' ', span_range(pat, 10..11)),
            ]
        ))
    );

    // Test that whitespace after an opening paren is insignificant.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName {
                        starts_with_p: true,
                        name: ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }
                    },
                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                }),
            ]
        ))
    );
    // Two spaces follow the opening paren here, which puts `a` at 7..8.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                }),
            ]
        ))
    );
    // Two spaces before `?:` and two after it: the empty flag set sits at
    // offset 8 and `a` at 11..12.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                }),
            ]
        ))
    );
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // Test that whitespace after an escape is OK.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Superfluous,
                    c: ' ',
                }),
            ]
        ))
    );
}
2986
/// Checks that line feeds embedded in a pattern are parsed as ordinary
/// literals and that the positions reported for the following items move
/// on to the next line.
#[test]
fn parse_newlines() {
    let pat = ".\n.";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..3),
            vec![
                Ast::dot(span_range(pat, 0..1)),
                lit_with('\n', span_range(pat, 1..2)),
                Ast::dot(span_range(pat, 2..3)),
            ]
        ))
    );

    // Each expected literal spells out its start and end position. After a
    // '\n' the second `npos` argument is bumped and the third goes back to 1.
    let pat = "foobar\nbaz\nquux\n";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
                lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
                lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
                lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
                lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
                lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
                lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
                lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
                lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
                lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
                lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
                lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
                lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
                lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
            ]
        ))
    );
}
3028
/// Exercises the uncounted repetition operators `*`, `+` and `?` (plus the
/// lazy `??` form) on their own and next to concatenations, groups and
/// alternations, then checks that an operator with nothing to repeat is
/// rejected with `RepetitionMissing`.
#[test]
fn parse_uncounted_repetition() {
    assert_eq!(
        parser(r"a*").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..2),
            op: ast::RepetitionOp {
                span: span(1..2),
                kind: ast::RepetitionKind::ZeroOrMore,
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a+").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..2),
            op: ast::RepetitionOp {
                span: span(1..2),
                kind: ast::RepetitionKind::OneOrMore,
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );

    assert_eq!(
        parser(r"a?").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..2),
            op: ast::RepetitionOp {
                span: span(1..2),
                kind: ast::RepetitionKind::ZeroOrOne,
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    // A trailing `?` makes the operator lazy and is part of the op span.
    assert_eq!(
        parser(r"a??").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..3),
            op: ast::RepetitionOp {
                span: span(1..3),
                kind: ast::RepetitionKind::ZeroOrOne,
            },
            greedy: false,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(
            0..3,
            vec![
                Ast::repetition(ast::Repetition {
                    span: span(0..2),
                    op: ast::RepetitionOp {
                        span: span(1..2),
                        kind: ast::RepetitionKind::ZeroOrOne,
                    },
                    greedy: true,
                    ast: Box::new(lit('a', 0)),
                }),
                lit('b', 2),
            ]
        ))
    );
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(
            0..4,
            vec![
                Ast::repetition(ast::Repetition {
                    span: span(0..3),
                    op: ast::RepetitionOp {
                        span: span(1..3),
                        kind: ast::RepetitionKind::ZeroOrOne,
                    },
                    greedy: false,
                    ast: Box::new(lit('a', 0)),
                }),
                lit('b', 3),
            ]
        ))
    );
    // The operator applies only to the item directly before it.
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(
            0..3,
            vec![
                lit('a', 0),
                Ast::repetition(ast::Repetition {
                    span: span(1..3),
                    op: ast::RepetitionOp {
                        span: span(2..3),
                        kind: ast::RepetitionKind::ZeroOrOne,
                    },
                    greedy: true,
                    ast: Box::new(lit('b', 1)),
                }),
            ]
        ))
    );
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..5),
            op: ast::RepetitionOp {
                span: span(4..5),
                kind: ast::RepetitionKind::ZeroOrOne,
            },
            greedy: true,
            ast: Box::new(group(
                0..4,
                1,
                concat(1..3, vec![lit('a', 1), lit('b', 2),])
            )),
        }))
    );
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(
            0..3,
            vec![
                Ast::empty(span(0..0)),
                Ast::repetition(ast::Repetition {
                    span: span(1..3),
                    op: ast::RepetitionOp {
                        span: span(2..3),
                        kind: ast::RepetitionKind::ZeroOrOne,
                    },
                    greedy: true,
                    ast: Box::new(lit('a', 1)),
                }),
            ]
        ))
    );

    // An operator with no preceding expression is an error. The reported
    // span is empty and sits where the missing expression was expected.
    assert_eq!(
        parser(r"*").parse().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"(?i)*").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"(*)").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"(?:?)").parse().unwrap_err(),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"+").parse().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"?").parse().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"(?)").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"|*").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"|+").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"|?").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
}
3252
/// Exercises counted repetitions (`{n}`, `{n,}`, `{n,m}`), including the
/// lazy suffix, whitespace inside the braces and repetition of an
/// assertion, then checks the errors reported for malformed, unclosed,
/// overflowing, inverted and dangling counted repetitions.
#[test]
fn parse_counted_repetition() {
    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..4),
            op: ast::RepetitionOp {
                span: span(1..4),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Exactly(5)
                ),
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..5),
            op: ast::RepetitionOp {
                span: span(1..5),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::AtLeast(5)
                ),
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..6),
            op: ast::RepetitionOp {
                span: span(1..6),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Bounded(5, 9)
                ),
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..5),
            op: ast::RepetitionOp {
                span: span(1..5),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Exactly(5)
                ),
            },
            greedy: false,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(
            0..5,
            vec![
                lit('a', 0),
                Ast::repetition(ast::Repetition {
                    span: span(1..5),
                    op: ast::RepetitionOp {
                        span: span(2..5),
                        kind: ast::RepetitionKind::Range(
                            ast::RepetitionRange::Exactly(5)
                        ),
                    },
                    greedy: true,
                    ast: Box::new(lit('b', 1)),
                }),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(
            0..6,
            vec![
                lit('a', 0),
                Ast::repetition(ast::Repetition {
                    span: span(1..5),
                    op: ast::RepetitionOp {
                        span: span(2..5),
                        kind: ast::RepetitionKind::Range(
                            ast::RepetitionRange::Exactly(5)
                        ),
                    },
                    greedy: true,
                    ast: Box::new(lit('b', 1)),
                }),
                lit('c', 5),
            ]
        ))
    );

    // Spaces inside the braces are accepted and counted in the op span.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..6),
            op: ast::RepetitionOp {
                span: span(1..6),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Exactly(5)
                ),
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..10),
            op: ast::RepetitionOp {
                span: span(1..10),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Bounded(5, 9)
                ),
            },
            greedy: true,
            ast: Box::new(lit('a', 0)),
        }))
    );
    // With the whitespace-ignoring parser, a space may separate the closing
    // brace from the lazy `?` suffix.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..8),
            op: ast::RepetitionOp {
                span: span(1..8),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Bounded(5, 9)
                ),
            },
            greedy: false,
            ast: Box::new(lit('a', 0)),
        }))
    );
    // `\b` followed by `{5,9}` is a repeated word-boundary assertion.
    assert_eq!(
        parser(r"\b{5,9}").parse(),
        Ok(Ast::repetition(ast::Repetition {
            span: span(0..7),
            op: ast::RepetitionOp {
                span: span(2..7),
                kind: ast::RepetitionKind::Range(
                    ast::RepetitionRange::Bounded(5, 9)
                ),
            },
            greedy: true,
            ast: Box::new(Ast::assertion(ast::Assertion {
                span: span(0..2),
                kind: ast::AssertionKind::WordBoundary,
            })),
        }))
    );

    // A flag-setting directive is not something that can be repeated.
    assert_eq!(
        parser(r"(?i){0}").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"(?m){1,1}").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    // Missing or non-decimal counts.
    assert_eq!(
        parser(r"a{]}").parse().unwrap_err(),
        TestError {
            span: span(2..2),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    assert_eq!(
        parser(r"a{1,]}").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    assert_eq!(
        parser(r"a{").parse().unwrap_err(),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );
    assert_eq!(
        parser(r"a{}").parse().unwrap_err(),
        TestError {
            span: span(2..2),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    assert_eq!(
        parser(r"a{a").parse().unwrap_err(),
        TestError {
            span: span(2..2),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    assert_eq!(
        parser(r"a{9999999999}").parse().unwrap_err(),
        TestError {
            span: span(2..12),
            kind: ast::ErrorKind::DecimalInvalid,
        }
    );
    assert_eq!(
        parser(r"a{9").parse().unwrap_err(),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );
    assert_eq!(
        parser(r"a{9,a").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    assert_eq!(
        parser(r"a{9,9999999999}").parse().unwrap_err(),
        TestError {
            span: span(4..14),
            kind: ast::ErrorKind::DecimalInvalid,
        }
    );
    assert_eq!(
        parser(r"a{9,").parse().unwrap_err(),
        TestError {
            span: span(1..4),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );
    assert_eq!(
        parser(r"a{9,11").parse().unwrap_err(),
        TestError {
            span: span(1..6),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );
    // The lower bound may not exceed the upper bound.
    assert_eq!(
        parser(r"a{2,1}").parse().unwrap_err(),
        TestError {
            span: span(1..6),
            kind: ast::ErrorKind::RepetitionCountInvalid,
        }
    );
    // A counted repetition with nothing before it.
    assert_eq!(
        parser(r"{5}").parse().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
    assert_eq!(
        parser(r"|{5}").parse().unwrap_err(),
        TestError {
            span: span(1..1),
            kind: ast::ErrorKind::RepetitionMissing,
        }
    );
}
3526
/// Exercises alternation with `|`: flat and grouped alternations, nested
/// groups, empty branches on either side of a `|`, and the errors reported
/// when an alternation sits next to an unbalanced parenthesis.
#[test]
fn parse_alternate() {
    assert_eq!(
        parser(r"a|b").parse(),
        Ok(Ast::alternation(ast::Alternation {
            span: span(0..3),
            asts: vec![lit('a', 0), lit('b', 2)],
        }))
    );
    assert_eq!(
        parser(r"(a|b)").parse(),
        Ok(group(
            0..5,
            1,
            Ast::alternation(ast::Alternation {
                span: span(1..4),
                asts: vec![lit('a', 1), lit('b', 3)],
            })
        ))
    );

    assert_eq!(
        parser(r"a|b|c").parse(),
        Ok(Ast::alternation(ast::Alternation {
            span: span(0..5),
            asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
        }))
    );
    assert_eq!(
        parser(r"ax|by|cz").parse(),
        Ok(Ast::alternation(ast::Alternation {
            span: span(0..8),
            asts: vec![
                concat(0..2, vec![lit('a', 0), lit('x', 1)]),
                concat(3..5, vec![lit('b', 3), lit('y', 4)]),
                concat(6..8, vec![lit('c', 6), lit('z', 7)]),
            ],
        }))
    );
    assert_eq!(
        parser(r"(ax|by|cz)").parse(),
        Ok(group(
            0..10,
            1,
            Ast::alternation(ast::Alternation {
                span: span(1..9),
                asts: vec![
                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                    concat(4..6, vec![lit('b', 4), lit('y', 5)]),
                    concat(7..9, vec![lit('c', 7), lit('z', 8)]),
                ],
            })
        ))
    );
    assert_eq!(
        parser(r"(ax|(by|(cz)))").parse(),
        Ok(group(
            0..14,
            1,
            alt(
                1..13,
                vec![
                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                    group(
                        4..13,
                        2,
                        alt(
                            5..12,
                            vec![
                                concat(
                                    5..7,
                                    vec![lit('b', 5), lit('y', 6)]
                                ),
                                group(
                                    8..12,
                                    3,
                                    concat(
                                        9..11,
                                        vec![lit('c', 9), lit('z', 10),]
                                    )
                                ),
                            ]
                        )
                    ),
                ]
            )
        ))
    );

    // A branch with nothing in it shows up as an `Ast::empty` whose
    // zero-width span marks where the branch would have been.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(
            0..1,
            vec![Ast::empty(span(0..0)), Ast::empty(span(1..1)),]
        ))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(
            0..2,
            vec![
                Ast::empty(span(0..0)),
                Ast::empty(span(1..1)),
                Ast::empty(span(2..2)),
            ]
        ))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), Ast::empty(span(2..2)),]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![Ast::empty(span(0..0)), lit('a', 1),]))
    );

    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(
            0..3,
            1,
            alt(
                1..2,
                vec![Ast::empty(span(1..1)), Ast::empty(span(2..2)),]
            )
        ))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(
            0..4,
            1,
            alt(1..3, vec![lit('a', 1), Ast::empty(span(3..3)),])
        ))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(
            0..4,
            1,
            alt(1..3, vec![Ast::empty(span(1..1)), lit('a', 2),])
        ))
    );

    // Unbalanced parentheses around an alternation: the error span covers
    // the offending parenthesis.
    assert_eq!(
        parser(r"a|b)").parse().unwrap_err(),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::GroupUnopened,
        }
    );
    assert_eq!(
        parser(r"(a|b").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
}
3686
/// Checks that look-ahead (`(?=`, `(?!`) and look-behind (`(?<=`, `(?<!`)
/// group openers are rejected with `UnsupportedLookAround`, and that the
/// error span covers exactly the opener.
#[test]
fn parse_unsupported_lookaround() {
    assert_eq!(
        parser(r"(?=a)").parse().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::UnsupportedLookAround,
        }
    );
    assert_eq!(
        parser(r"(?!a)").parse().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::UnsupportedLookAround,
        }
    );
    assert_eq!(
        parser(r"(?<=a)").parse().unwrap_err(),
        TestError {
            span: span(0..4),
            kind: ast::ErrorKind::UnsupportedLookAround,
        }
    );
    assert_eq!(
        parser(r"(?<!a)").parse().unwrap_err(),
        TestError {
            span: span(0..4),
            kind: ast::ErrorKind::UnsupportedLookAround,
        }
    );
}
3718
/// Exercises group parsing: bare flag directives such as `(?i)`, capturing
/// groups (including nested and empty ones), non-capturing groups with and
/// without flags, and the errors reported for unclosed or unopened groups.
#[test]
fn parse_group() {
    // Flag directives without a `:` produce a `SetFlags` node, not a group.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..4),
            flags: ast::Flags {
                span: span(2..3),
                items: vec![ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                }],
            },
        }))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..5),
            flags: ast::Flags {
                span: span(2..4),
                items: vec![
                    ast::FlagsItem {
                        span: span(2..3),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::CaseInsensitive
                        ),
                    },
                    ast::FlagsItem {
                        span: span(3..4),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::SwapGreed
                        ),
                    },
                ],
            },
        }))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(Ast::flags(ast::SetFlags {
            span: span(0..6),
            flags: ast::Flags {
                span: span(2..5),
                items: vec![
                    ast::FlagsItem {
                        span: span(2..3),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::CaseInsensitive
                        ),
                    },
                    ast::FlagsItem {
                        span: span(3..4),
                        kind: ast::FlagsItemKind::Negation,
                    },
                    ast::FlagsItem {
                        span: span(4..5),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::SwapGreed
                        ),
                    },
                ],
            },
        }))
    );

    // Capturing groups are numbered from 1 in the order they are opened.
    assert_eq!(
        parser("()").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..2),
            kind: ast::GroupKind::CaptureIndex(1),
            ast: Box::new(Ast::empty(span(1..1))),
        }))
    );
    assert_eq!(
        parser("(a)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..3),
            kind: ast::GroupKind::CaptureIndex(1),
            ast: Box::new(lit('a', 1)),
        }))
    );
    assert_eq!(
        parser("(())").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..4),
            kind: ast::GroupKind::CaptureIndex(1),
            ast: Box::new(Ast::group(ast::Group {
                span: span(1..3),
                kind: ast::GroupKind::CaptureIndex(2),
                ast: Box::new(Ast::empty(span(2..2))),
            })),
        }))
    );

    // `(?:` with no flags yields a non-capturing group whose flag set is
    // empty and has a zero-width span.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..5),
            kind: ast::GroupKind::NonCapturing(ast::Flags {
                span: span(2..2),
                items: vec![],
            }),
            ast: Box::new(lit('a', 3)),
        }))
    );

    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..6),
            kind: ast::GroupKind::NonCapturing(ast::Flags {
                span: span(2..3),
                items: vec![ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                },],
            }),
            ast: Box::new(lit('a', 4)),
        }))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..8),
            kind: ast::GroupKind::NonCapturing(ast::Flags {
                span: span(2..5),
                items: vec![
                    ast::FlagsItem {
                        span: span(2..3),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::CaseInsensitive
                        ),
                    },
                    ast::FlagsItem {
                        span: span(3..4),
                        kind: ast::FlagsItemKind::Negation,
                    },
                    ast::FlagsItem {
                        span: span(4..5),
                        kind: ast::FlagsItemKind::Flag(
                            ast::Flag::SwapGreed
                        ),
                    },
                ],
            }),
            ast: Box::new(lit('a', 6)),
        }))
    );

    // Truncated or unbalanced input. For unclosed groups the error points
    // at the opening parenthesis.
    assert_eq!(
        parser("(").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
    assert_eq!(
        parser("(?").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
    // A `P` that is not followed by `<` is reported as an unknown flag.
    assert_eq!(
        parser("(?P").parse().unwrap_err(),
        TestError {
            span: span(2..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    assert_eq!(
        parser("(?P<").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::GroupNameUnexpectedEof,
        }
    );
    assert_eq!(
        parser("(a").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
    assert_eq!(
        parser("(()").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
    assert_eq!(
        parser(")").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnopened,
        }
    );
    assert_eq!(
        parser("a)").parse().unwrap_err(),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::GroupUnopened,
        }
    );
}
3930
/// Exercises named capture groups in both the `(?<name>...)` and
/// `(?P<name>...)` spellings: names containing `_`, `.`, `[`, `]`, digits
/// and non-ASCII characters, followed by the errors reported for
/// truncated, empty, invalid and duplicate names.
#[test]
fn parse_capture_name() {
    // `starts_with_p` records which of the two spellings was used.
    assert_eq!(
        parser("(?<a>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..7),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: false,
                name: ast::CaptureName {
                    span: span(3..4),
                    name: s("a"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 5)),
        }))
    );
    assert_eq!(
        parser("(?P<a>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..8),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: span(4..5),
                    name: s("a"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 6)),
        }))
    );
    assert_eq!(
        parser("(?P<abc>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..10),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: span(4..7),
                    name: s("abc"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 8)),
        }))
    );

    assert_eq!(
        parser("(?P<a_1>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..10),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: span(4..7),
                    name: s("a_1"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 8)),
        }))
    );

    assert_eq!(
        parser("(?P<a.1>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..10),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: span(4..7),
                    name: s("a.1"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 8)),
        }))
    );

    assert_eq!(
        parser("(?P<a[1]>z)").parse(),
        Ok(Ast::group(ast::Group {
            span: span(0..11),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: span(4..8),
                    name: s("a[1]"),
                    index: 1,
                }
            },
            ast: Box::new(lit('z', 9)),
        }))
    );

    // Non-ASCII names: the first `Position` argument and the last one
    // diverge here because the name characters are multi-byte, so the
    // positions are written out explicitly.
    assert_eq!(
        parser("(?P<a¾>)").parse(),
        Ok(Ast::group(ast::Group {
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(9, 1, 9),
            ),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: Span::new(
                        Position::new(4, 1, 5),
                        Position::new(7, 1, 7),
                    ),
                    name: s("a¾"),
                    index: 1,
                }
            },
            ast: Box::new(Ast::empty(Span::new(
                Position::new(8, 1, 8),
                Position::new(8, 1, 8),
            ))),
        }))
    );
    assert_eq!(
        parser("(?P<名字>)").parse(),
        Ok(Ast::group(ast::Group {
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(12, 1, 9),
            ),
            kind: ast::GroupKind::CaptureName {
                starts_with_p: true,
                name: ast::CaptureName {
                    span: Span::new(
                        Position::new(4, 1, 5),
                        Position::new(10, 1, 7),
                    ),
                    name: s("名字"),
                    index: 1,
                }
            },
            ast: Box::new(Ast::empty(Span::new(
                Position::new(11, 1, 8),
                Position::new(11, 1, 8),
            ))),
        }))
    );

    // Truncated and empty names.
    assert_eq!(
        parser("(?P<").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::GroupNameUnexpectedEof,
        }
    );
    assert_eq!(
        parser("(?P<>z)").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::GroupNameEmpty,
        }
    );
    assert_eq!(
        parser("(?P<a").parse().unwrap_err(),
        TestError {
            span: span(5..5),
            kind: ast::ErrorKind::GroupNameUnexpectedEof,
        }
    );
    assert_eq!(
        parser("(?P<ab").parse().unwrap_err(),
        TestError {
            span: span(6..6),
            kind: ast::ErrorKind::GroupNameUnexpectedEof,
        }
    );
    // Invalid characters: the error span covers the offending character.
    assert_eq!(
        parser("(?P<0a").parse().unwrap_err(),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    assert_eq!(
        parser("(?P<~").parse().unwrap_err(),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    assert_eq!(
        parser("(?P<abc~").parse().unwrap_err(),
        TestError {
            span: span(7..8),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    // Reusing a name is an error that also reports the first definition.
    assert_eq!(
        parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
        TestError {
            span: span(12..13),
            kind: ast::ErrorKind::GroupNameDuplicate {
                original: span(4..5),
            },
        }
    );
    // A name may not start with a digit.
    assert_eq!(
        parser("(?P<5>)").parse().unwrap_err(),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    assert_eq!(
        parser("(?P<5a>)").parse().unwrap_err(),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    // `¾` is accepted after the first character (see above) but not as the
    // first character of a name.
    assert_eq!(
        parser("(?P<¾>)").parse().unwrap_err(),
        TestError {
            span: Span::new(
                Position::new(4, 1, 5),
                Position::new(6, 1, 6),
            ),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    assert_eq!(
        parser("(?P<¾a>)").parse().unwrap_err(),
        TestError {
            span: Span::new(
                Position::new(4, 1, 5),
                Position::new(6, 1, 6),
            ),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    // `☃` is rejected in any position.
    assert_eq!(
        parser("(?P<☃>)").parse().unwrap_err(),
        TestError {
            span: Span::new(
                Position::new(4, 1, 5),
                Position::new(7, 1, 6),
            ),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
    assert_eq!(
        parser("(?P<a☃>)").parse().unwrap_err(),
        TestError {
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 7),
            ),
            kind: ast::ErrorKind::GroupNameInvalid,
        }
    );
}
4189
/// Exercises `parse_flags` directly on the text that follows `(?`: single
/// and multiple flags terminated by `:` or `)`, negation with `-`, and the
/// errors for truncated input, unknown or duplicate flags, repeated
/// negation and a dangling `-`.
#[test]
fn parse_flags() {
    // The terminator (`:` or `)`) is not part of the returned flag span.
    assert_eq!(
        parser("i:").parse_flags(),
        Ok(ast::Flags {
            span: span(0..1),
            items: vec![ast::FlagsItem {
                span: span(0..1),
                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
            }],
        })
    );
    assert_eq!(
        parser("i)").parse_flags(),
        Ok(ast::Flags {
            span: span(0..1),
            items: vec![ast::FlagsItem {
                span: span(0..1),
                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
            }],
        })
    );

    assert_eq!(
        parser("isU:").parse_flags(),
        Ok(ast::Flags {
            span: span(0..3),
            items: vec![
                ast::FlagsItem {
                    span: span(0..1),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                },
                ast::FlagsItem {
                    span: span(1..2),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::DotMatchesNewLine
                    ),
                },
                ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
                },
            ],
        })
    );

    // `-` is recorded as its own `Negation` item in the item list.
    assert_eq!(
        parser("-isU:").parse_flags(),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                ast::FlagsItem {
                    span: span(0..1),
                    kind: ast::FlagsItemKind::Negation,
                },
                ast::FlagsItem {
                    span: span(1..2),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                },
                ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::DotMatchesNewLine
                    ),
                },
                ast::FlagsItem {
                    span: span(3..4),
                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
                },
            ],
        })
    );
    assert_eq!(
        parser("i-sU:").parse_flags(),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                ast::FlagsItem {
                    span: span(0..1),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                },
                ast::FlagsItem {
                    span: span(1..2),
                    kind: ast::FlagsItemKind::Negation,
                },
                ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::DotMatchesNewLine
                    ),
                },
                ast::FlagsItem {
                    span: span(3..4),
                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
                },
            ],
        })
    );
    assert_eq!(
        parser("i-sR:").parse_flags(),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                ast::FlagsItem {
                    span: span(0..1),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::CaseInsensitive
                    ),
                },
                ast::FlagsItem {
                    span: span(1..2),
                    kind: ast::FlagsItemKind::Negation,
                },
                ast::FlagsItem {
                    span: span(2..3),
                    kind: ast::FlagsItemKind::Flag(
                        ast::Flag::DotMatchesNewLine
                    ),
                },
                ast::FlagsItem {
                    span: span(3..4),
                    kind: ast::FlagsItemKind::Flag(ast::Flag::CRLF),
                },
            ],
        })
    );

    // Input that ends before a terminator is seen.
    assert_eq!(
        parser("isU").parse_flags().unwrap_err(),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::FlagUnexpectedEof,
        }
    );
    assert_eq!(
        parser("isUa:").parse_flags().unwrap_err(),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    // Duplicate flags and repeated negation report the first occurrence
    // through `original`.
    assert_eq!(
        parser("isUi:").parse_flags().unwrap_err(),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
        }
    );
    assert_eq!(
        parser("i-sU-i:").parse_flags().unwrap_err(),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::FlagRepeatedNegation {
                original: span(1..2),
            },
        }
    );
    // A `-` with no flag after it.
    assert_eq!(
        parser("-)").parse_flags().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagDanglingNegation,
        }
    );
    assert_eq!(
        parser("i-)").parse_flags().unwrap_err(),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::FlagDanglingNegation,
        }
    );
    assert_eq!(
        parser("iU-)").parse_flags().unwrap_err(),
        TestError {
            span: span(2..3),
            kind: ast::ErrorKind::FlagDanglingNegation,
        }
    );
}
4375
#[test]
fn parse_flag() {
    // Each recognized flag character maps to exactly one `ast::Flag`.
    let recognized = [
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("R", ast::Flag::CRLF),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (pattern, flag) in recognized {
        assert_eq!(parser(pattern).parse_flag(), Ok(flag));
    }

    // Any other character is rejected; the error span covers that one
    // character (one byte for `a`, three bytes for the snowman).
    let ascii_err = parser("a").parse_flag().unwrap_err();
    assert_eq!(
        ascii_err,
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    let snowman_err = parser("☃").parse_flag().unwrap_err();
    assert_eq!(
        snowman_err,
        TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
}
4401
#[test]
fn parse_primitive_non_escape() {
    // Expected primitive for a single unescaped literal character.
    let verbatim = |span: Span, c: char| {
        Primitive::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };
    // Expected primitive for a one-character line anchor.
    let anchor = |kind: ast::AssertionKind| {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    };

    assert_eq!(
        parser(r".").parse_primitive(),
        Ok(Primitive::Dot(span(0..1)))
    );
    assert_eq!(
        parser(r"^").parse_primitive(),
        Ok(anchor(ast::AssertionKind::StartLine))
    );
    assert_eq!(
        parser(r"$").parse_primitive(),
        Ok(anchor(ast::AssertionKind::EndLine))
    );

    assert_eq!(
        parser(r"a").parse_primitive(),
        Ok(verbatim(span(0..1), 'a'))
    );
    // A bare pipe handed directly to `parse_primitive` is expected to come
    // back as a one-byte verbatim literal. The pattern and the expected
    // character are written unescaped: `'\|'` is not a valid Rust character
    // escape, and an escaped pipe would span two bytes rather than one.
    assert_eq!(
        parser(r"|").parse_primitive(),
        Ok(verbatim(span(0..1), '|'))
    );
    // Multi-byte characters get a span measured in bytes.
    assert_eq!(
        parser(r"☃").parse_primitive(),
        Ok(verbatim(span_range("☃", 0..3), '☃'))
    );
}
4448
#[test]
fn parse_escape() {
    // An escaped meta character is a `Meta` literal covering both bytes of
    // the escape. The pattern is a single backslash followed by the pipe;
    // doubling the backslash would instead parse an escaped backslash.
    assert_eq!(
        parser(r"\|").parse_primitive(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Meta,
            c: '|',
        }))
    );

    // Escapes that stand for one specific control character.
    let specials = &[
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for &(pat, c, ref kind) in specials {
        assert_eq!(
            parser(pat).parse_primitive(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Special(kind.clone()),
                c,
            }))
        );
    }

    // Assertion escapes. Every one of them spans its entire pattern, so
    // the expected span is derived from the pattern length.
    let assertions = [
        (r"\A", ast::AssertionKind::StartText),
        (r"\z", ast::AssertionKind::EndText),
        (r"\b", ast::AssertionKind::WordBoundary),
        (r"\b{start}", ast::AssertionKind::WordBoundaryStart),
        (r"\b{end}", ast::AssertionKind::WordBoundaryEnd),
        (r"\b{start-half}", ast::AssertionKind::WordBoundaryStartHalf),
        (r"\b{end-half}", ast::AssertionKind::WordBoundaryEndHalf),
        (r"\<", ast::AssertionKind::WordBoundaryStartAngle),
        (r"\>", ast::AssertionKind::WordBoundaryEndAngle),
        (r"\B", ast::AssertionKind::NotWordBoundary),
    ];
    for (pat, kind) in assertions {
        assert_eq!(
            parser(pat).parse_primitive(),
            Ok(Primitive::Assertion(ast::Assertion {
                span: span(0..pat.len()),
                kind,
            }))
        );
    }

    // We also support superfluous escapes in most cases now too.
    for c in ['!', '@', '%', '"', '\'', '/', ' '] {
        let pat = format!(r"\{}", c);
        assert_eq!(
            parser(&pat).parse_primitive(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Superfluous,
                c,
            }))
        );
    }

    // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
    // gives flexibility for future evolution.
    for pat in [r"\e", r"\y"] {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }

    // Starting a special word boundary without any non-whitespace chars
    // after the brace makes it ambiguous whether the user meant to write
    // a counted repetition (probably not?) or an actual special word
    // boundary assertion.
    assert_eq!(
        parser(r"\b{").parse_escape().unwrap_err(),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        }
    );
    assert_eq!(
        parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
        TestError {
            span: span(0..4),
            kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        }
    );
    // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
    // and thus causes the parser to treat it as a counted repetition.
    assert_eq!(
        parser(r"\b{ ").parse().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
        }
    );
    // In this case, we got some valid chars that make it look like the
    // user is writing one of the special word boundary assertions, but
    // the closing brace is missing.
    assert_eq!(
        parser(r"\b{foo").parse_escape().unwrap_err(),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
        }
    );
    // We get the same error as above, except it is provoked by seeing a
    // char that we know is invalid before seeing a closing brace.
    assert_eq!(
        parser(r"\b{foo!}").parse_escape().unwrap_err(),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
        }
    );
    // And this one occurs when, syntactically, everything looks okay, but
    // we don't use a valid spelling of a word boundary assertion.
    assert_eq!(
        parser(r"\b{foo}").parse_escape().unwrap_err(),
        TestError {
            span: span(3..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
        }
    );

    // An unfinished escape is illegal.
    assert_eq!(
        parser(r"\").parse_escape().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
}
4643
#[test]
fn parse_unsupported_backreference() {
    // With octal syntax disabled, a backslash followed by a digit is
    // reported as an unsupported backreference spanning the whole escape.
    for pattern in [r"\0", r"\9"] {
        let err = parser(pattern).parse_escape().unwrap_err();
        assert_eq!(
            err,
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            }
        );
    }
}
4661
#[test]
fn parse_octal() {
    // Every one-, two- and three-digit octal escape, `\0` through `\777`
    // inclusive, yields the scalar value with that code point and spans
    // the whole escape. The bound is written in octal and is inclusive so
    // that the largest three-digit escape is exercised as well.
    for i in 0..=0o777 {
        let pat = format!(r"\{:o}", i);
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::Octal,
                c: char::from_u32(i).unwrap(),
            }))
        );
    }

    // A non-octal digit ends the escape: only `\77` is consumed.
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..3),
            kind: ast::LiteralKind::Octal,
            c: '?',
        }))
    );
    // At most three octal digits are consumed: only `\777` is part of the
    // escape here.
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..4),
            kind: ast::LiteralKind::Octal,
            c: '\u{01FF}',
        }))
    );

    // Parsing the full pattern turns the leftover character into a
    // verbatim literal following the octal one.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::literal(ast::Literal {
                    span: span(0..3),
                    kind: ast::LiteralKind::Octal,
                    c: '?',
                }),
                Ast::literal(ast::Literal {
                    span: span(3..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '8',
                }),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::literal(ast::Literal {
                    span: span(0..4),
                    kind: ast::LiteralKind::Octal,
                    c: '\u{01FF}',
                }),
                Ast::literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: '7',
                }),
            ],
        }))
    );

    // `8` is not an octal digit, so `\8` is not an octal escape at all.
    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4736
#[test]
fn parse_hex_two() {
    // Every two-digit `\xNN` escape yields the scalar value with that
    // code point and spans the whole escape.
    for codepoint in 0..256 {
        let pattern = format!(r"\x{:02x}", codepoint);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pattern.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: char::from_u32(codepoint).unwrap(),
        });
        assert_eq!(parser(&pattern).parse_escape(), Ok(expected));
    }

    // Truncated escapes and non-hex digits, with the span each error is
    // reported at.
    let failures = [
        (r"\xF", 3..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\xG", 2..3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\xFG", 3..4, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }
}
4773
#[test]
fn parse_hex_four() {
    // Every four-digit `\uNNNN` escape that names a Unicode scalar value
    // parses to that character. Code points that `char::from_u32` rejects
    // are skipped here.
    for c in (0..65536).filter_map(char::from_u32) {
        let pattern = format!(r"\u{:04x}", u32::from(c));
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pattern.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeShort,
            ),
            c,
        });
        assert_eq!(parser(&pattern).parse_escape(), Ok(expected));
    }

    // Truncated escapes, non-hex digits at each position, and a surrogate
    // code point, with the span each error is reported at.
    let failures = [
        (r"\uF", 3..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\uG", 2..3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFG", 3..4, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFG", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFFG", 5..6, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uD800", 2..6, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }
}
4837
#[test]
fn parse_hex_eight() {
    // Eight-digit `\UNNNNNNNN` escapes, checked for the first 65536 code
    // points. Code points that `char::from_u32` rejects are skipped.
    for c in (0..65536).filter_map(char::from_u32) {
        let pattern = format!(r"\U{:08x}", u32::from(c));
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pattern.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeLong,
            ),
            c,
        });
        assert_eq!(parser(&pattern).parse_escape(), Ok(expected));
    }

    // Running out of input part-way through the digits.
    assert_eq!(
        parser(r"\UF").parse_escape().unwrap_err(),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // A non-hex digit in each of the eight digit positions. The offending
    // `G` is always the final byte of the pattern, which is exactly where
    // the error span is expected.
    let bad_digit = [
        r"\UG",
        r"\UFG",
        r"\UFFG",
        r"\UFFFG",
        r"\UFFFFG",
        r"\UFFFFFG",
        r"\UFFFFFFG",
        r"\UFFFFFFFG",
    ];
    for pattern in bad_digit {
        let end = pattern.len();
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                span: span(end - 1..end),
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
}
4922
#[test]
fn parse_hex_brace() {
    // Brace-delimited hex escapes under each prefix letter, in lower,
    // upper and mixed case. Each escape spans its entire pattern.
    let accepted = [
        (r"\u{26c4}", ast::HexLiteralKind::UnicodeShort, '⛄'),
        (r"\U{26c4}", ast::HexLiteralKind::UnicodeLong, '⛄'),
        (r"\x{26c4}", ast::HexLiteralKind::X, '⛄'),
        (r"\x{26C4}", ast::HexLiteralKind::X, '⛄'),
        (r"\x{10fFfF}", ast::HexLiteralKind::X, '\u{10FFFF}'),
    ];
    for (pattern, hex_kind, c) in accepted {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..pattern.len()),
                kind: ast::LiteralKind::HexBrace(hex_kind),
                c,
            }))
        );
    }

    // Malformed escapes, with the span each error is reported at.
    let rejected = [
        (r"\x", 2..2, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{", 2..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{FF", 2..5, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{}", 2..4, ast::ErrorKind::EscapeHexEmpty),
        (r"\x{FGF}", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\x{FFFFFF}", 3..9, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{D800}", 3..7, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{FFFFFFFFF}", 3..12, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pattern, at, kind) in rejected {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }
}
5027
#[test]
fn parse_decimal() {
    // Plain decimal numbers; a leading zero is accepted and ignored.
    for (pattern, value) in [("123", 123), ("0", 0), ("01", 1)] {
        assert_eq!(parser(pattern).parse_decimal(), Ok(value));
    }

    // No leading digit (a sign, or nothing at all) is an empty decimal,
    // and a number that is too large is reported over all of its digits.
    let rejected = [
        ("-1", 0..0, ast::ErrorKind::DecimalEmpty),
        ("", 0..0, ast::ErrorKind::DecimalEmpty),
        ("9999999999", 0..10, ast::ErrorKind::DecimalInvalid),
    ];
    for (pattern, at, kind) in rejected {
        assert_eq!(
            parser(pattern).parse_decimal().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }
}
5050
#[test]
fn parse_set_class() {
    // ----- builders for the expected AST -----

    // A set operation of the given kind over two operand sets.
    fn binary(
        kind: ast::ClassSetBinaryOpKind,
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    fn intersection(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::Intersection, span, lhs, rhs)
    }

    fn difference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::Difference, span, lhs, rhs)
    }

    fn symdifference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::SymmetricDifference, span, lhs, rhs)
    }

    // A union of several items.
    fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
        ast::ClassSet::union(ast::ClassSetUnion { span, items })
    }

    // A set made of exactly one item.
    fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
        ast::ClassSet::Item(item)
    }

    // A non-negated bracketed class with the given contents.
    fn bracket(span: Span, kind: ast::ClassSet) -> ast::ClassBracketed {
        ast::ClassBracketed { span, negated: false, kind }
    }

    // Same as `bracket`, wrapped up as a top-level AST node.
    fn class(span: Span, kind: ast::ClassSet) -> Ast {
        Ast::class_bracketed(bracket(span, kind))
    }

    // A non-negated ASCII class (e.g. `[:alnum:]`) as a single-item set.
    fn ascii(span: Span, kind: ast::ClassAsciiKind) -> ast::ClassSet {
        itemset(ast::ClassSetItem::Ascii(ast::ClassAscii {
            span,
            kind,
            negated: false,
        }))
    }

    fn alnum(span: Span) -> ast::ClassSet {
        ascii(span, ast::ClassAsciiKind::Alnum)
    }

    fn lower(span: Span) -> ast::ClassSet {
        ascii(span, ast::ClassAsciiKind::Lower)
    }

    // The non-negated Perl word class `\w`.
    fn word(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Perl(ast::ClassPerl {
            span,
            kind: ast::ClassPerlKind::Word,
            negated: false,
        })
    }

    // An unescaped literal character.
    fn lit(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }

    // A backslash-escaped meta character.
    fn meta(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Meta,
            c,
        })
    }

    fn empty(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Empty(span)
    }

    // A range between two verbatim literals. The start literal ends one
    // character after the range begins and the end literal begins one
    // character before the range ends; both stay on the range's lines.
    fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
        let mut start_end = span.start;
        start_end.offset += start.len_utf8();
        start_end.column += 1;

        let mut end_start = span.end;
        end_start.offset -= end.len_utf8();
        end_start.column -= 1;

        ast::ClassSetItem::Range(ast::ClassSetRange {
            span,
            start: ast::Literal {
                span: Span { start: span.start, end: start_end },
                kind: ast::LiteralKind::Verbatim,
                c: start,
            },
            end: ast::Literal {
                span: Span { start: end_start, end: span.end },
                kind: ast::LiteralKind::Verbatim,
                c: end,
            },
        })
    }

    // ----- ASCII classes and set operations between them -----

    assert_eq!(
        parser("[[:alnum:]]").parse(),
        Ok(class(span(0..11), alnum(span(1..10))))
    );
    assert_eq!(
        parser("[[[:alnum:]]]").parse(),
        Ok(class(
            span(0..13),
            itemset(ast::ClassSetItem::Bracketed(Box::new(bracket(
                span(1..12),
                alnum(span(2..11)),
            )))),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]&&[:lower:]]").parse(),
        Ok(class(
            span(0..22),
            intersection(
                span(1..21),
                alnum(span(1..10)),
                lower(span(12..21)),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]--[:lower:]]").parse(),
        Ok(class(
            span(0..22),
            difference(
                span(1..21),
                alnum(span(1..10)),
                lower(span(12..21)),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]~~[:lower:]]").parse(),
        Ok(class(
            span(0..22),
            symdifference(
                span(1..21),
                alnum(span(1..10)),
                lower(span(12..21)),
            ),
        ))
    );

    // ----- literals, escapes and shorthand classes -----

    assert_eq!(
        parser("[a]").parse(),
        Ok(class(span(0..3), itemset(lit(span(1..2), 'a'))))
    );
    assert_eq!(
        parser(r"[a\]]").parse(),
        Ok(class(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), 'a'), meta(span(2..4), ']')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[a\-z]").parse(),
        Ok(class(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    meta(span(2..4), '-'),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser("[ab]").parse(),
        Ok(class(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), 'b')],
            ),
        ))
    );
    assert_eq!(
        parser("[a-]").parse(),
        Ok(class(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), '-')],
            ),
        ))
    );
    assert_eq!(
        parser("[-a]").parse(),
        Ok(class(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), '-'), lit(span(2..3), 'a')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\pL]").parse(),
        Ok(class(
            span(0..5),
            itemset(ast::ClassSetItem::Unicode(ast::ClassUnicode {
                span: span(1..4),
                negated: false,
                kind: ast::ClassUnicodeKind::OneLetter('L'),
            })),
        ))
    );
    assert_eq!(
        parser(r"[\w]").parse(),
        Ok(class(span(0..4), itemset(word(span(1..3)))))
    );
    assert_eq!(
        parser(r"[a\wz]").parse(),
        Ok(class(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    word(span(2..4)),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );

    // ----- ranges and chained set operations -----

    assert_eq!(
        parser("[a-z]").parse(),
        Ok(class(span(0..5), itemset(range(span(1..4), 'a', 'z'))))
    );
    assert_eq!(
        parser("[a-cx-z]").parse(),
        Ok(class(
            span(0..8),
            union(
                span(1..7),
                vec![
                    range(span(1..4), 'a', 'c'),
                    range(span(4..7), 'x', 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\w&&a-cx-z]").parse(),
        Ok(class(
            span(0..12),
            intersection(
                span(1..11),
                itemset(word(span(1..3))),
                union(
                    span(5..11),
                    vec![
                        range(span(5..8), 'a', 'c'),
                        range(span(8..11), 'x', 'z'),
                    ],
                ),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a-cx-z&&\w]").parse(),
        Ok(class(
            span(0..12),
            intersection(
                span(1..11),
                union(
                    span(1..7),
                    vec![
                        range(span(1..4), 'a', 'c'),
                        range(span(4..7), 'x', 'z'),
                    ],
                ),
                itemset(word(span(9..11))),
            ),
        ))
    );
    // Repeated operators nest to the left.
    assert_eq!(
        parser(r"[a--b--c]").parse(),
        Ok(class(
            span(0..9),
            difference(
                span(1..8),
                difference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a~~b~~c]").parse(),
        Ok(class(
            span(0..9),
            symdifference(
                span(1..8),
                symdifference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\^&&^]").parse(),
        Ok(class(
            span(0..7),
            intersection(
                span(1..6),
                itemset(meta(span(1..3), '^')),
                itemset(lit(span(5..6), '^')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\&&&&]").parse(),
        Ok(class(
            span(0..7),
            intersection(
                span(1..6),
                itemset(meta(span(1..3), '&')),
                itemset(lit(span(5..6), '&')),
            ),
        ))
    );
    // Operators with nothing between them have empty operands.
    assert_eq!(
        parser(r"[&&&&]").parse(),
        Ok(class(
            span(0..6),
            intersection(
                span(1..5),
                intersection(
                    span(1..3),
                    itemset(empty(span(1..1))),
                    itemset(empty(span(3..3))),
                ),
                itemset(empty(span(5..5))),
            ),
        ))
    );

    // A range between multi-byte characters; spans are byte offsets, so
    // they are spelled out explicitly.
    let pat = "[☃-⛄]";
    assert_eq!(
        parser(pat).parse(),
        Ok(class(
            span_range(pat, 0..9),
            itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
                span: span_range(pat, 1..8),
                start: ast::Literal {
                    span: span_range(pat, 1..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '☃',
                },
                end: ast::Literal {
                    span: span_range(pat, 5..8),
                    kind: ast::LiteralKind::Verbatim,
                    c: '⛄',
                },
            })),
        ))
    );

    // ----- brackets as class members -----

    // A `]` directly after the opening bracket is a literal.
    assert_eq!(
        parser(r"[]]").parse(),
        Ok(class(span(0..3), itemset(lit(span(1..2), ']'))))
    );
    assert_eq!(
        parser(r"[]\[]").parse(),
        Ok(class(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), ']'), meta(span(2..4), '[')],
            ),
        ))
    );
    // Here the first `]` closes the class and the second is a literal
    // outside of it.
    assert_eq!(
        parser(r"[\[]]").parse(),
        Ok(concat(
            0..5,
            vec![
                class(span(0..4), itemset(meta(span(1..3), '['))),
                Ast::literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: ']',
                }),
            ]
        ))
    );

    // ----- rejected classes -----

    let rejected = [
        ("[", 0..1, ast::ErrorKind::ClassUnclosed),
        ("[[", 1..2, ast::ErrorKind::ClassUnclosed),
        ("[[-]", 0..1, ast::ErrorKind::ClassUnclosed),
        ("[[[:alnum:]", 1..2, ast::ErrorKind::ClassUnclosed),
        (r"[\b]", 1..3, ast::ErrorKind::ClassEscapeInvalid),
        (r"[\w-a]", 1..3, ast::ErrorKind::ClassRangeLiteral),
        (r"[a-\w]", 3..5, ast::ErrorKind::ClassRangeLiteral),
        (r"[z-a]", 1..4, ast::ErrorKind::ClassRangeInvalid),
    ];
    for (pattern, at, kind) in rejected {
        assert_eq!(
            parser(pattern).parse().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }

    // With whitespace ignored, trailing spaces do not close the class.
    for pattern in ["[a ", "[a- "] {
        assert_eq!(
            parser_ignore_whitespace(pattern).parse().unwrap_err(),
            TestError {
                span: span(0..1),
                kind: ast::ErrorKind::ClassUnclosed,
            }
        );
    }
}
5633
// Exercises `parse_set_class_open` on its own. On success it yields a pair
// `(set, union)`: `set` is the bracketed class whose span covers everything
// consumed by the opener and whose kind is an empty union, and `union` holds
// any literals that were consumed as part of the opener.
//
// The cases below pin down the following behaviour:
//
// * a `^` directly after `[` (modulo ignored whitespace) negates the class;
// * one or more leading `-` are taken as literal dashes;
// * a leading `]` is taken as a literal, but only when no `-` precedes it;
// * hitting the end of the pattern while opening reports `ClassUnclosed`.
#[test]
fn parse_set_class_open() {
    // Plain opener: only `[` is consumed.
    assert_eq!(parser("[a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..1),
            negated: false,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(1..1),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
        Ok((set, union))
    });
    // With whitespace ignored, the three spaces after `[` are consumed as
    // part of the opener, so it ends at offset 4.
    assert_eq!(
        parser_ignore_whitespace("[   a]").parse_set_class_open(),
        {
            let set = ast::ClassBracketed {
                span: span(0..4),
                negated: false,
                kind: ast::ClassSet::union(ast::ClassSetUnion {
                    span: span(4..4),
                    items: vec![],
                }),
            };
            let union =
                ast::ClassSetUnion { span: span(4..4), items: vec![] };
            Ok((set, union))
        }
    );

    // Negation.
    assert_eq!(parser("[^a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..2),
            negated: true,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(2..2),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
        Ok((set, union))
    });
    assert_eq!(
        parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
        {
            let set = ast::ClassBracketed {
                span: span(0..4),
                negated: true,
                kind: ast::ClassSet::union(ast::ClassSetUnion {
                    span: span(4..4),
                    items: vec![],
                }),
            };
            let union =
                ast::ClassSetUnion { span: span(4..4), items: vec![] };
            Ok((set, union))
        }
    );

    // Leading `-` is a literal dash and lands in the returned union.
    assert_eq!(parser("[-a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..2),
            negated: false,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(1..1),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(1..2),
            items: vec![ast::ClassSetItem::Literal(ast::Literal {
                span: span(1..2),
                kind: ast::LiteralKind::Verbatim,
                c: '-',
            })],
        };
        Ok((set, union))
    });
    assert_eq!(
        parser_ignore_whitespace("[ - a]").parse_set_class_open(),
        {
            let set = ast::ClassBracketed {
                span: span(0..4),
                negated: false,
                kind: ast::ClassSet::union(ast::ClassSetUnion {
                    span: span(2..2),
                    items: vec![],
                }),
            };
            let union = ast::ClassSetUnion {
                span: span(2..3),
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: '-',
                })],
            };
            Ok((set, union))
        }
    );
    assert_eq!(parser("[^-a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..3),
            negated: true,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(2..2),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(2..3),
            items: vec![ast::ClassSetItem::Literal(ast::Literal {
                span: span(2..3),
                kind: ast::LiteralKind::Verbatim,
                c: '-',
            })],
        };
        Ok((set, union))
    });
    // Several leading dashes are all taken as literals.
    assert_eq!(parser("[--a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(1..1),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(1..3),
            items: vec![
                ast::ClassSetItem::Literal(ast::Literal {
                    span: span(1..2),
                    kind: ast::LiteralKind::Verbatim,
                    c: '-',
                }),
                ast::ClassSetItem::Literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: '-',
                }),
            ],
        };
        Ok((set, union))
    });

    // Leading `]` is a literal bracket rather than the end of the class.
    assert_eq!(parser("[]a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..2),
            negated: false,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(1..1),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(1..2),
            items: vec![ast::ClassSetItem::Literal(ast::Literal {
                span: span(1..2),
                kind: ast::LiteralKind::Verbatim,
                c: ']',
            })],
        };
        Ok((set, union))
    });
    assert_eq!(
        parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
        {
            let set = ast::ClassBracketed {
                span: span(0..4),
                negated: false,
                kind: ast::ClassSet::union(ast::ClassSetUnion {
                    span: span(2..2),
                    items: vec![],
                }),
            };
            let union = ast::ClassSetUnion {
                span: span(2..3),
                items: vec![ast::ClassSetItem::Literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: ']',
                })],
            };
            Ok((set, union))
        }
    );
    assert_eq!(parser("[^]a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..3),
            negated: true,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(2..2),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(2..3),
            items: vec![ast::ClassSetItem::Literal(ast::Literal {
                span: span(2..3),
                kind: ast::LiteralKind::Verbatim,
                c: ']',
            })],
        };
        Ok((set, union))
    });
    // A `]` that follows a leading `-` is NOT consumed as a literal: only
    // the dash ends up in the union.
    assert_eq!(parser("[-]a]").parse_set_class_open(), {
        let set = ast::ClassBracketed {
            span: span(0..2),
            negated: false,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: span(1..1),
                items: vec![],
            }),
        };
        let union = ast::ClassSetUnion {
            span: span(1..2),
            items: vec![ast::ClassSetItem::Literal(ast::Literal {
                span: span(1..2),
                kind: ast::LiteralKind::Verbatim,
                c: '-',
            })],
        };
        Ok((set, union))
    });

    // Running out of input while opening the class is always reported as
    // `ClassUnclosed`; the cases differ only in the reported span.
    assert_eq!(
        parser("[").parse_set_class_open().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    // Four ignored spaces after `[`: the error span runs to offset 5.
    assert_eq!(
        parser_ignore_whitespace("[    ")
            .parse_set_class_open()
            .unwrap_err(),
        TestError {
            span: span(0..5),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[^").parse_set_class_open().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[]").parse_set_class_open().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[-").parse_set_class_open().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parser("[--").parse_set_class_open().unwrap_err(),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );

    // See: https://github.com/rust-lang/regex/issues/792
    assert_eq!(
        parser("(?x)[-#]").parse_with_comments().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
}
5912
// Checks `maybe_parse_ascii_class`: a well-formed ASCII class such as
// `[:alnum:]` (optionally negated with `^` after the first colon) is parsed
// and only the class itself is covered by the span; anything malformed
// yields `None` and leaves the parser offset at 0.
#[test]
fn maybe_parse_ascii_class() {
    assert_eq!(
        parser(r"[:alnum:]").maybe_parse_ascii_class(),
        Some(ast::ClassAscii {
            span: span(0..9),
            kind: ast::ClassAsciiKind::Alnum,
            negated: false,
        })
    );
    // Trailing input after the class is not part of the span.
    assert_eq!(
        parser(r"[:alnum:]A").maybe_parse_ascii_class(),
        Some(ast::ClassAscii {
            span: span(0..9),
            kind: ast::ClassAsciiKind::Alnum,
            negated: false,
        })
    );
    assert_eq!(
        parser(r"[:^alnum:]").maybe_parse_ascii_class(),
        Some(ast::ClassAscii {
            span: span(0..10),
            kind: ast::ClassAsciiKind::Alnum,
            negated: true,
        })
    );

    // Truncated, misplaced-negation, misspelled and unterminated classes
    // must all be rejected without moving the parser.
    for pat in [
        r"[:",
        r"[:^",
        r"[^:alnum:]",
        r"[:alnnum:]",
        r"[:alnum]",
        r"[:alnum:",
    ] {
        let p = parser(pat);
        assert_eq!(p.maybe_parse_ascii_class(), None, "pattern: {:?}", pat);
        assert_eq!(p.offset(), 0, "pattern: {:?}", pat);
    }
}
5964
// Checks parsing of Unicode class escapes via `parse_escape` and `parse`:
// the one-letter form (`\pN`), the bracketed named form (`\p{Greek}`), the
// `name:value` / `name=value` / `name!=value` forms, the negated `\P`
// variants, and the errors reported for truncated or invalid escapes.
#[test]
fn parse_unicode_class() {
    // One-letter and bracketed names, plain and negated.
    assert_eq!(
        parser(r"\pN").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..3),
            negated: false,
            kind: ast::ClassUnicodeKind::OneLetter('N'),
        }))
    );
    assert_eq!(
        parser(r"\PN").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..3),
            negated: true,
            kind: ast::ClassUnicodeKind::OneLetter('N'),
        }))
    );
    assert_eq!(
        parser(r"\p{N}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..5),
            negated: false,
            kind: ast::ClassUnicodeKind::Named(s("N")),
        }))
    );
    assert_eq!(
        parser(r"\P{N}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..5),
            negated: true,
            kind: ast::ClassUnicodeKind::Named(s("N")),
        }))
    );
    assert_eq!(
        parser(r"\p{Greek}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..9),
            negated: false,
            kind: ast::ClassUnicodeKind::Named(s("Greek")),
        }))
    );

    // `name<op>value` forms. Note that `!=` is recorded as the operator
    // kind; the `negated` flag itself stays false.
    assert_eq!(
        parser(r"\p{scx:Katakana}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..16),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::Colon,
                name: s("scx"),
                value: s("Katakana"),
            },
        }))
    );
    assert_eq!(
        parser(r"\p{scx=Katakana}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..16),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::Equal,
                name: s("scx"),
                value: s("Katakana"),
            },
        }))
    );
    assert_eq!(
        parser(r"\p{scx!=Katakana}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..17),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::NotEqual,
                name: s("scx"),
                value: s("Katakana"),
            },
        }))
    );

    // An operator with an empty name and value is still accepted by the
    // parser.
    assert_eq!(
        parser(r"\p{:}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..5),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::Colon,
                name: s(""),
                value: s(""),
            },
        }))
    );
    assert_eq!(
        parser(r"\p{=}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..5),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::Equal,
                name: s(""),
                value: s(""),
            },
        }))
    );
    assert_eq!(
        parser(r"\p{!=}").parse_escape(),
        Ok(Primitive::Unicode(ast::ClassUnicode {
            span: span(0..6),
            negated: false,
            kind: ast::ClassUnicodeKind::NamedValue {
                op: ast::ClassUnicodeOpKind::NotEqual,
                name: s(""),
                value: s(""),
            },
        }))
    );

    // Truncated escapes: the error is an empty span at the end of input.
    assert_eq!(
        parser(r"\p").parse_escape().unwrap_err(),
        TestError {
            span: span(2..2),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    assert_eq!(
        parser(r"\p{").parse_escape().unwrap_err(),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    assert_eq!(
        parser(r"\p{N").parse_escape().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    assert_eq!(
        parser(r"\p{Greek").parse_escape().unwrap_err(),
        TestError {
            span: span(8..8),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // Through the full parser: the class ends where the escape ends, and
    // whatever follows becomes a separate node in the concatenation.
    assert_eq!(
        parser(r"\pNz").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::class_unicode(ast::ClassUnicode {
                    span: span(0..3),
                    negated: false,
                    kind: ast::ClassUnicodeKind::OneLetter('N'),
                }),
                Ast::literal(ast::Literal {
                    span: span(3..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );
    assert_eq!(
        parser(r"\p{Greek}z").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..10),
            asts: vec![
                Ast::class_unicode(ast::ClassUnicode {
                    span: span(0..9),
                    negated: false,
                    kind: ast::ClassUnicodeKind::Named(s("Greek")),
                }),
                Ast::literal(ast::Literal {
                    span: span(9..10),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );

    // A backslash is not a valid one-letter class name.
    assert_eq!(
        parser(r"\p\{").parse().unwrap_err(),
        TestError {
            span: span(2..3),
            kind: ast::ErrorKind::UnicodeClassInvalid,
        }
    );
    assert_eq!(
        parser(r"\P\{").parse().unwrap_err(),
        TestError {
            span: span(2..3),
            kind: ast::ErrorKind::UnicodeClassInvalid,
        }
    );
}
6162
// Checks parsing of the Perl-style class escapes `\d`, `\s`, `\w` and their
// upper-case negated counterparts, both through `parse_escape` and through
// the full parser.
#[test]
fn parse_perl_class() {
    // Every escape is two characters long, so only the kind and the
    // negation flag vary between cases.
    let escapes = [
        (r"\d", ast::ClassPerlKind::Digit, false),
        (r"\D", ast::ClassPerlKind::Digit, true),
        (r"\s", ast::ClassPerlKind::Space, false),
        (r"\S", ast::ClassPerlKind::Space, true),
        (r"\w", ast::ClassPerlKind::Word, false),
        (r"\W", ast::ClassPerlKind::Word, true),
    ];
    for (pat, kind, negated) in escapes {
        assert_eq!(
            parser(pat).parse_escape(),
            Ok(Primitive::Perl(ast::ClassPerl {
                span: span(0..2),
                kind,
                negated,
            })),
            "pattern: {:?}",
            pat
        );
    }

    // Through the full parser, a lone escape becomes a Perl class node...
    assert_eq!(
        parser(r"\d").parse(),
        Ok(Ast::class_perl(ast::ClassPerl {
            span: span(0..2),
            kind: ast::ClassPerlKind::Digit,
            negated: false,
        }))
    );
    // ...and a following character is a separate literal in a
    // concatenation.
    assert_eq!(
        parser(r"\dz").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..3),
            asts: vec![
                Ast::class_perl(ast::ClassPerl {
                    span: span(0..2),
                    kind: ast::ClassPerlKind::Digit,
                    negated: false,
                }),
                Ast::literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );
}
6241
// This tests a bug fix where the nest limit checker wasn't decrementing
// its depth during post-traversal, which caused long regexes to trip
// the default limit too aggressively.
//
// The pattern is long but only shallowly nested (groups inside one outer
// group, each holding an alternation of short branches), so it must parse
// successfully under a nest limit of 50.
#[test]
fn regression_454_nest_too_big() {
    let pattern = r#"
    2(?:
      [45]\d{3}|
      7(?:
        1[0-267]|
        2[0-289]|
        3[0-29]|
        4[01]|
        5[1-3]|
        6[013]|
        7[0178]|
        91
      )|
      8(?:
        0[125]|
        [139][1-6]|
        2[0157-9]|
        41|
        6[1-35]|
        7[1-5]|
        8[1-8]|
        90
      )|
      9(?:
        0[0-2]|
        1[0-4]|
        2[568]|
        3[3-6]|
        5[5-7]|
        6[0167]|
        7[15]|
        8[0146-9]
      )
    )\d{4}
    "#;
    assert!(parser_nest_limit(pattern, 50).parse().is_ok());
}
6284
// This tests that we treat a trailing `-` in a character class as a
// literal `-` even when whitespace mode is enabled and there is whitespace
// after the trailing `-`.
//
// The first group of cases closes the class after the dash (possibly with
// newlines or a `#` comment in between) and must parse. The second group
// never closes the class and must still be rejected.
#[test]
fn regression_455_trailing_dash_ignore_whitespace() {
    assert!(parser("(?x)[ / - ]").parse().is_ok());
    assert!(parser("(?x)[ a - ]").parse().is_ok());
    assert!(parser(
        "(?x)[
        a
        - ]
    "
    )
    .parse()
    .is_ok());
    assert!(parser(
        "(?x)[
        a # wat
        - ]
    "
    )
    .parse()
    .is_ok());

    assert!(parser("(?x)[ / -").parse().is_err());
    assert!(parser("(?x)[ / - ").parse().is_err());
    assert!(parser(
        "(?x)[
        / -
    "
    )
    .parse()
    .is_err());
    assert!(parser(
        "(?x)[
        / - # wat
    "
    )
    .parse()
    .is_err());
}
6326	}
6327