parse.rs source code [crates/regex_syntax/src/ast/parse.rs]

/*!
This module provides a regular expression parser.
*/
4
5	use core::{
6	borrow::Borrow,
7	cell::{Cell, RefCell},
8	mem,
9	};
10
11	use alloc::{
12	boxed::Box,
13	string::{String, ToString},
14	vec,
15	vec::Vec,
16	};
17
18	use crate::{
19	ast::{self, Ast, Position, Span},
20	either::Either,
21	is_escapeable_character, is_meta_character,
22	};
23
/// Shorthand for the result type used throughout this module: the error is
/// always an [`ast::Error`].
type Result<T> = core::result::Result<T, ast::Error>;
25
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal. This is the only primitive accepted as the endpoint of a
    /// character class range (see `into_class_literal`).
    Literal(ast::Literal),
    /// An assertion. Not a legal character class item.
    Assertion(ast::Assertion),
    /// A dot, identified only by its span. Not a legal character class item.
    Dot(Span),
    /// A Perl character class.
    Perl(ast::ClassPerl),
    /// A Unicode character class.
    Unicode(ast::ClassUnicode),
}
40
41	impl Primitive {
42	/// Return the span of this primitive.
43	fn span(&self) -> &Span {
44	match *self {
45	Primitive::Literal(ref x) => &x.span,
46	Primitive::Assertion(ref x) => &x.span,
47	Primitive::Dot(ref span) => span,
48	Primitive::Perl(ref x) => &x.span,
49	Primitive::Unicode(ref x) => &x.span,
50	}
51	}
52
53	/// Convert this primitive into a proper AST.
54	fn into_ast(self) -> Ast {
55	match self {
56	Primitive::Literal(lit) => Ast::literal(lit),
57	Primitive::Assertion(assert) => Ast::assertion(assert),
58	Primitive::Dot(span) => Ast::dot(span),
59	Primitive::Perl(cls) => Ast::class_perl(cls),
60	Primitive::Unicode(cls) => Ast::class_unicode(cls),
61	}
62	}
63
64	/// Convert this primitive into an item in a character class.
65	///
66	/// If this primitive is not a legal item (i.e., an assertion or a dot),
67	/// then return an error.
68	fn into_class_set_item<P: Borrow<Parser>>(
69	self,
70	p: &ParserI<'_, P>,
71	) -> Result<ast::ClassSetItem> {
72	use self::Primitive::*;
73	use crate::ast::ClassSetItem;
74
75	match self {
76	Literal(lit) => Ok(ClassSetItem::Literal(lit)),
77	Perl(cls) => Ok(ClassSetItem::Perl(cls)),
78	Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
79	x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
80	}
81	}
82
83	/// Convert this primitive into a literal in a character class. In
84	/// particular, literals are the only valid items that can appear in
85	/// ranges.
86	///
87	/// If this primitive is not a legal item (i.e., a class, assertion or a
88	/// dot), then return an error.
89	fn into_class_literal<P: Borrow<Parser>>(
90	self,
91	p: &ParserI<'_, P>,
92	) -> Result<ast::Literal> {
93	use self::Primitive::*;
94
95	match self {
96	Literal(lit) => Ok(lit),
97	x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
98	}
99	}
100	}
101
/// Returns true if the given character is an ASCII hexadecimal digit, i.e.,
/// one of `0-9`, `a-f` or `A-F`.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
106
/// Returns true if the given character is valid in a capture group name.
///
/// If `first` is true, then `c` is treated as the first character in the
/// group name, which must be alphabetic or an underscore. Subsequent
/// characters may additionally be alphanumeric or one of `.`, `[` and `]`.
fn is_capture_char(c: char, first: bool) -> bool {
    if first {
        c == '_' || c.is_alphabetic()
    } else {
        matches!(c, '_' | '.' | '[' | ']') || c.is_alphanumeric()
    }
}
118
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether verbose mode (insignificant whitespace and comments) is
    /// enabled at the start of a parse.
    ignore_whitespace: bool,
    /// The nesting limit handed to the built parser.
    nest_limit: u32,
    /// Whether octal syntax is supported.
    octal: bool,
    /// Whether `{,n}` is accepted as an equivalent to `{0,n}`.
    empty_min_range: bool,
}
129
130	impl Default for ParserBuilder {
131	fn default() -> ParserBuilder {
132	ParserBuilder::new()
133	}
134	}
135
136	impl ParserBuilder {
137	/// Create a new parser builder with a default configuration.
138	pub fn new() -> ParserBuilder {
139	ParserBuilder {
140	ignore_whitespace: `false`,
141	nest_limit: `250`,
142	octal: `false`,
143	empty_min_range: `false`,
144	}
145	}
146
147	/// Build a parser from this configuration with the given pattern.
148	pub fn build(&self) -> Parser {
149	Parser {
150	pos: Cell::new(Position { offset: `0`, line: `1`, column: `1` }),
151	capture_index: Cell::new(`0`),
152	nest_limit: self.nest_limit,
153	octal: self.octal,
154	empty_min_range: self.empty_min_range,
155	initial_ignore_whitespace: self.ignore_whitespace,
156	ignore_whitespace: Cell::new(self.ignore_whitespace),
157	comments: RefCell::new(vec![]),
158	stack_group: RefCell::new(vec![]),
159	stack_class: RefCell::new(vec![]),
160	capture_names: RefCell::new(vec![]),
161	scratch: RefCell::new(String::new()),
162	}
163	}
164
165	/// Set the nesting limit for this parser.
166	///
167	/// The nesting limit controls how deep the abstract syntax tree is allowed
168	/// to be. If the AST exceeds the given limit (e.g., with too many nested
169	/// groups), then an error is returned by the parser.
170	///
171	/// The purpose of this limit is to act as a heuristic to prevent stack
172	/// overflow for consumers that do structural induction on an `Ast` using
173	/// explicit recursion. While this crate never does this (instead using
174	/// constant stack space and moving the call stack to the heap), other
175	/// crates may.
176	///
177	/// This limit is not checked until the entire AST is parsed. Therefore,
178	/// if callers want to put a limit on the amount of heap space used, then
179	/// they should impose a limit on the length, in bytes, of the concrete
180	/// pattern string. In particular, this is viable since this parser
181	/// implementation will limit itself to heap space proportional to the
182	/// length of the pattern string.
183	///
184	/// Note that a nest limit of `0` will return a nest limit error for most
185	/// patterns but not all. For example, a nest limit of `0` permits `a` but
186	/// not `ab`, since `ab` requires a concatenation, which results in a nest
187	/// depth of `1`. In general, a nest limit is not something that manifests
188	/// in an obvious way in the concrete syntax, therefore, it should not be
189	/// used in a granular way.
190	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
191	self.nest_limit = limit;
192	self
193	}
194
195	/// Whether to support octal syntax or not.
196	///
197	/// Octal syntax is a little-known way of uttering Unicode codepoints in
198	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
199	/// `\141` are all equivalent regular expressions, where the last example
200	/// shows octal syntax.
201	///
202	/// While supporting octal syntax isn't in and of itself a problem, it does
203	/// make good error messages harder. That is, in PCRE based regex engines,
204	/// syntax like `\0` invokes a backreference, which is explicitly
205	/// unsupported in Rust's regex engine. However, many users expect it to
206	/// be supported. Therefore, when octal support is disabled, the error
207	/// message will explicitly mention that backreferences aren't supported.
208	///
209	/// Octal syntax is disabled by default.
210	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
211	self.octal = yes;
212	self
213	}
214
215	/// Enable verbose mode in the regular expression.
216	///
217	/// When enabled, verbose mode permits insignificant whitespace in many
218	/// places in the regular expression, as well as comments. Comments are
219	/// started using `#` and continue until the end of the line.
220	///
221	/// By default, this is disabled. It may be selectively enabled in the
222	/// regular expression by using the `x` flag regardless of this setting.
223	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
224	self.ignore_whitespace = yes;
225	self
226	}
227
228	/// Allow using `{,n}` as an equivalent to `{0,n}`.
229	///
230	/// When enabled, the parser accepts `{,n}` as valid syntax for `{0,n}`.
231	/// Most regular expression engines don't support the `{,n}` syntax, but
232	/// some others do it, namely Python's `re` library.
233	///
234	/// This is disabled by default.
235	pub fn empty_min_range(&mut self, yes: bool) -> &mut ParserBuilder {
236	self.empty_min_range = yes;
237	self
238	}
239	}
240
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. It is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether the parser supports `{,n}` repetitions as an equivalent to
    /// `{0,n}`.
    empty_min_range: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sequence of capture names, kept sorted by name. This is used to
    /// detect duplicate capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
284
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but a `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration. The methods on this type require
    /// `P: Borrow<Parser>`.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
301
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// The `x` flag setting that was in effect before this group was
        /// opened. It is restored when the group is closed.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
322
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the stored
    /// set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., `&&`, `--` or `~~`.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
349
350	impl Parser {
351	/// Create a new parser with a default configuration.
352	///
353	/// The parser can be run with either the `parse` or `parse_with_comments`
354	/// methods. The parse methods return an abstract syntax tree.
355	///
356	/// To set configuration options on the parser, use [`ParserBuilder`].
357	pub fn new() -> Parser {
358	ParserBuilder::new().build()
359	}
360
361	/// Parse the regular expression into an abstract syntax tree.
362	pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
363	ParserI::new(self, pattern).parse()
364	}
365
366	/// Parse the regular expression and return an abstract syntax tree with
367	/// all of the comments found in the pattern.
368	pub fn parse_with_comments(
369	&mut self,
370	pattern: &str,
371	) -> Result<ast::WithComments> {
372	ParserI::new(self, pattern).parse_with_comments()
373	}
374
375	/// Reset the internal state of a parser.
376	///
377	/// This is called at the beginning of every parse. This prevents the
378	/// parser from running with inconsistent state (say, if a previous
379	/// invocation returned an error and the parser is reused).
380	fn reset(&self) {
381	// These settings should be in line with the construction
382	// in `ParserBuilder::build`.
383	self.pos.set(Position { offset: `0`, line: `1`, column: `1` });
384	self.ignore_whitespace.set(self.initial_ignore_whitespace);
385	self.comments.borrow_mut().clear();
386	self.stack_group.borrow_mut().clear();
387	self.stack_class.borrow_mut().clear();
388	}
389	}
390
391	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
392	/// Build an internal parser from a parser configuration and a pattern.
393	fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
394	ParserI { parser, pattern }
395	}
396
397	/// Return a reference to the parser state.
398	fn parser(&self) -> &Parser {
399	self.parser.borrow()
400	}
401
402	/// Return a reference to the pattern being parsed.
403	fn pattern(&self) -> &str {
404	self.pattern
405	}
406
407	/// Create a new error with the given span and error type.
408	fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
409	ast::Error { kind, pattern: self.pattern().to_string(), span }
410	}
411
412	/// Return the current offset of the parser.
413	///
414	/// The offset starts at `0` from the beginning of the regular expression
415	/// pattern string.
416	fn offset(&self) -> usize {
417	self.parser().pos.get().offset
418	}
419
420	/// Return the current line number of the parser.
421	///
422	/// The line number starts at `1`.
423	fn line(&self) -> usize {
424	self.parser().pos.get().line
425	}
426
427	/// Return the current column of the parser.
428	///
429	/// The column number starts at `1` and is reset whenever a `\n` is seen.
430	fn column(&self) -> usize {
431	self.parser().pos.get().column
432	}
433
434	/// Return the next capturing index. Each subsequent call increments the
435	/// internal index.
436	///
437	/// The span given should correspond to the location of the opening
438	/// parenthesis.
439	///
440	/// If the capture limit is exceeded, then an error is returned.
441	fn next_capture_index(&self, span: Span) -> Result<u32> {
442	let current = self.parser().capture_index.get();
443	let i = current.checked_add(`1`).ok_or_else(\|\| {
444	self.error(span, ast::ErrorKind::CaptureLimitExceeded)
445	})?;
446	self.parser().capture_index.set(i);
447	Ok(i)
448	}
449
450	/// Adds the given capture name to this parser. If this capture name has
451	/// already been used, then an error is returned.
452	fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
453	let mut names = self.parser().capture_names.borrow_mut();
454	match names
455	.binary_search_by_key(&cap.name.as_str(), \|c\| c.name.as_str())
456	{
457	Err(i) => {
458	names.insert(i, cap.clone());
459	Ok(())
460	}
461	Ok(i) => Err(self.error(
462	cap.span,
463	ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
464	)),
465	}
466	}
467
468	/// Return whether the parser should ignore whitespace or not.
469	fn ignore_whitespace(&self) -> bool {
470	self.parser().ignore_whitespace.get()
471	}
472
473	/// Return the character at the current position of the parser.
474	///
475	/// This panics if the current position does not point to a valid char.
476	fn char(&self) -> char {
477	self.char_at(self.offset())
478	}
479
480	/// Return the character at the given position.
481	///
482	/// This panics if the given position does not point to a valid char.
483	fn char_at(&self, i: usize) -> char {
484	self.pattern()[i..]
485	.chars()
486	.next()
487	.unwrap_or_else(\|\| panic!("expected char at offset {}", i))
488	}
489
490	/// Bump the parser to the next Unicode scalar value.
491	///
492	/// If the end of the input has been reached, then `false` is returned.
493	fn bump(&self) -> bool {
494	if self.is_eof() {
495	return `false`;
496	}
497	let Position { mut offset, mut line, mut column } = self.pos();
498	if self.char() == '`\n`' {
499	line = line.checked_add(`1`).unwrap();
500	column = `1`;
501	} else {
502	column = column.checked_add(`1`).unwrap();
503	}
504	offset += self.char().len_utf8();
505	self.parser().pos.set(Position { offset, line, column });
506	self.pattern()[self.offset()..].chars().next().is_some()
507	}
508
509	/// If the substring starting at the current position of the parser has
510	/// the given prefix, then bump the parser to the character immediately
511	/// following the prefix and return true. Otherwise, don't bump the parser
512	/// and return false.
513	fn bump_if(&self, prefix: &str) -> bool {
514	if self.pattern()[self.offset()..].starts_with(prefix) {
515	for _ in `0`..prefix.chars().count() {
516	self.bump();
517	}
518	`true`
519	} else {
520	`false`
521	}
522	}
523
524	/// Returns true if and only if the parser is positioned at a look-around
525	/// prefix. The conditions under which this returns true must always
526	/// correspond to a regular expression that would otherwise be consider
527	/// invalid.
528	///
529	/// This should only be called immediately after parsing the opening of
530	/// a group or a set of flags.
531	fn is_lookaround_prefix(&self) -> bool {
532	self.bump_if("?=")
533	\|\| self.bump_if("?!")
534	\|\| self.bump_if("?<=")
535	\|\| self.bump_if("?<!")
536	}
537
538	/// Bump the parser, and if the `x` flag is enabled, bump through any
539	/// subsequent spaces. Return true if and only if the parser is not at
540	/// EOF.
541	fn bump_and_bump_space(&self) -> bool {
542	if !self.bump() {
543	return `false`;
544	}
545	self.bump_space();
546	!self.is_eof()
547	}
548
549	/// If the `x` flag is enabled (i.e., whitespace insensitivity with
550	/// comments), then this will advance the parser through all whitespace
551	/// and comments to the next non-whitespace non-comment byte.
552	///
553	/// If the `x` flag is disabled, then this is a no-op.
554	///
555	/// This should be used selectively throughout the parser where
556	/// arbitrary whitespace is permitted when the `x` flag is enabled. For
557	/// example, `{ 5 , 6}` is equivalent to `{5,6}`.
558	fn bump_space(&self) {
559	if !self.ignore_whitespace() {
560	return;
561	}
562	while !self.is_eof() {
563	if self.char().is_whitespace() {
564	self.bump();
565	} else if self.char() == '#' {
566	let start = self.pos();
567	let mut comment_text = String::new();
568	self.bump();
569	while !self.is_eof() {
570	let c = self.char();
571	self.bump();
572	if c == '`\n`' {
573	break;
574	}
575	comment_text.push(c);
576	}
577	let comment = ast::Comment {
578	span: Span::new(start, self.pos()),
579	comment: comment_text,
580	};
581	self.parser().comments.borrow_mut().push(comment);
582	} else {
583	break;
584	}
585	}
586	}
587
588	/// Peek at the next character in the input without advancing the parser.
589	///
590	/// If the input has been exhausted, then this returns `None`.
591	fn peek(&self) -> Option<char> {
592	if self.is_eof() {
593	return None;
594	}
595	self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
596	}
597
598	/// Like peek, but will ignore spaces when the parser is in whitespace
599	/// insensitive mode.
600	fn peek_space(&self) -> Option<char> {
601	if !self.ignore_whitespace() {
602	return self.peek();
603	}
604	if self.is_eof() {
605	return None;
606	}
607	let mut start = self.offset() + self.char().len_utf8();
608	let mut in_comment = `false`;
609	for (i, c) in self.pattern()[start..].char_indices() {
610	if c.is_whitespace() {
611	continue;
612	} else if !in_comment && c == '#' {
613	in_comment = `true`;
614	} else if in_comment && c == '`\n`' {
615	in_comment = `false`;
616	} else {
617	start += i;
618	break;
619	}
620	}
621	self.pattern()[start..].chars().next()
622	}
623
624	/// Returns true if the next call to `bump` would return false.
625	fn is_eof(&self) -> bool {
626	self.offset() == self.pattern().len()
627	}
628
629	/// Return the current position of the parser, which includes the offset,
630	/// line and column.
631	fn pos(&self) -> Position {
632	self.parser().pos.get()
633	}
634
635	/// Create a span at the current position of the parser. Both the start
636	/// and end of the span are set.
637	fn span(&self) -> Span {
638	Span::splat(self.pos())
639	}
640
641	/// Create a span that covers the current character.
642	fn span_char(&self) -> Span {
643	let mut next = Position {
644	offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
645	line: self.line(),
646	column: self.column().checked_add(`1`).unwrap(),
647	};
648	if self.char() == '`\n`' {
649	next.line += `1`;
650	next.column = `1`;
651	}
652	Span::new(self.pos(), next)
653	}
654
655	/// Parse and push a single alternation on to the parser's internal stack.
656	/// If the top of the stack already has an alternation, then add to that
657	/// instead of pushing a new one.
658	///
659	/// The concatenation given corresponds to a single alternation branch.
660	/// The concatenation returned starts the next branch and is empty.
661	///
662	/// This assumes the parser is currently positioned at `\|` and will advance
663	/// the parser to the character following `\|`.
664	#[inline(never)]
665	fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
666	assert_eq!(self.char(), '\|');
667	concat.span.end = self.pos();
668	self.push_or_add_alternation(concat);
669	self.bump();
670	Ok(ast::Concat { span: self.span(), asts: vec![] })
671	}
672
673	/// Pushes or adds the given branch of an alternation to the parser's
674	/// internal stack of state.
675	fn push_or_add_alternation(&self, concat: ast::Concat) {
676	use self::GroupState::*;
677
678	let mut stack = self.parser().stack_group.borrow_mut();
679	if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
680	alts.asts.push(concat.into_ast());
681	return;
682	}
683	stack.push(Alternation(ast::Alternation {
684	span: Span::new(concat.span.start, self.pos()),
685	asts: vec![concat.into_ast()],
686	}));
687	}
688
689	/// Parse and push a group AST (and its parent concatenation) on to the
690	/// parser's internal stack. Return a fresh concatenation corresponding
691	/// to the group's sub-AST.
692	///
693	/// If a set of flags was found (with no group), then the concatenation
694	/// is returned with that set of flags added.
695	///
696	/// This assumes that the parser is currently positioned on the opening
697	/// parenthesis. It advances the parser to the character at the start
698	/// of the sub-expression (or adjoining expression).
699	///
700	/// If there was a problem parsing the start of the group, then an error
701	/// is returned.
702	#[inline(never)]
703	fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
704	assert_eq!(self.char(), '(');
705	match self.parse_group()? {
706	Either::Left(set) => {
707	let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
708	if let Some(v) = ignore {
709	self.parser().ignore_whitespace.set(v);
710	}
711
712	concat.asts.push(Ast::flags(set));
713	Ok(concat)
714	}
715	Either::Right(group) => {
716	let old_ignore_whitespace = self.ignore_whitespace();
717	let new_ignore_whitespace = group
718	.flags()
719	.and_then(\|f\| f.flag_state(ast::Flag::IgnoreWhitespace))
720	.unwrap_or(old_ignore_whitespace);
721	self.parser().stack_group.borrow_mut().push(
722	GroupState::Group {
723	concat,
724	group,
725	ignore_whitespace: old_ignore_whitespace,
726	},
727	);
728	self.parser().ignore_whitespace.set(new_ignore_whitespace);
729	Ok(ast::Concat { span: self.span(), asts: vec![] })
730	}
731	}
732	}
733
734	/// Pop a group AST from the parser's internal stack and set the group's
735	/// AST to the given concatenation. Return the concatenation containing
736	/// the group.
737	///
738	/// This assumes that the parser is currently positioned on the closing
739	/// parenthesis and advances the parser to the character following the `)`.
740	///
741	/// If no such group could be popped, then an unopened group error is
742	/// returned.
743	#[inline(never)]
744	fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
745	use self::GroupState::*;
746
747	assert_eq!(self.char(), ')');
748	let mut stack = self.parser().stack_group.borrow_mut();
749	let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
750	.pop()
751	{
752	Some(Group { concat, group, ignore_whitespace }) => {
753	(concat, group, ignore_whitespace, None)
754	}
755	Some(Alternation(alt)) => match stack.pop() {
756	Some(Group { concat, group, ignore_whitespace }) => {
757	(concat, group, ignore_whitespace, Some(alt))
758	}
759	None \| Some(Alternation(_)) => {
760	return Err(self.error(
761	self.span_char(),
762	ast::ErrorKind::GroupUnopened,
763	));
764	}
765	},
766	None => {
767	return Err(self
768	.error(self.span_char(), ast::ErrorKind::GroupUnopened));
769	}
770	};
771	self.parser().ignore_whitespace.set(ignore_whitespace);
772	group_concat.span.end = self.pos();
773	self.bump();
774	group.span.end = self.pos();
775	match alt {
776	Some(mut alt) => {
777	alt.span.end = group_concat.span.end;
778	alt.asts.push(group_concat.into_ast());
779	group.ast = Box::new(alt.into_ast());
780	}
781	None => {
782	group.ast = Box::new(group_concat.into_ast());
783	}
784	}
785	prior_concat.asts.push(Ast::group(group));
786	Ok(prior_concat)
787	}
788
789	/// Pop the last state from the parser's internal stack, if it exists, and
790	/// add the given concatenation to it. There either must be no state or a
791	/// single alternation item on the stack. Any other scenario produces an
792	/// error.
793	///
794	/// This assumes that the parser has advanced to the end.
795	#[inline(never)]
796	fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
797	concat.span.end = self.pos();
798	let mut stack = self.parser().stack_group.borrow_mut();
799	let ast = match stack.pop() {
800	None => Ok(concat.into_ast()),
801	Some(GroupState::Alternation(mut alt)) => {
802	alt.span.end = self.pos();
803	alt.asts.push(concat.into_ast());
804	Ok(Ast::alternation(alt))
805	}
806	Some(GroupState::Group { group, .. }) => {
807	return Err(
808	self.error(group.span, ast::ErrorKind::GroupUnclosed)
809	);
810	}
811	};
812	// If we try to pop again, there should be nothing.
813	match stack.pop() {
814	None => ast,
815	Some(GroupState::Alternation(_)) => {
816	// This unreachable is unfortunate. This case can't happen
817	// because the only way we can be here is if there were two
818	// `GroupState::Alternation`s adjacent in the parser's stack,
819	// which we guarantee to never happen because we never push a
820	// `GroupState::Alternation` if one is already at the top of
821	// the stack.
822	unreachable!()
823	}
824	Some(GroupState::Group { group, .. }) => {
825	Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
826	}
827	}
828	}
829
830	/// Parse the opening of a character class and push the current class
831	/// parsing context onto the parser's stack. This assumes that the parser
832	/// is positioned at an opening `[`. The given union should correspond to
833	/// the union of set items built up before seeing the `[`.
834	///
835	/// If there was a problem parsing the opening of the class, then an error
836	/// is returned. Otherwise, a new union of set items for the class is
837	/// returned (which may be populated with either a `]` or a `-`).
838	#[inline(never)]
839	fn push_class_open(
840	&self,
841	parent_union: ast::ClassSetUnion,
842	) -> Result<ast::ClassSetUnion> {
843	assert_eq!(self.char(), '[');
844
845	let (nested_set, nested_union) = self.parse_set_class_open()?;
846	self.parser()
847	.stack_class
848	.borrow_mut()
849	.push(ClassState::Open { union: parent_union, set: nested_set });
850	Ok(nested_union)
851	}
852
853	/// Parse the end of a character class set and pop the character class
854	/// parser stack. The union given corresponds to the last union built
855	/// before seeing the closing `]`. The union returned corresponds to the
856	/// parent character class set with the nested class added to it.
857	///
858	/// This assumes that the parser is positioned at a `]` and will advance
859	/// the parser to the byte immediately following the `]`.
860	///
861	/// If the stack is empty after popping, then this returns the final
862	/// "top-level" character class AST (where a "top-level" character class
863	/// is one that is not nested inside any other character class).
864	///
865	/// If there is no corresponding opening bracket on the parser's stack,
866	/// then an error is returned.
867	#[inline(never)]
868	fn pop_class(
869	&self,
870	nested_union: ast::ClassSetUnion,
871	) -> Result<Either<ast::ClassSetUnion, ast::ClassBracketed>> {
872	assert_eq!(self.char(), ']');
873
874	let item = ast::ClassSet::Item(nested_union.into_item());
875	let prevset = self.pop_class_op(item);
876	let mut stack = self.parser().stack_class.borrow_mut();
877	match stack.pop() {
878	None => {
879	// We can never observe an empty stack:
880	//
881	// 1) We are guaranteed to start with a non-empty stack since
882	// the character class parser is only initiated when it sees
883	// a `[`.
884	// 2) If we ever observe an empty stack while popping after
885	// seeing a `]`, then we signal the character class parser
886	// to terminate.
887	panic!("unexpected empty character class stack")
888	}
889	Some(ClassState::Op { .. }) => {
890	// This panic is unfortunate, but this case is impossible
891	// since we already popped the Op state if one exists above.
892	// Namely, every push to the class parser stack is guarded by
893	// whether an existing Op is already on the top of the stack.
894	// If it is, the existing Op is modified. That is, the stack
895	// can never have consecutive Op states.
896	panic!("unexpected ClassState::Op")
897	}
898	Some(ClassState::Open { mut union, mut set }) => {
899	self.bump();
900	set.span.end = self.pos();
901	set.kind = prevset;
902	if stack.is_empty() {
903	Ok(Either::Right(set))
904	} else {
905	union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
906	Ok(Either::Left(union))
907	}
908	}
909	}
910	}
911
912	/// Return an "unclosed class" error whose span points to the most
913	/// recently opened class.
914	///
915	/// This should only be called while parsing a character class.
916	#[inline(never)]
917	fn unclosed_class_error(&self) -> ast::Error {
918	for state in self.parser().stack_class.borrow().iter().rev() {
919	if let ClassState::Open { ref set, .. } = *state {
920	return self.error(set.span, ast::ErrorKind::ClassUnclosed);
921	}
922	}
923	// We are guaranteed to have a non-empty stack with at least
924	// one open bracket, so we should never get here.
925	panic!("no open character class found")
926	}
927
928	/// Push the current set of class items on to the class parser's stack as
929	/// the left hand side of the given operator.
930	///
931	/// A fresh set union is returned, which should be used to build the right
932	/// hand side of this operator.
933	#[inline(never)]
934	fn push_class_op(
935	&self,
936	next_kind: ast::ClassSetBinaryOpKind,
937	next_union: ast::ClassSetUnion,
938	) -> ast::ClassSetUnion {
939	let item = ast::ClassSet::Item(next_union.into_item());
940	let new_lhs = self.pop_class_op(item);
941	self.parser()
942	.stack_class
943	.borrow_mut()
944	.push(ClassState::Op { kind: next_kind, lhs: new_lhs });
945	ast::ClassSetUnion { span: self.span(), items: vec![] }
946	}
947
948	/// Pop a character class set from the character class parser stack. If the
949	/// top of the stack is just an item (not an operation), then return the
950	/// given set unchanged. If the top of the stack is an operation, then the
951	/// given set will be used as the rhs of the operation on the top of the
952	/// stack. In that case, the binary operation is returned as a set.
953	#[inline(never)]
954	fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
955	let mut stack = self.parser().stack_class.borrow_mut();
956	let (kind, lhs) = match stack.pop() {
957	Some(ClassState::Op { kind, lhs }) => (kind, lhs),
958	Some(state @ ClassState::Open { .. }) => {
959	stack.push(state);
960	return rhs;
961	}
962	None => unreachable!(),
963	};
964	let span = Span::new(lhs.span().start, rhs.span().end);
965	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
966	span,
967	kind,
968	lhs: Box::new(lhs),
969	rhs: Box::new(rhs),
970	})
971	}
972	}
973
974	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
975	/// Parse the regular expression into an abstract syntax tree.
976	fn parse(&self) -> Result<Ast> {
977	self.parse_with_comments().map(\|astc\| astc.ast)
978	}
979
980	/// Parse the regular expression and return an abstract syntax tree with
981	/// all of the comments found in the pattern.
982	fn parse_with_comments(&self) -> Result<ast::WithComments> {
983	assert_eq!(self.offset(), `0`, "parser can only be used once");
984	self.parser().reset();
985	let mut concat = ast::Concat { span: self.span(), asts: vec![] };
986	loop {
987	self.bump_space();
988	if self.is_eof() {
989	break;
990	}
991	match self.char() {
992	'(' => concat = self.push_group(concat)?,
993	')' => concat = self.pop_group(concat)?,
994	'\|' => concat = self.push_alternate(concat)?,
995	'[' => {
996	let class = self.parse_set_class()?;
997	concat.asts.push(Ast::class_bracketed(class));
998	}
999	'?' => {
1000	concat = self.parse_uncounted_repetition(
1001	concat,
1002	ast::RepetitionKind::ZeroOrOne,
1003	)?;
1004	}
1005	'*' => {
1006	concat = self.parse_uncounted_repetition(
1007	concat,
1008	ast::RepetitionKind::ZeroOrMore,
1009	)?;
1010	}
1011	'+' => {
1012	concat = self.parse_uncounted_repetition(
1013	concat,
1014	ast::RepetitionKind::OneOrMore,
1015	)?;
1016	}
1017	'{' => {
1018	concat = self.parse_counted_repetition(concat)?;
1019	}
1020	_ => concat.asts.push(self.parse_primitive()?.into_ast()),
1021	}
1022	}
1023	let ast = self.pop_group_end(concat)?;
1024	NestLimiter::new(self).check(&ast)?;
1025	Ok(ast::WithComments {
1026	ast,
1027	comments: mem::replace(
1028	&mut *self.parser().comments.borrow_mut(),
1029	vec![],
1030	),
1031	})
1032	}
1033
1034	/// Parses an uncounted repetition operation. An uncounted repetition
1035	/// operator includes ?, and +, but does not include the {m,n} syntax.*
1036	/// The given `kind` should correspond to the operator observed by the
1037	/// caller.
1038	///
1039	/// This assumes that the parser is currently positioned at the repetition
1040	/// operator and advances the parser to the first character after the
1041	/// operator. (Note that the operator may include a single additional `?`,
1042	/// which makes the operator ungreedy.)
1043	///
1044	/// The caller should include the concatenation that is being built. The
1045	/// concatenation returned includes the repetition operator applied to the
1046	/// last expression in the given concatenation.
1047	#[inline(never)]
1048	fn parse_uncounted_repetition(
1049	&self,
1050	mut concat: ast::Concat,
1051	kind: ast::RepetitionKind,
1052	) -> Result<ast::Concat> {
1053	assert!(
1054	self.char() == '?' \|\| self.char() == '*' \|\| self.char() == '+'
1055	);
1056	let op_start = self.pos();
1057	let ast = match concat.asts.pop() {
1058	Some(ast) => ast,
1059	None => {
1060	return Err(
1061	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1062	)
1063	}
1064	};
1065	match ast {
1066	Ast::Empty(_) \| Ast::Flags(_) => {
1067	return Err(
1068	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1069	)
1070	}
1071	_ => {}
1072	}
1073	let mut greedy = `true`;
1074	if self.bump() && self.char() == '?' {
1075	greedy = `false`;
1076	self.bump();
1077	}
1078	concat.asts.push(Ast::repetition(ast::Repetition {
1079	span: ast.span().with_end(self.pos()),
1080	op: ast::RepetitionOp {
1081	span: Span::new(op_start, self.pos()),
1082	kind,
1083	},
1084	greedy,
1085	ast: Box::new(ast),
1086	}));
1087	Ok(concat)
1088	}
1089
1090	/// Parses a counted repetition operation. A counted repetition operator
1091	/// corresponds to the {m,n} syntax, and does not include the ?, or +*
1092	/// operators.
1093	///
1094	/// This assumes that the parser is currently positioned at the opening `{`
1095	/// and advances the parser to the first character after the operator.
1096	/// (Note that the operator may include a single additional `?`, which
1097	/// makes the operator ungreedy.)
1098	///
1099	/// The caller should include the concatenation that is being built. The
1100	/// concatenation returned includes the repetition operator applied to the
1101	/// last expression in the given concatenation.
1102	#[inline(never)]
1103	fn parse_counted_repetition(
1104	&self,
1105	mut concat: ast::Concat,
1106	) -> Result<ast::Concat> {
1107	assert!(self.char() == '{');
1108	let start = self.pos();
1109	let ast = match concat.asts.pop() {
1110	Some(ast) => ast,
1111	None => {
1112	return Err(
1113	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1114	)
1115	}
1116	};
1117	match ast {
1118	Ast::Empty(_) \| Ast::Flags(_) => {
1119	return Err(
1120	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1121	)
1122	}
1123	_ => {}
1124	}
1125	if !self.bump_and_bump_space() {
1126	return Err(self.error(
1127	Span::new(start, self.pos()),
1128	ast::ErrorKind::RepetitionCountUnclosed,
1129	));
1130	}
1131	let count_start = specialize_err(
1132	self.parse_decimal(),
1133	ast::ErrorKind::DecimalEmpty,
1134	ast::ErrorKind::RepetitionCountDecimalEmpty,
1135	);
1136	if self.is_eof() {
1137	return Err(self.error(
1138	Span::new(start, self.pos()),
1139	ast::ErrorKind::RepetitionCountUnclosed,
1140	));
1141	}
1142	let range = if self.char() == ',' {
1143	if !self.bump_and_bump_space() {
1144	return Err(self.error(
1145	Span::new(start, self.pos()),
1146	ast::ErrorKind::RepetitionCountUnclosed,
1147	));
1148	}
1149	if self.char() != '}' {
1150	let count_start = match count_start {
1151	Ok(c) => c,
1152	Err(err)
1153	if err.kind
1154	== ast::ErrorKind::RepetitionCountDecimalEmpty =>
1155	{
1156	if self.parser().empty_min_range {
1157	`0`
1158	} else {
1159	return Err(err);
1160	}
1161	}
1162	err => err?,
1163	};
1164	let count_end = specialize_err(
1165	self.parse_decimal(),
1166	ast::ErrorKind::DecimalEmpty,
1167	ast::ErrorKind::RepetitionCountDecimalEmpty,
1168	)?;
1169	ast::RepetitionRange::Bounded(count_start, count_end)
1170	} else {
1171	ast::RepetitionRange::AtLeast(count_start?)
1172	}
1173	} else {
1174	ast::RepetitionRange::Exactly(count_start?)
1175	};
1176
1177	if self.is_eof() \|\| self.char() != '}' {
1178	return Err(self.error(
1179	Span::new(start, self.pos()),
1180	ast::ErrorKind::RepetitionCountUnclosed,
1181	));
1182	}
1183
1184	let mut greedy = `true`;
1185	if self.bump_and_bump_space() && self.char() == '?' {
1186	greedy = `false`;
1187	self.bump();
1188	}
1189
1190	let op_span = Span::new(start, self.pos());
1191	if !range.is_valid() {
1192	return Err(
1193	self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1194	);
1195	}
1196	concat.asts.push(Ast::repetition(ast::Repetition {
1197	span: ast.span().with_end(self.pos()),
1198	op: ast::RepetitionOp {
1199	span: op_span,
1200	kind: ast::RepetitionKind::Range(range),
1201	},
1202	greedy,
1203	ast: Box::new(ast),
1204	}));
1205	Ok(concat)
1206	}
1207
1208	/// Parse a group (which contains a sub-expression) or a set of flags.
1209	///
1210	/// If a group was found, then it is returned with an empty AST. If a set
1211	/// of flags is found, then that set is returned.
1212	///
1213	/// The parser should be positioned at the opening parenthesis.
1214	///
1215	/// This advances the parser to the character before the start of the
1216	/// sub-expression (in the case of a group) or to the closing parenthesis
1217	/// immediately following the set of flags.
1218	///
1219	/// # Errors
1220	///
1221	/// If flags are given and incorrectly specified, then a corresponding
1222	/// error is returned.
1223	///
1224	/// If a capture name is given and it is incorrectly specified, then a
1225	/// corresponding error is returned.
1226	#[inline(never)]
1227	fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1228	assert_eq!(self.char(), '(');
1229	let open_span = self.span_char();
1230	self.bump();
1231	self.bump_space();
1232	if self.is_lookaround_prefix() {
1233	return Err(self.error(
1234	Span::new(open_span.start, self.span().end),
1235	ast::ErrorKind::UnsupportedLookAround,
1236	));
1237	}
1238	let inner_span = self.span();
1239	let mut starts_with_p = `true`;
1240	if self.bump_if("?P<") \|\| {
1241	starts_with_p = `false`;
1242	self.bump_if("?<")
1243	} {
1244	let capture_index = self.next_capture_index(open_span)?;
1245	let name = self.parse_capture_name(capture_index)?;
1246	Ok(Either::Right(ast::Group {
1247	span: open_span,
1248	kind: ast::GroupKind::CaptureName { starts_with_p, name },
1249	ast: Box::new(Ast::empty(self.span())),
1250	}))
1251	} else if self.bump_if("?") {
1252	if self.is_eof() {
1253	return Err(
1254	self.error(open_span, ast::ErrorKind::GroupUnclosed)
1255	);
1256	}
1257	let flags = self.parse_flags()?;
1258	let char_end = self.char();
1259	self.bump();
1260	if char_end == ')' {
1261	// We don't allow empty flags, e.g., `(?)`. We instead
1262	// interpret it as a repetition operator missing its argument.
1263	if flags.items.is_empty() {
1264	return Err(self.error(
1265	inner_span,
1266	ast::ErrorKind::RepetitionMissing,
1267	));
1268	}
1269	Ok(Either::Left(ast::SetFlags {
1270	span: Span { end: self.pos(), ..open_span },
1271	flags,
1272	}))
1273	} else {
1274	assert_eq!(char_end, ':');
1275	Ok(Either::Right(ast::Group {
1276	span: open_span,
1277	kind: ast::GroupKind::NonCapturing(flags),
1278	ast: Box::new(Ast::empty(self.span())),
1279	}))
1280	}
1281	} else {
1282	let capture_index = self.next_capture_index(open_span)?;
1283	Ok(Either::Right(ast::Group {
1284	span: open_span,
1285	kind: ast::GroupKind::CaptureIndex(capture_index),
1286	ast: Box::new(Ast::empty(self.span())),
1287	}))
1288	}
1289	}
1290
1291	/// Parses a capture group name. Assumes that the parser is positioned at
1292	/// the first character in the name following the opening `<` (and may
1293	/// possibly be EOF). This advances the parser to the first character
1294	/// following the closing `>`.
1295	///
1296	/// The caller must provide the capture index of the group for this name.
1297	#[inline(never)]
1298	fn parse_capture_name(
1299	&self,
1300	capture_index: u32,
1301	) -> Result<ast::CaptureName> {
1302	if self.is_eof() {
1303	return Err(self
1304	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1305	}
1306	let start = self.pos();
1307	loop {
1308	if self.char() == '>' {
1309	break;
1310	}
1311	if !is_capture_char(self.char(), self.pos() == start) {
1312	return Err(self.error(
1313	self.span_char(),
1314	ast::ErrorKind::GroupNameInvalid,
1315	));
1316	}
1317	if !self.bump() {
1318	break;
1319	}
1320	}
1321	let end = self.pos();
1322	if self.is_eof() {
1323	return Err(self
1324	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1325	}
1326	assert_eq!(self.char(), '>');
1327	self.bump();
1328	let name = &self.pattern()[start.offset..end.offset];
1329	if name.is_empty() {
1330	return Err(self.error(
1331	Span::new(start, start),
1332	ast::ErrorKind::GroupNameEmpty,
1333	));
1334	}
1335	let capname = ast::CaptureName {
1336	span: Span::new(start, end),
1337	name: name.to_string(),
1338	index: capture_index,
1339	};
1340	self.add_capture_name(&capname)?;
1341	Ok(capname)
1342	}
1343
1344	/// Parse a sequence of flags starting at the current character.
1345	///
1346	/// This advances the parser to the character immediately following the
1347	/// flags, which is guaranteed to be either `:` or `)`.
1348	///
1349	/// # Errors
1350	///
1351	/// If any flags are duplicated, then an error is returned.
1352	///
1353	/// If the negation operator is used more than once, then an error is
1354	/// returned.
1355	///
1356	/// If no flags could be found or if the negation operation is not followed
1357	/// by any flags, then an error is returned.
1358	#[inline(never)]
1359	fn parse_flags(&self) -> Result<ast::Flags> {
1360	let mut flags = ast::Flags { span: self.span(), items: vec![] };
1361	let mut last_was_negation = None;
1362	while self.char() != ':' && self.char() != ')' {
1363	if self.char() == '-' {
1364	last_was_negation = Some(self.span_char());
1365	let item = ast::FlagsItem {
1366	span: self.span_char(),
1367	kind: ast::FlagsItemKind::Negation,
1368	};
1369	if let Some(i) = flags.add_item(item) {
1370	return Err(self.error(
1371	self.span_char(),
1372	ast::ErrorKind::FlagRepeatedNegation {
1373	original: flags.items[i].span,
1374	},
1375	));
1376	}
1377	} else {
1378	last_was_negation = None;
1379	let item = ast::FlagsItem {
1380	span: self.span_char(),
1381	kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1382	};
1383	if let Some(i) = flags.add_item(item) {
1384	return Err(self.error(
1385	self.span_char(),
1386	ast::ErrorKind::FlagDuplicate {
1387	original: flags.items[i].span,
1388	},
1389	));
1390	}
1391	}
1392	if !self.bump() {
1393	return Err(
1394	self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1395	);
1396	}
1397	}
1398	if let Some(span) = last_was_negation {
1399	return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1400	}
1401	flags.span.end = self.pos();
1402	Ok(flags)
1403	}
1404
1405	/// Parse the current character as a flag. Do not advance the parser.
1406	///
1407	/// # Errors
1408	///
1409	/// If the flag is not recognized, then an error is returned.
1410	#[inline(never)]
1411	fn parse_flag(&self) -> Result<ast::Flag> {
1412	match self.char() {
1413	'i' => Ok(ast::Flag::CaseInsensitive),
1414	'm' => Ok(ast::Flag::MultiLine),
1415	's' => Ok(ast::Flag::DotMatchesNewLine),
1416	'U' => Ok(ast::Flag::SwapGreed),
1417	'u' => Ok(ast::Flag::Unicode),
1418	'R' => Ok(ast::Flag::CRLF),
1419	'x' => Ok(ast::Flag::IgnoreWhitespace),
1420	_ => {
1421	Err(self
1422	.error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1423	}
1424	}
1425	}
1426
1427	/// Parse a primitive AST. e.g., A literal, non-set character class or
1428	/// assertion.
1429	///
1430	/// This assumes that the parser expects a primitive at the current
1431	/// location. i.e., All other non-primitive cases have been handled.
1432	/// For example, if the parser's position is at `\|`, then `\|` will be
1433	/// treated as a literal (e.g., inside a character class).
1434	///
1435	/// This advances the parser to the first character immediately following
1436	/// the primitive.
1437	fn parse_primitive(&self) -> Result<Primitive> {
1438	match self.char() {
1439	'`\\`' => self.parse_escape(),
1440	'.' => {
1441	let ast = Primitive::Dot(self.span_char());
1442	self.bump();
1443	Ok(ast)
1444	}
1445	'^' => {
1446	let ast = Primitive::Assertion(ast::Assertion {
1447	span: self.span_char(),
1448	kind: ast::AssertionKind::StartLine,
1449	});
1450	self.bump();
1451	Ok(ast)
1452	}
1453	'$' => {
1454	let ast = Primitive::Assertion(ast::Assertion {
1455	span: self.span_char(),
1456	kind: ast::AssertionKind::EndLine,
1457	});
1458	self.bump();
1459	Ok(ast)
1460	}
1461	c => {
1462	let ast = Primitive::Literal(ast::Literal {
1463	span: self.span_char(),
1464	kind: ast::LiteralKind::Verbatim,
1465	c,
1466	});
1467	self.bump();
1468	Ok(ast)
1469	}
1470	}
1471	}
1472
1473	/// Parse an escape sequence as a primitive AST.
1474	///
1475	/// This assumes the parser is positioned at the start of the escape
1476	/// sequence, i.e., `\`. It advances the parser to the first position
1477	/// immediately following the escape sequence.
1478	#[inline(never)]
1479	fn parse_escape(&self) -> Result<Primitive> {
1480	assert_eq!(self.char(), '`\\`');
1481	let start = self.pos();
1482	if !self.bump() {
1483	return Err(self.error(
1484	Span::new(start, self.pos()),
1485	ast::ErrorKind::EscapeUnexpectedEof,
1486	));
1487	}
1488	let c = self.char();
1489	// Put some of the more complicated routines into helpers.
1490	match c {
1491	'0'..='7' => {
1492	if !self.parser().octal {
1493	return Err(self.error(
1494	Span::new(start, self.span_char().end),
1495	ast::ErrorKind::UnsupportedBackreference,
1496	));
1497	}
1498	let mut lit = self.parse_octal();
1499	lit.span.start = start;
1500	return Ok(Primitive::Literal(lit));
1501	}
1502	'8'..='9' if !self.parser().octal => {
1503	return Err(self.error(
1504	Span::new(start, self.span_char().end),
1505	ast::ErrorKind::UnsupportedBackreference,
1506	));
1507	}
1508	'x' \| 'u' \| 'U' => {
1509	let mut lit = self.parse_hex()?;
1510	lit.span.start = start;
1511	return Ok(Primitive::Literal(lit));
1512	}
1513	'p' \| 'P' => {
1514	let mut cls = self.parse_unicode_class()?;
1515	cls.span.start = start;
1516	return Ok(Primitive::Unicode(cls));
1517	}
1518	'd' \| 's' \| 'w' \| 'D' \| 'S' \| 'W' => {
1519	let mut cls = self.parse_perl_class();
1520	cls.span.start = start;
1521	return Ok(Primitive::Perl(cls));
1522	}
1523	_ => {}
1524	}
1525
1526	// Handle all of the one letter sequences inline.
1527	self.bump();
1528	let span = Span::new(start, self.pos());
1529	if is_meta_character(c) {
1530	return Ok(Primitive::Literal(ast::Literal {
1531	span,
1532	kind: ast::LiteralKind::Meta,
1533	c,
1534	}));
1535	}
1536	if is_escapeable_character(c) {
1537	return Ok(Primitive::Literal(ast::Literal {
1538	span,
1539	kind: ast::LiteralKind::Superfluous,
1540	c,
1541	}));
1542	}
1543	let special = \|kind, c\| {
1544	Ok(Primitive::Literal(ast::Literal {
1545	span,
1546	kind: ast::LiteralKind::Special(kind),
1547	c,
1548	}))
1549	};
1550	match c {
1551	'a' => special(ast::SpecialLiteralKind::Bell, '`\x07`'),
1552	'f' => special(ast::SpecialLiteralKind::FormFeed, '`\x0C`'),
1553	't' => special(ast::SpecialLiteralKind::Tab, '`\t`'),
1554	'n' => special(ast::SpecialLiteralKind::LineFeed, '`\n`'),
1555	'r' => special(ast::SpecialLiteralKind::CarriageReturn, '`\r`'),
1556	'v' => special(ast::SpecialLiteralKind::VerticalTab, '`\x0B`'),
1557	'A' => Ok(Primitive::Assertion(ast::Assertion {
1558	span,
1559	kind: ast::AssertionKind::StartText,
1560	})),
1561	'z' => Ok(Primitive::Assertion(ast::Assertion {
1562	span,
1563	kind: ast::AssertionKind::EndText,
1564	})),
1565	'b' => {
1566	let mut wb = ast::Assertion {
1567	span,
1568	kind: ast::AssertionKind::WordBoundary,
1569	};
1570	// After a \b, we "try" to parse things like \b{start} for
1571	// special word boundary assertions.
1572	if !self.is_eof() && self.char() == '{' {
1573	if let Some(kind) =
1574	self.maybe_parse_special_word_boundary(start)?
1575	{
1576	wb.kind = kind;
1577	wb.span.end = self.pos();
1578	}
1579	}
1580	Ok(Primitive::Assertion(wb))
1581	}
1582	'B' => Ok(Primitive::Assertion(ast::Assertion {
1583	span,
1584	kind: ast::AssertionKind::NotWordBoundary,
1585	})),
1586	'<' => Ok(Primitive::Assertion(ast::Assertion {
1587	span,
1588	kind: ast::AssertionKind::WordBoundaryStartAngle,
1589	})),
1590	'>' => Ok(Primitive::Assertion(ast::Assertion {
1591	span,
1592	kind: ast::AssertionKind::WordBoundaryEndAngle,
1593	})),
1594	_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1595	}
1596	}
1597
1598	/// Attempt to parse a specialty word boundary. That is, `\b{start}`,
1599	/// `\b{end}`, `\b{start-half}` or `\b{end-half}`.
1600	///
1601	/// This is similar to `maybe_parse_ascii_class` in that, in most cases,
1602	/// if it fails it will just return `None` with no error. This is done
1603	/// because `\b{5}` is a valid expression and we want to let that be parsed
1604	/// by the existing counted repetition parsing code. (I thought about just
1605	/// invoking the counted repetition code from here, but it seemed a little
1606	/// ham-fisted.)
1607	///
1608	/// Unlike `maybe_parse_ascii_class` though, this can return an error.
1609	/// Namely, if we definitely know it isn't a counted repetition, then we
1610	/// return an error specific to the specialty word boundaries.
1611	///
1612	/// This assumes the parser is positioned at a `{` immediately following
1613	/// a `\b`. When `None` is returned, the parser is returned to the position
1614	/// at which it started: pointing at a `{`.
1615	///
1616	/// The position given should correspond to the start of the `\b`.
1617	fn maybe_parse_special_word_boundary(
1618	&self,
1619	wb_start: Position,
1620	) -> Result<Option<ast::AssertionKind>> {
1621	assert_eq!(self.char(), '{');
1622
1623	let is_valid_char = \|c\| match c {
1624	'A'..='Z' \| 'a'..='z' \| '-' => `true`,
1625	_ => `false`,
1626	};
1627	let start = self.pos();
1628	if !self.bump_and_bump_space() {
1629	return Err(self.error(
1630	Span::new(wb_start, self.pos()),
1631	ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
1632	));
1633	}
1634	let start_contents = self.pos();
1635	// This is one of the critical bits: if the first non-whitespace
1636	// character isn't in [-A-Za-z] (i.e., this can't be a special word
1637	// boundary), then we bail and let the counted repetition parser deal
1638	// with this.
1639	if !is_valid_char(self.char()) {
1640	self.parser().pos.set(start);
1641	return Ok(None);
1642	}
1643
1644	// Now collect up our chars until we see a '}'.
1645	let mut scratch = self.parser().scratch.borrow_mut();
1646	scratch.clear();
1647	while !self.is_eof() && is_valid_char(self.char()) {
1648	scratch.push(self.char());
1649	self.bump_and_bump_space();
1650	}
1651	if self.is_eof() \|\| self.char() != '}' {
1652	return Err(self.error(
1653	Span::new(start, self.pos()),
1654	ast::ErrorKind::SpecialWordBoundaryUnclosed,
1655	));
1656	}
1657	let end = self.pos();
1658	self.bump();
1659	let kind = match scratch.as_str() {
1660	"start" => ast::AssertionKind::WordBoundaryStart,
1661	"end" => ast::AssertionKind::WordBoundaryEnd,
1662	"start-half" => ast::AssertionKind::WordBoundaryStartHalf,
1663	"end-half" => ast::AssertionKind::WordBoundaryEndHalf,
1664	_ => {
1665	return Err(self.error(
1666	Span::new(start_contents, end),
1667	ast::ErrorKind::SpecialWordBoundaryUnrecognized,
1668	))
1669	}
1670	};
1671	Ok(Some(kind))
1672	}
1673
1674	/// Parse an octal representation of a Unicode codepoint up to 3 digits
1675	/// long. This expects the parser to be positioned at the first octal
1676	/// digit and advances the parser to the first character immediately
1677	/// following the octal number. This also assumes that parsing octal
1678	/// escapes is enabled.
1679	///
1680	/// Assuming the preconditions are met, this routine can never fail.
1681	#[inline(never)]
1682	fn parse_octal(&self) -> ast::Literal {
1683	assert!(self.parser().octal);
1684	assert!('0' <= self.char() && self.char() <= '7');
1685	let start = self.pos();
1686	// Parse up to two more digits.
1687	while self.bump()
1688	&& '0' <= self.char()
1689	&& self.char() <= '7'
1690	&& self.pos().offset - start.offset <= `2`
1691	{}
1692	let end = self.pos();
1693	let octal = &self.pattern()[start.offset..end.offset];
1694	// Parsing the octal should never fail since the above guarantees a
1695	// valid number.
1696	let codepoint =
1697	u32::from_str_radix(octal, `8`).expect("valid octal number");
1698	// The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1699	// invalid Unicode scalar values.
1700	let c = char::from_u32(codepoint).expect("Unicode scalar value");
1701	ast::Literal {
1702	span: Span::new(start, end),
1703	kind: ast::LiteralKind::Octal,
1704	c,
1705	}
1706	}
1707
1708	/// Parse a hex representation of a Unicode codepoint. This handles both
1709	/// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1710	/// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1711	/// the first character immediately following the hexadecimal literal.
1712	#[inline(never)]
1713	fn parse_hex(&self) -> Result<ast::Literal> {
1714	assert!(
1715	self.char() == 'x' \|\| self.char() == 'u' \|\| self.char() == 'U'
1716	);
1717
1718	let hex_kind = match self.char() {
1719	'x' => ast::HexLiteralKind::X,
1720	'u' => ast::HexLiteralKind::UnicodeShort,
1721	_ => ast::HexLiteralKind::UnicodeLong,
1722	};
1723	if !self.bump_and_bump_space() {
1724	return Err(
1725	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1726	);
1727	}
1728	if self.char() == '{' {
1729	self.parse_hex_brace(hex_kind)
1730	} else {
1731	self.parse_hex_digits(hex_kind)
1732	}
1733	}
1734
1735	/// Parse an N-digit hex representation of a Unicode codepoint. This
1736	/// expects the parser to be positioned at the first digit and will advance
1737	/// the parser to the first character immediately following the escape
1738	/// sequence.
1739	///
1740	/// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1741	/// or 8 (for `\UNNNNNNNN`).
1742	#[inline(never)]
1743	fn parse_hex_digits(
1744	&self,
1745	kind: ast::HexLiteralKind,
1746	) -> Result<ast::Literal> {
1747	let mut scratch = self.parser().scratch.borrow_mut();
1748	scratch.clear();
1749
1750	let start = self.pos();
1751	for i in `0`..kind.digits() {
1752	if i > `0` && !self.bump_and_bump_space() {
1753	return Err(self
1754	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1755	}
1756	if !is_hex(self.char()) {
1757	return Err(self.error(
1758	self.span_char(),
1759	ast::ErrorKind::EscapeHexInvalidDigit,
1760	));
1761	}
1762	scratch.push(self.char());
1763	}
1764	// The final bump just moves the parser past the literal, which may
1765	// be EOF.
1766	self.bump_and_bump_space();
1767	let end = self.pos();
1768	let hex = scratch.as_str();
1769	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1770	None => Err(self.error(
1771	Span::new(start, end),
1772	ast::ErrorKind::EscapeHexInvalid,
1773	)),
1774	Some(c) => Ok(ast::Literal {
1775	span: Span::new(start, end),
1776	kind: ast::LiteralKind::HexFixed(kind),
1777	c,
1778	}),
1779	}
1780	}
1781
1782	/// Parse a hex representation of any Unicode scalar value. This expects
1783	/// the parser to be positioned at the opening brace `{` and will advance
1784	/// the parser to the first character following the closing brace `}`.
1785	#[inline(never)]
1786	fn parse_hex_brace(
1787	&self,
1788	kind: ast::HexLiteralKind,
1789	) -> Result<ast::Literal> {
1790	let mut scratch = self.parser().scratch.borrow_mut();
1791	scratch.clear();
1792
1793	let brace_pos = self.pos();
1794	let start = self.span_char().end;
1795	while self.bump_and_bump_space() && self.char() != '}' {
1796	if !is_hex(self.char()) {
1797	return Err(self.error(
1798	self.span_char(),
1799	ast::ErrorKind::EscapeHexInvalidDigit,
1800	));
1801	}
1802	scratch.push(self.char());
1803	}
1804	if self.is_eof() {
1805	return Err(self.error(
1806	Span::new(brace_pos, self.pos()),
1807	ast::ErrorKind::EscapeUnexpectedEof,
1808	));
1809	}
1810	let end = self.pos();
1811	let hex = scratch.as_str();
1812	assert_eq!(self.char(), '}');
1813	self.bump_and_bump_space();
1814
1815	if hex.is_empty() {
1816	return Err(self.error(
1817	Span::new(brace_pos, self.pos()),
1818	ast::ErrorKind::EscapeHexEmpty,
1819	));
1820	}
1821	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1822	None => Err(self.error(
1823	Span::new(start, end),
1824	ast::ErrorKind::EscapeHexInvalid,
1825	)),
1826	Some(c) => Ok(ast::Literal {
1827	span: Span::new(start, self.pos()),
1828	kind: ast::LiteralKind::HexBrace(kind),
1829	c,
1830	}),
1831	}
1832	}
1833
1834	/// Parse a decimal number into a u32 while trimming leading and trailing
1835	/// whitespace.
1836	///
1837	/// This expects the parser to be positioned at the first position where
1838	/// a decimal digit could occur. This will advance the parser to the byte
1839	/// immediately following the last contiguous decimal digit.
1840	///
1841	/// If no decimal digit could be found or if there was a problem parsing
1842	/// the complete set of digits into a u32, then an error is returned.
1843	fn parse_decimal(&self) -> Result<u32> {
1844	let mut scratch = self.parser().scratch.borrow_mut();
1845	scratch.clear();
1846
1847	while !self.is_eof() && self.char().is_whitespace() {
1848	self.bump();
1849	}
1850	let start = self.pos();
1851	while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1852	scratch.push(self.char());
1853	self.bump_and_bump_space();
1854	}
1855	let span = Span::new(start, self.pos());
1856	while !self.is_eof() && self.char().is_whitespace() {
1857	self.bump_and_bump_space();
1858	}
1859	let digits = scratch.as_str();
1860	if digits.is_empty() {
1861	return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1862	}
1863	match u32::from_str_radix(digits, `10`).ok() {
1864	Some(n) => Ok(n),
1865	None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1866	}
1867	}
1868
1869	/// Parse a standard character class consisting primarily of characters or
1870	/// character ranges, but can also contain nested character classes of
1871	/// any type (sans `.`).
1872	///
1873	/// This assumes the parser is positioned at the opening `[`. If parsing
1874	/// is successful, then the parser is advanced to the position immediately
1875	/// following the closing `]`.
1876	#[inline(never)]
1877	fn parse_set_class(&self) -> Result<ast::ClassBracketed> {
1878	assert_eq!(self.char(), '[');
1879
1880	let mut union =
1881	ast::ClassSetUnion { span: self.span(), items: vec![] };
1882	loop {
1883	self.bump_space();
1884	if self.is_eof() {
1885	return Err(self.unclosed_class_error());
1886	}
1887	match self.char() {
1888	'[' => {
1889	// If we've already parsed the opening bracket, then
1890	// attempt to treat this as the beginning of an ASCII
1891	// class. If ASCII class parsing fails, then the parser
1892	// backs up to `[`.
1893	if !self.parser().stack_class.borrow().is_empty() {
1894	if let Some(cls) = self.maybe_parse_ascii_class() {
1895	union.push(ast::ClassSetItem::Ascii(cls));
1896	continue;
1897	}
1898	}
1899	union = self.push_class_open(union)?;
1900	}
1901	']' => match self.pop_class(union)? {
1902	Either::Left(nested_union) => {
1903	union = nested_union;
1904	}
1905	Either::Right(class) => return Ok(class),
1906	},
1907	'&' if self.peek() == Some('&') => {
1908	assert!(self.bump_if("&&"));
1909	union = self.push_class_op(
1910	ast::ClassSetBinaryOpKind::Intersection,
1911	union,
1912	);
1913	}
1914	'-' if self.peek() == Some('-') => {
1915	assert!(self.bump_if("--"));
1916	union = self.push_class_op(
1917	ast::ClassSetBinaryOpKind::Difference,
1918	union,
1919	);
1920	}
1921	'~' if self.peek() == Some('~') => {
1922	assert!(self.bump_if("~~"));
1923	union = self.push_class_op(
1924	ast::ClassSetBinaryOpKind::SymmetricDifference,
1925	union,
1926	);
1927	}
1928	_ => {
1929	union.push(self.parse_set_class_range()?);
1930	}
1931	}
1932	}
1933	}
1934
1935	/// Parse a single primitive item in a character class set. The item to
1936	/// be parsed can either be one of a simple literal character, a range
1937	/// between two simple literal characters or a "primitive" character
1938	/// class like \w or \p{Greek}.
1939	///
1940	/// If an invalid escape is found, or if a character class is found where
1941	/// a simple literal is expected (e.g., in a range), then an error is
1942	/// returned.
1943	#[inline(never)]
1944	fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1945	let prim1 = self.parse_set_class_item()?;
1946	self.bump_space();
1947	if self.is_eof() {
1948	return Err(self.unclosed_class_error());
1949	}
1950	// If the next char isn't a `-`, then we don't have a range.
1951	// There are two exceptions. If the char after a `-` is a `]`, then
1952	// `-` is interpreted as a literal `-`. Alternatively, if the char
1953	// after a `-` is a `-`, then `--` corresponds to a "difference"
1954	// operation.
1955	if self.char() != '-'
1956	\|\| self.peek_space() == Some(']')
1957	\|\| self.peek_space() == Some('-')
1958	{
1959	return prim1.into_class_set_item(self);
1960	}
1961	// OK, now we're parsing a range, so bump past the `-` and parse the
1962	// second half of the range.
1963	if !self.bump_and_bump_space() {
1964	return Err(self.unclosed_class_error());
1965	}
1966	let prim2 = self.parse_set_class_item()?;
1967	let range = ast::ClassSetRange {
1968	span: Span::new(prim1.span().start, prim2.span().end),
1969	start: prim1.into_class_literal(self)?,
1970	end: prim2.into_class_literal(self)?,
1971	};
1972	if !range.is_valid() {
1973	return Err(
1974	self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1975	);
1976	}
1977	Ok(ast::ClassSetItem::Range(range))
1978	}
1979
1980	/// Parse a single item in a character class as a primitive, where the
1981	/// primitive either consists of a verbatim literal or a single escape
1982	/// sequence.
1983	///
1984	/// This assumes the parser is positioned at the beginning of a primitive,
1985	/// and advances the parser to the first position after the primitive if
1986	/// successful.
1987	///
1988	/// Note that it is the caller's responsibility to report an error if an
1989	/// illegal primitive was parsed.
1990	#[inline(never)]
1991	fn parse_set_class_item(&self) -> Result<Primitive> {
1992	if self.char() == '`\\`' {
1993	self.parse_escape()
1994	} else {
1995	let x = Primitive::Literal(ast::Literal {
1996	span: self.span_char(),
1997	kind: ast::LiteralKind::Verbatim,
1998	c: self.char(),
1999	});
2000	self.bump();
2001	Ok(x)
2002	}
2003	}
2004
2005	/// Parses the opening of a character class set. This includes the opening
2006	/// bracket along with `^` if present to indicate negation. This also
2007	/// starts parsing the opening set of unioned items if applicable, since
2008	/// there are special rules applied to certain characters in the opening
2009	/// of a character class. For example, `[^]]` is the class of all
2010	/// characters not equal to `]`. (`]` would need to be escaped in any other
2011	/// position.) Similarly for `-`.
2012	///
2013	/// In all cases, the op inside the returned `ast::ClassBracketed` is an
2014	/// empty union. This empty union should be replaced with the actual item
2015	/// when it is popped from the parser's stack.
2016	///
2017	/// This assumes the parser is positioned at the opening `[` and advances
2018	/// the parser to the first non-special byte of the character class.
2019	///
2020	/// An error is returned if EOF is found.
2021	#[inline(never)]
2022	fn parse_set_class_open(
2023	&self,
2024	) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
2025	assert_eq!(self.char(), '[');
2026	let start = self.pos();
2027	if !self.bump_and_bump_space() {
2028	return Err(self.error(
2029	Span::new(start, self.pos()),
2030	ast::ErrorKind::ClassUnclosed,
2031	));
2032	}
2033
2034	let negated = if self.char() != '^' {
2035	`false`
2036	} else {
2037	if !self.bump_and_bump_space() {
2038	return Err(self.error(
2039	Span::new(start, self.pos()),
2040	ast::ErrorKind::ClassUnclosed,
2041	));
2042	}
2043	`true`
2044	};
2045	// Accept any number of `-` as literal `-`.
2046	let mut union =
2047	ast::ClassSetUnion { span: self.span(), items: vec![] };
2048	while self.char() == '-' {
2049	union.push(ast::ClassSetItem::Literal(ast::Literal {
2050	span: self.span_char(),
2051	kind: ast::LiteralKind::Verbatim,
2052	c: '-',
2053	}));
2054	if !self.bump_and_bump_space() {
2055	return Err(self.error(
2056	Span::new(start, start),
2057	ast::ErrorKind::ClassUnclosed,
2058	));
2059	}
2060	}
2061	// If `]` is the first* char in a set, then interpret it as a literal*
2062	// `]`. That is, an empty class is impossible to write.
2063	if union.items.is_empty() && self.char() == ']' {
2064	union.push(ast::ClassSetItem::Literal(ast::Literal {
2065	span: self.span_char(),
2066	kind: ast::LiteralKind::Verbatim,
2067	c: ']',
2068	}));
2069	if !self.bump_and_bump_space() {
2070	return Err(self.error(
2071	Span::new(start, self.pos()),
2072	ast::ErrorKind::ClassUnclosed,
2073	));
2074	}
2075	}
2076	let set = ast::ClassBracketed {
2077	span: Span::new(start, self.pos()),
2078	negated,
2079	kind: ast::ClassSet::union(ast::ClassSetUnion {
2080	span: Span::new(union.span.start, union.span.start),
2081	items: vec![],
2082	}),
2083	};
2084	Ok((set, union))
2085	}
2086
2087	/// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
2088	///
2089	/// This assumes the parser is positioned at the opening `[`.
2090	///
2091	/// If no valid ASCII character class could be found, then this does not
2092	/// advance the parser and `None` is returned. Otherwise, the parser is
2093	/// advanced to the first byte following the closing `]` and the
2094	/// corresponding ASCII class is returned.
2095	#[inline(never)]
2096	fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
2097	// ASCII character classes are interesting from a parsing perspective
2098	// because parsing cannot fail with any interesting error. For example,
2099	// in order to use an ASCII character class, it must be enclosed in
2100	// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
2101	// of it as "ASCII character classes have the syntax `[:NAME:]` which
2102	// can only appear within character brackets." This means that things
2103	// like `[[:lower:]A]` are legal constructs.
2104	//
2105	// However, if one types an incorrect ASCII character class, e.g.,
2106	// `[[:loower:]]`, then we treat that as a normal nested character
2107	// class containing the characters `:elorw`. One might argue that we
2108	// should return an error instead since the repeated colons give away
2109	// the intent to write an ASCII class. But what if the user typed
2110	// `[[:lower]]` instead? How can we tell that was intended to be an
2111	// ASCII class and not just a normal nested class?
2112	//
2113	// Reasonable people can probably disagree over this, but for better
2114	// or worse, we implement semantics that never fails at the expense
2115	// of better failure modes.
2116	assert_eq!(self.char(), '[');
2117	// If parsing fails, then we back up the parser to this starting point.
2118	let start = self.pos();
2119	let mut negated = `false`;
2120	if !self.bump() \|\| self.char() != ':' {
2121	self.parser().pos.set(start);
2122	return None;
2123	}
2124	if !self.bump() {
2125	self.parser().pos.set(start);
2126	return None;
2127	}
2128	if self.char() == '^' {
2129	negated = `true`;
2130	if !self.bump() {
2131	self.parser().pos.set(start);
2132	return None;
2133	}
2134	}
2135	let name_start = self.offset();
2136	while self.char() != ':' && self.bump() {}
2137	if self.is_eof() {
2138	self.parser().pos.set(start);
2139	return None;
2140	}
2141	let name = &self.pattern()[name_start..self.offset()];
2142	if !self.bump_if(":]") {
2143	self.parser().pos.set(start);
2144	return None;
2145	}
2146	let kind = match ast::ClassAsciiKind::from_name(name) {
2147	Some(kind) => kind,
2148	None => {
2149	self.parser().pos.set(start);
2150	return None;
2151	}
2152	};
2153	Some(ast::ClassAscii {
2154	span: Span::new(start, self.pos()),
2155	kind,
2156	negated,
2157	})
2158	}
2159
2160	/// Parse a Unicode class in either the single character notation, `\pN`
2161	/// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2162	/// the parser is positioned at the `p` (or `P` for negation) and will
2163	/// advance the parser to the character immediately following the class.
2164	///
2165	/// Note that this does not check whether the class name is valid or not.
2166	#[inline(never)]
2167	fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2168	assert!(self.char() == 'p' \|\| self.char() == 'P');
2169
2170	let mut scratch = self.parser().scratch.borrow_mut();
2171	scratch.clear();
2172
2173	let negated = self.char() == 'P';
2174	if !self.bump_and_bump_space() {
2175	return Err(
2176	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2177	);
2178	}
2179	let (start, kind) = if self.char() == '{' {
2180	let start = self.span_char().end;
2181	while self.bump_and_bump_space() && self.char() != '}' {
2182	scratch.push(self.char());
2183	}
2184	if self.is_eof() {
2185	return Err(self
2186	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2187	}
2188	assert_eq!(self.char(), '}');
2189	self.bump();
2190
2191	let name = scratch.as_str();
2192	if let Some(i) = name.find("!=") {
2193	(
2194	start,
2195	ast::ClassUnicodeKind::NamedValue {
2196	op: ast::ClassUnicodeOpKind::NotEqual,
2197	name: name[..i].to_string(),
2198	value: name[i + `2`..].to_string(),
2199	},
2200	)
2201	} else if let Some(i) = name.find(':') {
2202	(
2203	start,
2204	ast::ClassUnicodeKind::NamedValue {
2205	op: ast::ClassUnicodeOpKind::Colon,
2206	name: name[..i].to_string(),
2207	value: name[i + `1`..].to_string(),
2208	},
2209	)
2210	} else if let Some(i) = name.find('=') {
2211	(
2212	start,
2213	ast::ClassUnicodeKind::NamedValue {
2214	op: ast::ClassUnicodeOpKind::Equal,
2215	name: name[..i].to_string(),
2216	value: name[i + `1`..].to_string(),
2217	},
2218	)
2219	} else {
2220	(start, ast::ClassUnicodeKind::Named(name.to_string()))
2221	}
2222	} else {
2223	let start = self.pos();
2224	let c = self.char();
2225	if c == '`\\`' {
2226	return Err(self.error(
2227	self.span_char(),
2228	ast::ErrorKind::UnicodeClassInvalid,
2229	));
2230	}
2231	self.bump_and_bump_space();
2232	let kind = ast::ClassUnicodeKind::OneLetter(c);
2233	(start, kind)
2234	};
2235	Ok(ast::ClassUnicode {
2236	span: Span::new(start, self.pos()),
2237	negated,
2238	kind,
2239	})
2240	}
2241
2242	/// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2243	/// parser is currently at a valid character class name and will be
2244	/// advanced to the character immediately following the class.
2245	#[inline(never)]
2246	fn parse_perl_class(&self) -> ast::ClassPerl {
2247	let c = self.char();
2248	let span = self.span_char();
2249	self.bump();
2250	let (negated, kind) = match c {
2251	'd' => (`false`, ast::ClassPerlKind::Digit),
2252	'D' => (`true`, ast::ClassPerlKind::Digit),
2253	's' => (`false`, ast::ClassPerlKind::Space),
2254	'S' => (`true`, ast::ClassPerlKind::Space),
2255	'w' => (`false`, ast::ClassPerlKind::Word),
2256	'W' => (`true`, ast::ClassPerlKind::Word),
2257	c => panic!("expected valid Perl class but got '{}'", c),
2258	};
2259	ast::ClassPerl { span, kind, negated }
2260	}
2261	}
2262
2263	/// A type that traverses a fully parsed Ast and checks whether its depth
2264	/// exceeds the specified nesting limit. If it does, then an error is returned.
2265	#[derive(Debug)]
2266	struct NestLimiter<'p, 's, P> {
2267	/// The parser that is checking the nest limit.
2268	p: &'p ParserI<'s, P>,
2269	/// The current depth while walking an Ast.
2270	depth: u32,
2271	}
2272
2273	impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2274	fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2275	NestLimiter { p, depth: `0` }
2276	}
2277
2278	#[inline(never)]
2279	fn check(self, ast: &Ast) -> Result<()> {
2280	ast::visit(ast, self)
2281	}
2282
2283	fn increment_depth(&mut self, span: &Span) -> Result<()> {
2284	let new = self.depth.checked_add(`1`).ok_or_else(\|\| {
2285	self.p.error(
2286	span.clone(),
2287	ast::ErrorKind::NestLimitExceeded(u32::MAX),
2288	)
2289	})?;
2290	let limit = self.p.parser().nest_limit;
2291	if new > limit {
2292	return Err(self.p.error(
2293	span.clone(),
2294	ast::ErrorKind::NestLimitExceeded(limit),
2295	));
2296	}
2297	self.depth = new;
2298	Ok(())
2299	}
2300
2301	fn decrement_depth(&mut self) {
2302	// Assuming the correctness of the visitor, this should never drop
2303	// below 0.
2304	self.depth = self.depth.checked_sub(`1`).unwrap();
2305	}
2306	}
2307
2308	impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2309	type Output = ();
2310	type Err = ast::Error;
2311
2312	fn finish(self) -> Result<()> {
2313	Ok(())
2314	}
2315
2316	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2317	let span = match *ast {
2318	Ast::Empty(_)
2319	\| Ast::Flags(_)
2320	\| Ast::Literal(_)
2321	\| Ast::Dot(_)
2322	\| Ast::Assertion(_)
2323	\| Ast::ClassUnicode(_)
2324	\| Ast::ClassPerl(_) => {
2325	// These are all base cases, so we don't increment depth.
2326	return Ok(());
2327	}
2328	Ast::ClassBracketed(ref x) => &x.span,
2329	Ast::Repetition(ref x) => &x.span,
2330	Ast::Group(ref x) => &x.span,
2331	Ast::Alternation(ref x) => &x.span,
2332	Ast::Concat(ref x) => &x.span,
2333	};
2334	self.increment_depth(span)
2335	}
2336
2337	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2338	match *ast {
2339	Ast::Empty(_)
2340	\| Ast::Flags(_)
2341	\| Ast::Literal(_)
2342	\| Ast::Dot(_)
2343	\| Ast::Assertion(_)
2344	\| Ast::ClassUnicode(_)
2345	\| Ast::ClassPerl(_) => {
2346	// These are all base cases, so we don't decrement depth.
2347	Ok(())
2348	}
2349	Ast::ClassBracketed(_)
2350	\| Ast::Repetition(_)
2351	\| Ast::Group(_)
2352	\| Ast::Alternation(_)
2353	\| Ast::Concat(_) => {
2354	self.decrement_depth();
2355	Ok(())
2356	}
2357	}
2358	}
2359
2360	fn visit_class_set_item_pre(
2361	&mut self,
2362	ast: &ast::ClassSetItem,
2363	) -> Result<()> {
2364	let span = match *ast {
2365	ast::ClassSetItem::Empty(_)
2366	\| ast::ClassSetItem::Literal(_)
2367	\| ast::ClassSetItem::Range(_)
2368	\| ast::ClassSetItem::Ascii(_)
2369	\| ast::ClassSetItem::Unicode(_)
2370	\| ast::ClassSetItem::Perl(_) => {
2371	// These are all base cases, so we don't increment depth.
2372	return Ok(());
2373	}
2374	ast::ClassSetItem::Bracketed(ref x) => &x.span,
2375	ast::ClassSetItem::Union(ref x) => &x.span,
2376	};
2377	self.increment_depth(span)
2378	}
2379
2380	fn visit_class_set_item_post(
2381	&mut self,
2382	ast: &ast::ClassSetItem,
2383	) -> Result<()> {
2384	match *ast {
2385	ast::ClassSetItem::Empty(_)
2386	\| ast::ClassSetItem::Literal(_)
2387	\| ast::ClassSetItem::Range(_)
2388	\| ast::ClassSetItem::Ascii(_)
2389	\| ast::ClassSetItem::Unicode(_)
2390	\| ast::ClassSetItem::Perl(_) => {
2391	// These are all base cases, so we don't decrement depth.
2392	Ok(())
2393	}
2394	ast::ClassSetItem::Bracketed(_) \| ast::ClassSetItem::Union(_) => {
2395	self.decrement_depth();
2396	Ok(())
2397	}
2398	}
2399	}
2400
2401	fn visit_class_set_binary_op_pre(
2402	&mut self,
2403	ast: &ast::ClassSetBinaryOp,
2404	) -> Result<()> {
2405	self.increment_depth(&ast.span)
2406	}
2407
2408	fn visit_class_set_binary_op_post(
2409	&mut self,
2410	_ast: &ast::ClassSetBinaryOp,
2411	) -> Result<()> {
2412	self.decrement_depth();
2413	Ok(())
2414	}
2415	}
2416
2417	/// When the result is an error, transforms the ast::ErrorKind from the source
2418	/// Result into another one. This function is used to return clearer error
2419	/// messages when possible.
2420	fn specialize_err<T>(
2421	result: Result<T>,
2422	from: ast::ErrorKind,
2423	to: ast::ErrorKind,
2424	) -> Result<T> {
2425	if let Err(e: Error) = result {
2426	if e.kind == from {
2427	Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2428	} else {
2429	Err(e)
2430	}
2431	} else {
2432	result
2433	}
2434	}
2435
2436	#[cfg(test)]
2437	mod tests {
2438	use core::ops::Range;
2439
2440	use alloc::format;
2441
2442	use super::*;
2443
// A local stand-in for `assert_eq!` that prints each operand on its own
// line, which is a little easier to read for big ASTs (though honestly
// still not great).
macro_rules! assert_eq {
    ($lhs:expr, $rhs:expr) => {{
        match (&$lhs, &$rhs) {
            (lhs, rhs) => {
                if !(lhs == rhs) {
                    panic!(
                        "assertion failed: `(left == right)`\n\n\
                         left: `{:?}`\nright: `{:?}`\n\n",
                        lhs, rhs
                    )
                }
            }
        }
    }};
}
2461
2462	// We create these errors to compare with real ast::Errors in the tests.
2463	// We define equality between TestError and ast::Error to disregard the
2464	// pattern string in ast::Error, which is annoying to provide in tests.
2465	#[derive(Clone, Debug)]
2466	struct TestError {
2467	span: Span,
2468	kind: ast::ErrorKind,
2469	}
2470
2471	impl PartialEq<ast::Error> for TestError {
2472	fn eq(&self, other: &ast::Error) -> bool {
2473	self.span == other.span && self.kind == other.kind
2474	}
2475	}
2476
2477	impl PartialEq<TestError> for ast::Error {
2478	fn eq(&self, other: &TestError) -> bool {
2479	self.span == other.span && self.kind == other.kind
2480	}
2481	}
2482
/// Shorthand for turning a string slice into an owned `String`.
fn s(str: &str) -> String {
    String::from(str)
}
2486
2487	fn parser(pattern: &str) -> ParserI<'_, Parser> {
2488	ParserI::new(Parser::new(), pattern)
2489	}
2490
2491	fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2492	let parser = ParserBuilder::new().octal(`true`).build();
2493	ParserI::new(parser, pattern)
2494	}
2495
2496	fn parser_empty_min_range(pattern: &str) -> ParserI<'_, Parser> {
2497	let parser = ParserBuilder::new().empty_min_range(`true`).build();
2498	ParserI::new(parser, pattern)
2499	}
2500
2501	fn parser_nest_limit(
2502	pattern: &str,
2503	nest_limit: u32,
2504	) -> ParserI<'_, Parser> {
2505	let p = ParserBuilder::new().nest_limit(nest_limit).build();
2506	ParserI::new(p, pattern)
2507	}
2508
2509	fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2510	let p = ParserBuilder::new().ignore_whitespace(`true`).build();
2511	ParserI::new(p, pattern)
2512	}
2513
2514	/// Short alias for creating a new span.
2515	fn nspan(start: Position, end: Position) -> Span {
2516	Span::new(start, end)
2517	}
2518
2519	/// Short alias for creating a new position.
2520	fn npos(offset: usize, line: usize, column: usize) -> Position {
2521	Position::new(offset, line, column)
2522	}
2523
2524	/// Create a new span from the given offset range. This assumes a single
2525	/// line and sets the columns based on the offsets. i.e., This only works
2526	/// out of the box for ASCII, which is fine for most tests.
2527	fn span(range: Range<usize>) -> Span {
2528	let start = Position::new(range.start, `1`, range.start + `1`);
2529	let end = Position::new(range.end, `1`, range.end + `1`);
2530	Span::new(start, end)
2531	}
2532
2533	/// Create a new span for the corresponding byte range in the given string.
2534	fn span_range(subject: &str, range: Range<usize>) -> Span {
2535	let start = Position {
2536	offset: range.start,
2537	line: `1` + subject[..range.start].matches('`\n`').count(),
2538	column: `1` + subject[..range.start]
2539	.chars()
2540	.rev()
2541	.position(\|c\| c == '`\n`')
2542	.unwrap_or(subject[..range.start].chars().count()),
2543	};
2544	let end = Position {
2545	offset: range.end,
2546	line: `1` + subject[..range.end].matches('`\n`').count(),
2547	column: `1` + subject[..range.end]
2548	.chars()
2549	.rev()
2550	.position(\|c\| c == '`\n`')
2551	.unwrap_or(subject[..range.end].chars().count()),
2552	};
2553	Span::new(start, end)
2554	}
2555
2556	/// Create a verbatim literal starting at the given position.
2557	fn lit(c: char, start: usize) -> Ast {
2558	lit_with(c, span(start..start + c.len_utf8()))
2559	}
2560
2561	/// Create a meta literal starting at the given position.
2562	fn meta_lit(c: char, span: Span) -> Ast {
2563	Ast::literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
2564	}
2565
2566	/// Create a verbatim literal with the given span.
2567	fn lit_with(c: char, span: Span) -> Ast {
2568	Ast::literal(ast::Literal {
2569	span,
2570	kind: ast::LiteralKind::Verbatim,
2571	c,
2572	})
2573	}
2574
2575	/// Create a concatenation with the given range.
2576	fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2577	concat_with(span(range), asts)
2578	}
2579
2580	/// Create a concatenation with the given span.
2581	fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2582	Ast::concat(ast::Concat { span, asts })
2583	}
2584
2585	/// Create an alternation with the given span.
2586	fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2587	Ast::alternation(ast::Alternation { span: span(range), asts })
2588	}
2589
2590	/// Create a capturing group with the given span.
2591	fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2592	Ast::group(ast::Group {
2593	span: span(range),
2594	kind: ast::GroupKind::CaptureIndex(index),
2595	ast: Box::new(ast),
2596	})
2597	}
2598
2599	/// Create an ast::SetFlags.
2600	///
2601	/// The given pattern should be the full pattern string. The range given
2602	/// should correspond to the byte offsets where the flag set occurs.
2603	///
2604	/// If negated is true, then the set is interpreted as beginning with a
2605	/// negation.
2606	fn flag_set(
2607	pat: &str,
2608	range: Range<usize>,
2609	flag: ast::Flag,
2610	negated: bool,
2611	) -> Ast {
2612	let mut items = vec![ast::FlagsItem {
2613	span: span_range(pat, (range.end - `2`)..(range.end - `1`)),
2614	kind: ast::FlagsItemKind::Flag(flag),
2615	}];
2616	if negated {
2617	items.insert(
2618	`0`,
2619	ast::FlagsItem {
2620	span: span_range(pat, (range.start + `2`)..(range.end - `2`)),
2621	kind: ast::FlagsItemKind::Negation,
2622	},
2623	);
2624	}
2625	Ast::flags(ast::SetFlags {
2626	span: span_range(pat, range.clone()),
2627	flags: ast::Flags {
2628	span: span_range(pat, (range.start + `2`)..(range.end - `1`)),
2629	items,
2630	},
2631	})
2632	}
2633
#[test]
fn parse_nest_limit() {
    // The error expected when `limit` is exceeded by the node at `range`.
    let exceeded = |range: Range<usize>, limit: u32| TestError {
        span: span(range),
        kind: ast::ErrorKind::NestLimitExceeded(limit),
    };
    // A greedy repetition of `inner`, with the operator at `op`.
    let repeat = |whole: Range<usize>,
                  op: Range<usize>,
                  kind: ast::RepetitionKind,
                  inner: Ast| {
        Ast::repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy: true,
            ast: Box::new(inner),
        })
    };

    // Some regexes are still fine with a nest limit of 0.
    assert_eq!(parser_nest_limit("", 0).parse(), Ok(Ast::empty(span(0..0))));
    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

    // Repetition operators need one level of nesting each.
    assert_eq!(
        parser_nest_limit("a+", 0).parse().unwrap_err(),
        exceeded(0..2, 0)
    );
    assert_eq!(
        parser_nest_limit("a+", 1).parse(),
        Ok(repeat(
            0..2,
            1..2,
            ast::RepetitionKind::OneOrMore,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser_nest_limit("(a)+", 1).parse().unwrap_err(),
        exceeded(0..3, 1)
    );
    assert_eq!(
        parser_nest_limit("a+*", 1).parse().unwrap_err(),
        exceeded(0..2, 1)
    );
    assert_eq!(
        parser_nest_limit("a+*", 2).parse(),
        Ok(repeat(
            0..3,
            2..3,
            ast::RepetitionKind::ZeroOrMore,
            repeat(0..2, 1..2, ast::RepetitionKind::OneOrMore, lit('a', 0)),
        ))
    );

    // A concatenation needs one level of nesting.
    assert_eq!(
        parser_nest_limit("ab", 0).parse().unwrap_err(),
        exceeded(0..2, 0)
    );
    assert_eq!(
        parser_nest_limit("ab", 1).parse(),
        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
    );
    assert_eq!(
        parser_nest_limit("abc", 1).parse(),
        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
    );

    // An alternation needs one level of nesting.
    assert_eq!(
        parser_nest_limit("a|b", 0).parse().unwrap_err(),
        exceeded(0..3, 0)
    );
    assert_eq!(
        parser_nest_limit("a|b", 1).parse(),
        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser_nest_limit("a|b|c", 1).parse(),
        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );

    // Character classes have a small recursive syntax of their own.
    assert_eq!(
        parser_nest_limit("[a]", 0).parse().unwrap_err(),
        exceeded(0..3, 0)
    );
    let single_a = ast::ClassSetItem::Literal(ast::Literal {
        span: span(1..2),
        kind: ast::LiteralKind::Verbatim,
        c: 'a',
    });
    assert_eq!(
        parser_nest_limit("[a]", 1).parse(),
        Ok(Ast::class_bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(single_a),
        }))
    );
    assert_eq!(
        parser_nest_limit("[ab]", 1).parse().unwrap_err(),
        exceeded(1..3, 1)
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
        exceeded(3..7, 2)
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
        exceeded(4..6, 3)
    );
    assert_eq!(
        parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
        exceeded(1..5, 1)
    );
    assert_eq!(
        parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
        exceeded(4..6, 2)
    );
}
2791
/// Check that, in whitespace-insensitive mode, comments are stripped from
/// the AST and reported separately with the right spans and text.
#[test]
fn parse_comments() {
    // The pattern is assembled line by line so that its exact bytes do not
    // depend on how this source file is indented. The two spaces in front
    // of comment 3 are significant: every offset asserted below (comment 3
    // at 53..74, `bar` at 74..77, comment 4 at 78..98) relies on them.
    let pat = concat!(
        "(?x)\n",
        "# This is comment 1.\n",
        "foo # This is comment 2.\n",
        "  # This is comment 3.\n",
        "bar\n",
        "# This is comment 4.",
    );
    let astc = parser(pat).parse_with_comments().unwrap();
    assert_eq!(
        astc.ast,
        concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('f', span_range(pat, 26..27)),
                lit_with('o', span_range(pat, 27..28)),
                lit_with('o', span_range(pat, 28..29)),
                lit_with('b', span_range(pat, 74..75)),
                lit_with('a', span_range(pat, 75..76)),
                lit_with('r', span_range(pat, 76..77)),
            ]
        )
    );
    // A comment's span runs from its `#` through the terminating newline
    // (when there is one); its text excludes both.
    assert_eq!(
        astc.comments,
        vec![
            ast::Comment {
                span: span_range(pat, 5..26),
                comment: s(" This is comment 1."),
            },
            ast::Comment {
                span: span_range(pat, 30..51),
                comment: s(" This is comment 2."),
            },
            ast::Comment {
                span: span_range(pat, 53..74),
                comment: s(" This is comment 3."),
            },
            ast::Comment {
                span: span_range(pat, 78..98),
                comment: s(" This is comment 4."),
            },
        ]
    );
}
2838
/// Check a lone `]` and a pattern made of every escaped meta character.
#[test]
fn parse_holistic() {
    // Outside of a class, `]` is an ordinary literal.
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
    // Eighteen two-byte escapes, one per expected meta literal below, for
    // a total of 36 bytes. The escaped parentheses and pipe must be present
    // for the offsets from 10 onwards to line up.
    assert_eq!(
        parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
        Ok(concat(
            0..36,
            vec![
                meta_lit('\\', span(0..2)),
                meta_lit('.', span(2..4)),
                meta_lit('+', span(4..6)),
                meta_lit('*', span(6..8)),
                meta_lit('?', span(8..10)),
                meta_lit('(', span(10..12)),
                meta_lit(')', span(12..14)),
                meta_lit('|', span(14..16)),
                meta_lit('[', span(16..18)),
                meta_lit(']', span(18..20)),
                meta_lit('{', span(20..22)),
                meta_lit('}', span(22..24)),
                meta_lit('^', span(24..26)),
                meta_lit('$', span(26..28)),
                meta_lit('#', span(28..30)),
                meta_lit('&', span(30..32)),
                meta_lit('-', span(32..34)),
                meta_lit('~', span(34..36)),
            ]
        ))
    );
}
2869
/// Check how whitespace is treated when the `x` flag is switched on and
/// off, including inside groups and escapes.
///
/// The exact number of spaces in each pattern matters: the spans asserted
/// below are byte offsets into it.
#[test]
fn parse_ignore_whitespace() {
    // Test that basic whitespace insensitivity works.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
            ]
        ))
    );

    // Test that we can toggle whitespace insensitivity.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
            ]
        ))
    );

    // Test that nesting whitespace insensitive flags works.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                lit_with('a', span_range(pat, 0..1)),
                lit_with(' ', span_range(pat, 1..2)),
                Ast::group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        },],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                }),
                lit_with('a', span_range(pat, 9..10)),
                lit_with(' ', span_range(pat, 10..11)),
            ]
        ))
    );

    // Test that whitespace after an opening paren is insignificant.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName {
                        starts_with_p: true,
                        name: ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }
                    },
                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                }),
            ]
        ))
    );
    // Two spaces after `(`, so that `a` sits at offset 7.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                }),
            ]
        ))
    );
    // Two spaces after `(` and two after `:`, so that `:` sits at offset 8
    // and `a` at offset 11.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                }),
            ]
        ))
    );
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // Test that whitespace after an escape is OK.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                Ast::literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Superfluous,
                    c: ' ',
                }),
            ]
        ))
    );
}
3023
// Checks that line feeds in a pattern are parsed as ordinary literals and
// that the line/column bookkeeping of every following span is correct.
#[test]
fn parse_newlines() {
    // A single line feed between two dots is a literal covering offsets 1..2.
    let pat = ".\n.";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..3),
            vec![
                Ast::dot(span_range(pat, 0..1)),
                lit_with('\n', span_range(pat, 1..2)),
                Ast::dot(span_range(pat, 2..3)),
            ]
        ))
    );

    // Positions below are written as `npos(offset, line, column)`. The end
    // position of each '\n' literal is where the line number is bumped and
    // the column is reset to 1.
    let pat = "foobar\nbaz\nquux\n";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                // Line 1: "foobar\n"
                lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
                lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
                lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
                lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
                lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
                // Line 2: "baz\n"
                lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
                lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
                lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
                lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
                // Line 3: "quux\n"
                lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
                lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
                lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
                lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
                lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
            ]
        ))
    );
}
3065
// Exercises the uncounted repetition operators `*`, `+`, `?` and the lazy
// `??` form, both on their own and inside concatenations, groups and
// alternations, plus the error reported when an operator has no operand.
#[test]
fn parse_uncounted_repetition() {
    // Builds the expected repetition node: `whole` is the span of operand
    // plus operator, `op` is the span of the operator alone.
    fn rep(
        whole: core::ops::Range<usize>,
        op: core::ops::Range<usize>,
        kind: ast::RepetitionKind,
        greedy: bool,
        inner: Ast,
    ) -> Ast {
        Ast::repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy,
            ast: Box::new(inner),
        })
    }

    // A single literal followed by each operator.
    assert_eq!(
        parser(r"a*").parse(),
        Ok(rep(0..2, 1..2, ast::RepetitionKind::ZeroOrMore, true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a+").parse(),
        Ok(rep(0..2, 1..2, ast::RepetitionKind::OneOrMore, true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a?").parse(),
        Ok(rep(0..2, 1..2, ast::RepetitionKind::ZeroOrOne, true, lit('a', 0)))
    );
    // A trailing `?` is folded into the operator span and clears `greedy`.
    assert_eq!(
        parser(r"a??").parse(),
        Ok(rep(0..3, 1..3, ast::RepetitionKind::ZeroOrOne, false, lit('a', 0)))
    );

    // The operator binds only to the immediately preceding expression.
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(
            0..3,
            vec![
                rep(
                    0..2,
                    1..2,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('a', 0)
                ),
                lit('b', 2),
            ]
        ))
    );
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(
            0..4,
            vec![
                rep(
                    0..3,
                    1..3,
                    ast::RepetitionKind::ZeroOrOne,
                    false,
                    lit('a', 0)
                ),
                lit('b', 3),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(
            0..3,
            vec![
                lit('a', 0),
                rep(
                    1..3,
                    2..3,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('b', 1)
                ),
            ]
        ))
    );
    // A whole group can be the operand.
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(rep(
            0..5,
            4..5,
            ast::RepetitionKind::ZeroOrOne,
            true,
            group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2)]))
        ))
    );
    // After `|` the repetition applies within the second branch only.
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(
            0..3,
            vec![
                Ast::empty(span(0..0)),
                rep(
                    1..3,
                    2..3,
                    ast::RepetitionKind::ZeroOrOne,
                    true,
                    lit('a', 1)
                ),
            ]
        ))
    );

    // An operator with nothing to repeat is an error whose span is empty
    // and located at the operator's offset.
    for (pat, at) in [
        (r"*", 0),
        (r"(?i)*", 4),
        (r"(*)", 1),
        (r"(?:?)", 3),
        (r"+", 0),
        (r"?", 0),
        (r"(?)", 1),
        (r"|*", 1),
        (r"|+", 1),
        (r"|?", 1),
    ] {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::RepetitionMissing,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3289
// Exercises counted repetitions (`{n}`, `{n,}`, `{n,m}`), including the
// lazy suffix, interior whitespace, the optional empty-minimum syntax, and
// every error the counted form is expected to report.
#[test]
fn parse_counted_repetition() {
    // Builds the expected node for a counted repetition: `whole` is the
    // span of operand plus `{...}`, `op` is the span of the `{...}` operator
    // (including a trailing `?` when present).
    fn counted(
        whole: core::ops::Range<usize>,
        op: core::ops::Range<usize>,
        range: ast::RepetitionRange,
        greedy: bool,
        inner: Ast,
    ) -> Ast {
        Ast::repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp {
                span: span(op),
                kind: ast::RepetitionKind::Range(range),
            },
            greedy,
            ast: Box::new(inner),
        })
    }

    // The three range forms on a single literal.
    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(counted(
            0..4,
            1..4,
            ast::RepetitionRange::Exactly(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(counted(
            0..5,
            1..5,
            ast::RepetitionRange::AtLeast(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(counted(
            0..6,
            1..6,
            ast::RepetitionRange::Bounded(5, 9),
            true,
            lit('a', 0)
        ))
    );
    // A trailing `?` makes the repetition non-greedy.
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(counted(
            0..5,
            1..5,
            ast::RepetitionRange::Exactly(5),
            false,
            lit('a', 0)
        ))
    );

    // The operator binds only to the immediately preceding expression.
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(
            0..5,
            vec![
                lit('a', 0),
                counted(
                    1..5,
                    2..5,
                    ast::RepetitionRange::Exactly(5),
                    true,
                    lit('b', 1)
                ),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(
            0..6,
            vec![
                lit('a', 0),
                counted(
                    1..5,
                    2..5,
                    ast::RepetitionRange::Exactly(5),
                    true,
                    lit('b', 1)
                ),
                lit('c', 5),
            ]
        ))
    );

    // Whitespace inside the braces is accepted and counted in the span.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(counted(
            0..6,
            1..6,
            ast::RepetitionRange::Exactly(5),
            true,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(counted(
            0..10,
            1..10,
            ast::RepetitionRange::Bounded(5, 9),
            true,
            lit('a', 0)
        ))
    );
    // With the empty-minimum parser, `{,9}` is read as a lower bound of 0.
    assert_eq!(
        parser_empty_min_range(r"a{,9}").parse(),
        Ok(counted(
            0..5,
            1..5,
            ast::RepetitionRange::Bounded(0, 9),
            true,
            lit('a', 0)
        ))
    );
    // With the whitespace-insensitive parser, the lazy `?` may be separated
    // from the closing brace by a space; the operator span covers both.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(counted(
            0..8,
            1..8,
            ast::RepetitionRange::Bounded(5, 9),
            false,
            lit('a', 0)
        ))
    );
    // Here `\b` is parsed as a word-boundary assertion and `{5,9}` as a
    // repetition applied to it.
    assert_eq!(
        parser(r"\b{5,9}").parse(),
        Ok(counted(
            0..7,
            2..7,
            ast::RepetitionRange::Bounded(5, 9),
            true,
            Ast::assertion(ast::Assertion {
                span: span(0..2),
                kind: ast::AssertionKind::WordBoundary,
            })
        ))
    );

    // Error cases: (pattern, expected error span, expected error kind).
    for (pat, at, kind) in [
        // A flag directive is not something that can be repeated.
        (r"(?i){0}", 4..4, ast::ErrorKind::RepetitionMissing),
        (r"(?m){1,1}", 4..4, ast::ErrorKind::RepetitionMissing),
        // Missing or malformed counts.
        (r"a{]}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{1,]}", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{", 1..2, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{}", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{a", 2..2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9999999999}", 2..12, ast::ErrorKind::DecimalInvalid),
        (r"a{9", 1..3, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,a", 4..4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9,9999999999}", 4..14, ast::ErrorKind::DecimalInvalid),
        (r"a{9,", 1..4, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,11", 1..6, ast::ErrorKind::RepetitionCountUnclosed),
        // Minimum greater than maximum.
        (r"a{2,1}", 1..6, ast::ErrorKind::RepetitionCountInvalid),
        // No operand before the opening brace.
        (r"{5}", 0..0, ast::ErrorKind::RepetitionMissing),
        (r"|{5}", 1..1, ast::ErrorKind::RepetitionMissing),
    ] {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(at), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3577
// Exercises alternation with `|`: flat and nested branches, empty branches
// at either end, alternation inside groups, and unbalanced parentheses.
#[test]
fn parse_alternate() {
    // Builds an alternation node directly from `ast::Alternation`.
    fn alternation(range: core::ops::Range<usize>, asts: Vec<Ast>) -> Ast {
        Ast::alternation(ast::Alternation { span: span(range), asts })
    }

    // Zero-width empty expression located at offset `at`.
    fn empty(at: usize) -> Ast {
        Ast::empty(span(at..at))
    }

    // Two single-literal branches, bare and inside a capture group.
    assert_eq!(
        parser(r"a|b").parse(),
        Ok(alternation(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser(r"(a|b)").parse(),
        Ok(group(
            0..5,
            1,
            alternation(1..4, vec![lit('a', 1), lit('b', 3)])
        ))
    );

    // Three branches are kept in a single flat alternation.
    assert_eq!(
        parser(r"a|b|c").parse(),
        Ok(alternation(
            0..5,
            vec![lit('a', 0), lit('b', 2), lit('c', 4)]
        ))
    );
    // Each branch may itself be a concatenation.
    assert_eq!(
        parser(r"ax|by|cz").parse(),
        Ok(alternation(
            0..8,
            vec![
                concat(0..2, vec![lit('a', 0), lit('x', 1)]),
                concat(3..5, vec![lit('b', 3), lit('y', 4)]),
                concat(6..8, vec![lit('c', 6), lit('z', 7)]),
            ]
        ))
    );
    assert_eq!(
        parser(r"(ax|by|cz)").parse(),
        Ok(group(
            0..10,
            1,
            alternation(
                1..9,
                vec![
                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                    concat(4..6, vec![lit('b', 4), lit('y', 5)]),
                    concat(7..9, vec![lit('c', 7), lit('z', 8)]),
                ]
            )
        ))
    );
    // Nested groups: capture indices 1, 2, 3 are assigned in order of the
    // opening parentheses.
    assert_eq!(
        parser(r"(ax|(by|(cz)))").parse(),
        Ok(group(
            0..14,
            1,
            alt(
                1..13,
                vec![
                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
                    group(
                        4..13,
                        2,
                        alt(
                            5..12,
                            vec![
                                concat(
                                    5..7,
                                    vec![lit('b', 5), lit('y', 6)]
                                ),
                                group(
                                    8..12,
                                    3,
                                    concat(
                                        9..11,
                                        vec![lit('c', 9), lit('z', 10)]
                                    )
                                ),
                            ]
                        )
                    ),
                ]
            )
        ))
    );

    // A missing branch is represented by an empty expression positioned
    // where the branch would have been.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(0..1, vec![empty(0), empty(1)]))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
    );

    // The same empty-branch cases inside a capture group.
    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
    );

    // Unbalanced parentheses: the error span covers the offending paren.
    assert_eq!(
        parser(r"a|b)").parse().unwrap_err(),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::GroupUnopened,
        }
    );
    assert_eq!(
        parser(r"(a|b").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
}
3737
// Look-ahead and look-behind groups must be rejected with
// `UnsupportedLookAround`. The reported span starts at the opening paren
// and ends just past the look-around introducer (`?=`, `?!`, `?<=`, `?<!`).
#[test]
fn parse_unsupported_lookaround() {
    // (pattern, exclusive end offset of the expected error span)
    for (pat, end) in [
        (r"(?=a)", 3),
        (r"(?!a)", 3),
        (r"(?<=a)", 4),
        (r"(?<!a)", 4),
    ] {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(0..end),
                kind: ast::ErrorKind::UnsupportedLookAround,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3769
// Exercises group parsing: bare flag directives such as `(?i)`, capturing
// groups, non-capturing groups with and without flags, and the errors
// reported for unopened or unclosed groups.
#[test]
fn parse_group() {
    // A one-character flag item located at offset `at`.
    fn flag_item(at: usize, flag: ast::Flag) -> ast::FlagsItem {
        ast::FlagsItem {
            span: span(at..at + 1),
            kind: ast::FlagsItemKind::Flag(flag),
        }
    }

    // The `-` negation marker located at offset `at`.
    fn negation_item(at: usize) -> ast::FlagsItem {
        ast::FlagsItem {
            span: span(at..at + 1),
            kind: ast::FlagsItemKind::Negation,
        }
    }

    // A flag list covering `range` of the pattern.
    fn flags_at(
        range: core::ops::Range<usize>,
        items: Vec<ast::FlagsItem>,
    ) -> ast::Flags {
        ast::Flags { span: span(range), items }
    }

    // A bare flag directive, i.e. `(?flags)` with no sub-expression.
    fn set_flags(whole: core::ops::Range<usize>, flags: ast::Flags) -> Ast {
        Ast::flags(ast::SetFlags { span: span(whole), flags })
    }

    // A group of the given kind wrapping `inner`.
    fn grouped(
        whole: core::ops::Range<usize>,
        kind: ast::GroupKind,
        inner: Ast,
    ) -> Ast {
        Ast::group(ast::Group {
            span: span(whole),
            kind,
            ast: Box::new(inner),
        })
    }

    // Flag directives. The flags span excludes the surrounding `(?` and `)`.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(set_flags(
            0..4,
            flags_at(2..3, vec![flag_item(2, ast::Flag::CaseInsensitive)])
        ))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(set_flags(
            0..5,
            flags_at(
                2..4,
                vec![
                    flag_item(2, ast::Flag::CaseInsensitive),
                    flag_item(3, ast::Flag::SwapGreed),
                ]
            )
        ))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(set_flags(
            0..6,
            flags_at(
                2..5,
                vec![
                    flag_item(2, ast::Flag::CaseInsensitive),
                    negation_item(3),
                    flag_item(4, ast::Flag::SwapGreed),
                ]
            )
        ))
    );

    // Capturing groups; indices start at 1 and follow the opening parens.
    assert_eq!(
        parser("()").parse(),
        Ok(grouped(
            0..2,
            ast::GroupKind::CaptureIndex(1),
            Ast::empty(span(1..1))
        ))
    );
    assert_eq!(
        parser("(a)").parse(),
        Ok(grouped(0..3, ast::GroupKind::CaptureIndex(1), lit('a', 1)))
    );
    assert_eq!(
        parser("(())").parse(),
        Ok(grouped(
            0..4,
            ast::GroupKind::CaptureIndex(1),
            grouped(
                1..3,
                ast::GroupKind::CaptureIndex(2),
                Ast::empty(span(2..2))
            )
        ))
    );

    // Non-capturing group without flags: the flag list is empty and its
    // span is zero-width at the position of the `:`.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(grouped(
            0..5,
            ast::GroupKind::NonCapturing(flags_at(2..2, vec![])),
            lit('a', 3)
        ))
    );

    // Non-capturing groups carrying flags.
    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(grouped(
            0..6,
            ast::GroupKind::NonCapturing(flags_at(
                2..3,
                vec![flag_item(2, ast::Flag::CaseInsensitive)]
            )),
            lit('a', 4)
        ))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(grouped(
            0..8,
            ast::GroupKind::NonCapturing(flags_at(
                2..5,
                vec![
                    flag_item(2, ast::Flag::CaseInsensitive),
                    negation_item(3),
                    flag_item(4, ast::Flag::SwapGreed),
                ]
            )),
            lit('a', 6)
        ))
    );

    // Error cases: (pattern, expected error span, expected error kind).
    for (pat, at, kind) in [
        ("(", 0..1, ast::ErrorKind::GroupUnclosed),
        ("(?", 0..1, ast::ErrorKind::GroupUnclosed),
        // `P` not followed by `<` is treated as an (unknown) flag.
        ("(?P", 2..3, ast::ErrorKind::FlagUnrecognized),
        ("(?P<", 4..4, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(a", 0..1, ast::ErrorKind::GroupUnclosed),
        // The span points at the outermost unclosed paren.
        ("(()", 0..1, ast::ErrorKind::GroupUnclosed),
        (")", 0..1, ast::ErrorKind::GroupUnopened),
        ("a)", 1..2, ast::ErrorKind::GroupUnopened),
    ] {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(at), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3981
// Exercises named capture groups in both the `(?<name>...)` and
// `(?P<name>...)` spellings: accepted name characters (including non-ASCII
// ones), and the errors for empty, truncated, invalid and duplicate names.
#[test]
fn parse_capture_name() {
    // Builds the expected node for the first (index 1) named group.
    fn named(
        whole: Span,
        starts_with_p: bool,
        name_span: Span,
        name: &str,
        inner: Ast,
    ) -> Ast {
        Ast::group(ast::Group {
            span: whole,
            kind: ast::GroupKind::CaptureName {
                starts_with_p,
                name: ast::CaptureName {
                    span: name_span,
                    name: s(name),
                    index: 1,
                },
            },
            ast: Box::new(inner),
        })
    }

    // `starts_with_p` records which of the two spellings was used.
    assert_eq!(
        parser("(?<a>z)").parse(),
        Ok(named(span(0..7), false, span(3..4), "a", lit('z', 5)))
    );
    assert_eq!(
        parser("(?P<a>z)").parse(),
        Ok(named(span(0..8), true, span(4..5), "a", lit('z', 6)))
    );
    assert_eq!(
        parser("(?P<abc>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "abc", lit('z', 8)))
    );

    // Underscores, digits (not leading), dots and brackets are accepted.
    assert_eq!(
        parser("(?P<a_1>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "a_1", lit('z', 8)))
    );
    assert_eq!(
        parser("(?P<a.1>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "a.1", lit('z', 8)))
    );
    assert_eq!(
        parser("(?P<a[1]>z)").parse(),
        Ok(named(span(0..11), true, span(4..8), "a[1]", lit('z', 9)))
    );

    // Non-ASCII names. Positions are (byte offset, line, column): offsets
    // advance by the UTF-8 width of each character while columns advance by
    // one per character, so the two diverge after a multi-byte character.
    assert_eq!(
        parser("(?P<a¾>)").parse(),
        Ok(named(
            Span::new(Position::new(0, 1, 1), Position::new(9, 1, 9)),
            true,
            Span::new(Position::new(4, 1, 5), Position::new(7, 1, 7)),
            "a¾",
            Ast::empty(Span::new(
                Position::new(8, 1, 8),
                Position::new(8, 1, 8),
            ))
        ))
    );
    assert_eq!(
        parser("(?P<名字>)").parse(),
        Ok(named(
            Span::new(Position::new(0, 1, 1), Position::new(12, 1, 9)),
            true,
            Span::new(Position::new(4, 1, 5), Position::new(10, 1, 7)),
            "名字",
            Ast::empty(Span::new(
                Position::new(11, 1, 8),
                Position::new(11, 1, 8),
            ))
        ))
    );

    // Error cases: (pattern, expected error span, expected error kind).
    for (pat, at, kind) in [
        // Pattern ends before the closing `>`.
        ("(?P<", span(4..4), ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<>z)", span(4..4), ast::ErrorKind::GroupNameEmpty),
        ("(?P<a", span(5..5), ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<ab", span(6..6), ast::ErrorKind::GroupNameUnexpectedEof),
        // The span covers the first character that is not allowed.
        ("(?P<0a", span(4..5), ast::ErrorKind::GroupNameInvalid),
        ("(?P<~", span(4..5), ast::ErrorKind::GroupNameInvalid),
        ("(?P<abc~", span(7..8), ast::ErrorKind::GroupNameInvalid),
        // A repeated name reports the second use and points back to the
        // first.
        (
            "(?P<a>y)(?P<a>z)",
            span(12..13),
            ast::ErrorKind::GroupNameDuplicate { original: span(4..5) },
        ),
        // A name may not start with a digit.
        ("(?P<5>)", span(4..5), ast::ErrorKind::GroupNameInvalid),
        ("(?P<5a>)", span(4..5), ast::ErrorKind::GroupNameInvalid),
        // `¾` is rejected as the first character of a name.
        (
            "(?P<¾>)",
            Span::new(Position::new(4, 1, 5), Position::new(6, 1, 6)),
            ast::ErrorKind::GroupNameInvalid,
        ),
        (
            "(?P<¾a>)",
            Span::new(Position::new(4, 1, 5), Position::new(6, 1, 6)),
            ast::ErrorKind::GroupNameInvalid,
        ),
        // `☃` is rejected in any position.
        (
            "(?P<☃>)",
            Span::new(Position::new(4, 1, 5), Position::new(7, 1, 6)),
            ast::ErrorKind::GroupNameInvalid,
        ),
        (
            "(?P<a☃>)",
            Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
            ast::ErrorKind::GroupNameInvalid,
        ),
    ] {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: at, kind },
            "pattern: {:?}",
            pat
        );
    }
}
4240
/// Checks `parse_flags` against well-formed flag lists and against each
/// malformed list that is expected to be rejected.
#[test]
fn parse_flags() {
    // Expected item for a single recognized flag letter.
    fn on(span: Span, flag: ast::Flag) -> ast::FlagsItem {
        ast::FlagsItem { span, kind: ast::FlagsItemKind::Flag(flag) }
    }
    // Expected item for the `-` negation operator.
    fn negation(span: Span) -> ast::FlagsItem {
        ast::FlagsItem { span, kind: ast::FlagsItemKind::Negation }
    }
    let flags = |pat: &str| parser(pat).parse_flags();
    let flags_err = |pat: &str| parser(pat).parse_flags().unwrap_err();

    // A lone flag yields the same result whether `:` or `)` follows it; the
    // terminator is not part of the reported span.
    for pat in ["i:", "i)"] {
        assert_eq!(
            flags(pat),
            Ok(ast::Flags {
                span: span(0..1),
                items: vec![on(span(0..1), ast::Flag::CaseInsensitive)],
            })
        );
    }

    assert_eq!(
        flags("isU:"),
        Ok(ast::Flags {
            span: span(0..3),
            items: vec![
                on(span(0..1), ast::Flag::CaseInsensitive),
                on(span(1..2), ast::Flag::DotMatchesNewLine),
                on(span(2..3), ast::Flag::SwapGreed),
            ],
        })
    );

    // The negation operator is recorded as its own item, wherever it sits.
    assert_eq!(
        flags("-isU:"),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                negation(span(0..1)),
                on(span(1..2), ast::Flag::CaseInsensitive),
                on(span(2..3), ast::Flag::DotMatchesNewLine),
                on(span(3..4), ast::Flag::SwapGreed),
            ],
        })
    );
    assert_eq!(
        flags("i-sU:"),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                on(span(0..1), ast::Flag::CaseInsensitive),
                negation(span(1..2)),
                on(span(2..3), ast::Flag::DotMatchesNewLine),
                on(span(3..4), ast::Flag::SwapGreed),
            ],
        })
    );
    assert_eq!(
        flags("i-sR:"),
        Ok(ast::Flags {
            span: span(0..4),
            items: vec![
                on(span(0..1), ast::Flag::CaseInsensitive),
                negation(span(1..2)),
                on(span(2..3), ast::Flag::DotMatchesNewLine),
                on(span(3..4), ast::Flag::CRLF),
            ],
        })
    );

    // Input ends before a terminator is seen.
    assert_eq!(
        flags_err("isU"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::FlagUnexpectedEof,
        }
    );
    // Unknown flag letter.
    assert_eq!(
        flags_err("isUa:"),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    // Repeating a flag reports where it first appeared.
    assert_eq!(
        flags_err("isUi:"),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
        }
    );
    // A second negation reports where the first one appeared.
    assert_eq!(
        flags_err("i-sU-i:"),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::FlagRepeatedNegation {
                original: span(1..2),
            },
        }
    );
    // A negation with no flag after it is rejected at the `-` itself.
    for (pat, at) in
        [("-)", span(0..1)), ("i-)", span(1..2)), ("iU-)", span(2..3))]
    {
        assert_eq!(
            flags_err(pat),
            TestError {
                span: at,
                kind: ast::ErrorKind::FlagDanglingNegation,
            }
        );
    }
}
4426
/// Checks that `parse_flag` maps each supported flag letter to its
/// `ast::Flag` and rejects anything else.
#[test]
fn parse_flag() {
    let recognized = [
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("R", ast::Flag::CRLF),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (pat, flag) in recognized {
        assert_eq!(parser(pat).parse_flag(), Ok(flag));
    }

    // An unknown ASCII letter is rejected with a one-byte span.
    assert_eq!(
        parser("a").parse_flag().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    // A multi-byte character is rejected with a span covering all of its
    // UTF-8 bytes.
    assert_eq!(
        parser("☃").parse_flag().unwrap_err(),
        TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
}
4452
/// Checks `parse_primitive` on inputs that do not begin with a backslash:
/// the dot, the line anchors and verbatim literals.
#[test]
fn parse_primitive_non_escape() {
    // Expected primitive for a one-byte assertion at the start of the input.
    fn anchor(kind: ast::AssertionKind) -> Primitive {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    }
    // Expected primitive for a character taken verbatim from the pattern.
    fn verbatim(span: Span, c: char) -> Primitive {
        Primitive::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }
    let primitive = |pat: &str| parser(pat).parse_primitive();

    assert_eq!(primitive(r"."), Ok(Primitive::Dot(span(0..1))));
    assert_eq!(primitive(r"^"), Ok(anchor(ast::AssertionKind::StartLine)));
    assert_eq!(primitive(r"$"), Ok(anchor(ast::AssertionKind::EndLine)));

    assert_eq!(primitive(r"a"), Ok(verbatim(span(0..1), 'a')));
    // An unescaped `|` handed directly to `parse_primitive` comes back as a
    // one-byte verbatim literal.
    assert_eq!(primitive(r"|"), Ok(verbatim(span(0..1), '|')));
    // Spans are byte offsets, so a three-byte character spans `0..3`.
    assert_eq!(
        primitive(r"☃"),
        Ok(verbatim(span_range("☃", 0..3), '☃'))
    );
}
4499
/// Checks backslash escapes: meta and special literals, assertions
/// (including the `\b{...}` word-boundary forms), superfluous escapes, and
/// the errors reported for malformed or unsupported escapes.
#[test]
fn parse_escape() {
    // Expected primitive for an assertion.
    fn assertion(span: Span, kind: ast::AssertionKind) -> Primitive {
        Primitive::Assertion(ast::Assertion { span, kind })
    }
    // Expected primitive for a literal of the given kind.
    fn literal(span: Span, kind: ast::LiteralKind, c: char) -> Primitive {
        Primitive::Literal(ast::Literal { span, kind, c })
    }
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    // An escaped meta character is a `Meta` literal covering both the
    // backslash and the character.
    assert_eq!(
        parser(r"\|").parse_primitive(),
        Ok(literal(span(0..2), ast::LiteralKind::Meta, '|'))
    );

    // Escapes that name a specific control character.
    let specials = [
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for (pat, c, kind) in specials {
        assert_eq!(
            parser(pat).parse_primitive(),
            Ok(literal(span(0..2), ast::LiteralKind::Special(kind), c))
        );
    }

    // Escapes that produce assertions. Every pattern is ASCII and the
    // assertion spans the whole pattern, so the span is `0..pat.len()`.
    let assertions = [
        (r"\A", ast::AssertionKind::StartText),
        (r"\z", ast::AssertionKind::EndText),
        (r"\b", ast::AssertionKind::WordBoundary),
        (r"\b{start}", ast::AssertionKind::WordBoundaryStart),
        (r"\b{end}", ast::AssertionKind::WordBoundaryEnd),
        (r"\b{start-half}", ast::AssertionKind::WordBoundaryStartHalf),
        (r"\b{end-half}", ast::AssertionKind::WordBoundaryEndHalf),
        (r"\<", ast::AssertionKind::WordBoundaryStartAngle),
        (r"\>", ast::AssertionKind::WordBoundaryEndAngle),
        (r"\B", ast::AssertionKind::NotWordBoundary),
    ];
    for (pat, kind) in assertions {
        assert_eq!(
            parser(pat).parse_primitive(),
            Ok(assertion(span(0..pat.len()), kind))
        );
    }

    // We also support superfluous escapes in most cases now too.
    for c in ['!', '@', '%', '"', '\'', '/', ' '] {
        let pat = format!(r"\{}", c);
        assert_eq!(
            parser(&pat).parse_primitive(),
            Ok(literal(span(0..2), ast::LiteralKind::Superfluous, c))
        );
    }

    // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
    // gives flexibility for future evolution.
    for pat in [r"\e", r"\y"] {
        assert_eq!(
            escape_err(pat),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }

    // Starting a special word boundary without any non-whitespace chars
    // after the brace makes it ambiguous whether the user meant to write
    // a counted repetition (probably not?) or an actual special word
    // boundary assertion.
    assert_eq!(
        escape_err(r"\b{"),
        TestError {
            span: span(0..3),
            kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        }
    );
    assert_eq!(
        parser_ignore_whitespace(r"\b{ ").parse_escape().unwrap_err(),
        TestError {
            span: span(0..4),
            kind: ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
        }
    );
    // When 'x' is not enabled, the space is seen as a non-[-A-Za-z] char,
    // and thus causes the parser to treat it as a counted repetition.
    assert_eq!(
        parser(r"\b{ ").parse().unwrap_err(),
        TestError {
            span: span(2..4),
            kind: ast::ErrorKind::RepetitionCountUnclosed,
        }
    );
    // In this case, we got some valid chars that make it look like the
    // user is writing one of the special word boundary assertions, but
    // the closing brace is missing.
    assert_eq!(
        escape_err(r"\b{foo"),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
        }
    );
    // We get the same error as above, except it is provoked by seeing a
    // char that we know is invalid before seeing a closing brace.
    assert_eq!(
        escape_err(r"\b{foo!}"),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnclosed,
        }
    );
    // And this one occurs when, syntactically, everything looks okay, but
    // we don't use a valid spelling of a word boundary assertion.
    assert_eq!(
        escape_err(r"\b{foo}"),
        TestError {
            span: span(3..6),
            kind: ast::ErrorKind::SpecialWordBoundaryUnrecognized,
        }
    );

    // An unfinished escape is illegal.
    assert_eq!(
        escape_err(r"\"),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
}
4694
/// Checks that a backslash followed by a decimal digit is rejected as an
/// unsupported backreference by the default (non-octal) parser.
#[test]
fn parse_unsupported_backreference() {
    for pat in [r"\0", r"\9"] {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            }
        );
    }
}
4712
/// Checks octal escapes with the octal-enabled test parser: every value in
/// the loop range, what happens when more digits follow, and the rejection
/// of a non-octal digit.
#[test]
fn parse_octal() {
    // Expected AST literal for an octal escape.
    fn octal(span: Span, c: char) -> ast::Literal {
        ast::Literal { span, kind: ast::LiteralKind::Octal, c }
    }
    // Expected AST literal for a character taken verbatim.
    fn verbatim(span: Span, c: char) -> ast::Literal {
        ast::Literal { span, kind: ast::LiteralKind::Verbatim, c }
    }

    for i in 0..511 {
        let pat = format!(r"\{:o}", i);
        let want = octal(span(0..pat.len()), char::from_u32(i).unwrap());
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(want))
        );
    }

    // `parse_escape` stops at the first non-octal digit...
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(octal(span(0..3), '?')))
    );
    // ...and after three octal digits.
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(octal(span(0..4), '\u{01FF}')))
    );

    // In a full parse, whatever follows the escape is a verbatim literal.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::literal(octal(span(0..3), '?')),
                Ast::literal(verbatim(span(3..4), '8')),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::literal(octal(span(0..4), '\u{01FF}')),
                Ast::literal(verbatim(span(4..5), '7')),
            ],
        }))
    );

    // `8` is not an octal digit, so `\8` is an unrecognized escape.
    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4787
/// Checks the fixed two-digit `\xNN` escape for every value below 256 and
/// the errors for truncated or non-hex input.
#[test]
fn parse_hex_two() {
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    for i in 0..256 {
        let pat = format!(r"\x{:02x}", i);
        let want = ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: char::from_u32(i).unwrap(),
        };
        assert_eq!(
            parser(&pat).parse_escape(),
            Ok(Primitive::Literal(want))
        );
    }

    // Only one of the two required digits is present.
    assert_eq!(
        escape_err(r"\xF"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    // The error span points at the offending non-hex character.
    for (pat, at) in [(r"\xG", span(2..3)), (r"\xFG", span(3..4))] {
        assert_eq!(
            escape_err(pat),
            TestError {
                span: at,
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
}
4824
/// Checks the fixed four-digit `\uNNNN` escape for every valid scalar value
/// below 65536 and the errors for truncated, non-hex or surrogate input.
#[test]
fn parse_hex_four() {
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    for i in 0..65536 {
        // Values that are not Unicode scalar values are skipped here.
        if let Some(c) = char::from_u32(i) {
            let pat = format!(r"\u{:04x}", i);
            let want = ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeShort,
                ),
                c,
            };
            assert_eq!(
                parser(&pat).parse_escape(),
                Ok(Primitive::Literal(want))
            );
        }
    }

    // Fewer than four digits before the end of the input.
    assert_eq!(
        escape_err(r"\uF"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    // The offending `G` is always the last byte of these patterns, so the
    // error span is that final byte.
    for pat in [r"\uG", r"\uFG", r"\uFFG", r"\uFFFG"] {
        let end = pat.len();
        assert_eq!(
            escape_err(pat),
            TestError {
                span: span(end - 1..end),
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
    // Four valid hex digits that do not form a scalar value; the span covers
    // just the digits.
    assert_eq!(
        escape_err(r"\uD800"),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::EscapeHexInvalid,
        }
    );
}
4888
/// Checks the fixed eight-digit `\UNNNNNNNN` escape for every valid scalar
/// value below 65536 and the errors for truncated or non-hex input.
#[test]
fn parse_hex_eight() {
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    for i in 0..65536 {
        // Values that are not Unicode scalar values are skipped here.
        if let Some(c) = char::from_u32(i) {
            let pat = format!(r"\U{:08x}", i);
            let want = ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeLong,
                ),
                c,
            };
            assert_eq!(
                parser(&pat).parse_escape(),
                Ok(Primitive::Literal(want))
            );
        }
    }

    // Fewer than eight digits before the end of the input.
    assert_eq!(
        escape_err(r"\UF"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    // A non-hex character at each of the eight digit positions. The
    // offending `G` is always the last byte, so the error span is that
    // final byte.
    let invalid_digit = [
        r"\UG",
        r"\UFG",
        r"\UFFG",
        r"\UFFFG",
        r"\UFFFFG",
        r"\UFFFFFG",
        r"\UFFFFFFG",
        r"\UFFFFFFFG",
    ];
    for pat in invalid_digit {
        let end = pat.len();
        assert_eq!(
            escape_err(pat),
            TestError {
                span: span(end - 1..end),
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
}
4973
/// Checks the brace-delimited hex escapes (`\x{...}`, `\u{...}`,
/// `\U{...}`) and each way they can be malformed.
#[test]
fn parse_hex_brace() {
    // Well-formed escapes. All patterns are ASCII and the literal spans the
    // whole pattern, so the expected span is `0..pat.len()`.
    let valid = [
        (r"\u{26c4}", ast::HexLiteralKind::UnicodeShort, '⛄'),
        (r"\U{26c4}", ast::HexLiteralKind::UnicodeLong, '⛄'),
        (r"\x{26c4}", ast::HexLiteralKind::X, '⛄'),
        (r"\x{26C4}", ast::HexLiteralKind::X, '⛄'),
        (r"\x{10fFfF}", ast::HexLiteralKind::X, '\u{10FFFF}'),
    ];
    for (pat, hex_kind, c) in valid {
        assert_eq!(
            parser(pat).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexBrace(hex_kind),
                c,
            }))
        );
    }

    // Malformed escapes, with the byte range and kind of the expected error.
    let invalid = [
        // Input ends before the escape is complete.
        (r"\x", 2..2, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{", 2..3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{FF", 2..5, ast::ErrorKind::EscapeUnexpectedEof),
        // Nothing between the braces.
        (r"\x{}", 2..4, ast::ErrorKind::EscapeHexEmpty),
        // A non-hex character between the braces.
        (r"\x{FGF}", 4..5, ast::ErrorKind::EscapeHexInvalidDigit),
        // Hex digits that do not yield a valid character.
        (r"\x{FFFFFF}", 3..9, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{D800}", 3..7, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{FFFFFFFFF}", 3..12, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pat, at, kind) in invalid {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(at), kind }
        );
    }
}
5078
/// Checks `parse_decimal` on valid digit strings, on inputs that do not
/// start with a digit, and on a number the parser reports as invalid.
#[test]
fn parse_decimal() {
    // A leading zero is accepted and does not change the value.
    for (pat, want) in [("123", 123), ("0", 0), ("01", 1)] {
        assert_eq!(parser(pat).parse_decimal(), Ok(want));
    }

    // No leading digit (a sign is not accepted) gives an empty-decimal error
    // at the start of the input.
    for pat in ["-1", ""] {
        assert_eq!(
            parser(pat).parse_decimal().unwrap_err(),
            TestError {
                span: span(0..0),
                kind: ast::ErrorKind::DecimalEmpty,
            }
        );
    }

    // All digits, but rejected as an invalid decimal; the span covers every
    // digit.
    assert_eq!(
        parser("9999999999").parse_decimal().unwrap_err(),
        TestError {
            span: span(0..10),
            kind: ast::ErrorKind::DecimalInvalid,
        }
    );
}
5101
5102	#[test]
5103	fn parse_set_class() {
5104	fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
5105	ast::ClassSet::union(ast::ClassSetUnion { span, items })
5106	}
5107
5108	fn intersection(
5109	span: Span,
5110	lhs: ast::ClassSet,
5111	rhs: ast::ClassSet,
5112	) -> ast::ClassSet {
5113	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5114	span,
5115	kind: ast::ClassSetBinaryOpKind::Intersection,
5116	lhs: Box::new(lhs),
5117	rhs: Box::new(rhs),
5118	})
5119	}
5120
5121	fn difference(
5122	span: Span,
5123	lhs: ast::ClassSet,
5124	rhs: ast::ClassSet,
5125	) -> ast::ClassSet {
5126	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5127	span,
5128	kind: ast::ClassSetBinaryOpKind::Difference,
5129	lhs: Box::new(lhs),
5130	rhs: Box::new(rhs),
5131	})
5132	}
5133
5134	fn symdifference(
5135	span: Span,
5136	lhs: ast::ClassSet,
5137	rhs: ast::ClassSet,
5138	) -> ast::ClassSet {
5139	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
5140	span,
5141	kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
5142	lhs: Box::new(lhs),
5143	rhs: Box::new(rhs),
5144	})
5145	}
5146
5147	fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
5148	ast::ClassSet::Item(item)
5149	}
5150
5151	fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
5152	ast::ClassSetItem::Ascii(cls)
5153	}
5154
5155	fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
5156	ast::ClassSetItem::Unicode(cls)
5157	}
5158
5159	fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
5160	ast::ClassSetItem::Perl(cls)
5161	}
5162
5163	fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
5164	ast::ClassSetItem::Bracketed(Box::new(cls))
5165	}
5166
5167	fn lit(span: Span, c: char) -> ast::ClassSetItem {
5168	ast::ClassSetItem::Literal(ast::Literal {
5169	span,
5170	kind: ast::LiteralKind::Verbatim,
5171	c,
5172	})
5173	}
5174
5175	fn empty(span: Span) -> ast::ClassSetItem {
5176	ast::ClassSetItem::Empty(span)
5177	}
5178
5179	fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
5180	let pos1 = Position {
5181	offset: span.start.offset + start.len_utf8(),
5182	column: span.start.column + `1`,
5183	..span.start
5184	};
5185	let pos2 = Position {
5186	offset: span.end.offset - end.len_utf8(),
5187	column: span.end.column - `1`,
5188	..span.end
5189	};
5190	ast::ClassSetItem::Range(ast::ClassSetRange {
5191	span,
5192	start: ast::Literal {
5193	span: Span { end: pos1, ..span },
5194	kind: ast::LiteralKind::Verbatim,
5195	c: start,
5196	},
5197	end: ast::Literal {
5198	span: Span { start: pos2, ..span },
5199	kind: ast::LiteralKind::Verbatim,
5200	c: end,
5201	},
5202	})
5203	}
5204
5205	fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
5206	ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
5207	}
5208
5209	fn lower(span: Span, negated: bool) -> ast::ClassAscii {
5210	ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
5211	}
5212
5213	assert_eq!(
5214	parser("[[:alnum:]]").parse(),
5215	Ok(Ast::class_bracketed(ast::ClassBracketed {
5216	span: span(`0`..`11`),
5217	negated: `false`,
5218	kind: itemset(item_ascii(alnum(span(`1`..`10`), `false`))),
5219	}))
5220	);
5221	assert_eq!(
5222	parser("[[[:alnum:]]]").parse(),
5223	Ok(Ast::class_bracketed(ast::ClassBracketed {
5224	span: span(`0`..`13`),
5225	negated: `false`,
5226	kind: itemset(item_bracket(ast::ClassBracketed {
5227	span: span(`1`..`12`),
5228	negated: `false`,
5229	kind: itemset(item_ascii(alnum(span(`2`..`11`), `false`))),
5230	})),
5231	}))
5232	);
5233	assert_eq!(
5234	parser("[[:alnum:]&&[:lower:]]").parse(),
5235	Ok(Ast::class_bracketed(ast::ClassBracketed {
5236	span: span(`0`..`22`),
5237	negated: `false`,
5238	kind: intersection(
5239	span(`1`..`21`),
5240	itemset(item_ascii(alnum(span(`1`..`10`), `false`))),
5241	itemset(item_ascii(lower(span(`12`..`21`), `false`))),
5242	),
5243	}))
5244	);
5245	assert_eq!(
5246	parser("[[:alnum:]--[:lower:]]").parse(),
5247	Ok(Ast::class_bracketed(ast::ClassBracketed {
5248	span: span(`0`..`22`),
5249	negated: `false`,
5250	kind: difference(
5251	span(`1`..`21`),
5252	itemset(item_ascii(alnum(span(`1`..`10`), `false`))),
5253	itemset(item_ascii(lower(span(`12`..`21`), `false`))),
5254	),
5255	}))
5256	);
5257	assert_eq!(
5258	parser("[[:alnum:]~~[:lower:]]").parse(),
5259	Ok(Ast::class_bracketed(ast::ClassBracketed {
5260	span: span(`0`..`22`),
5261	negated: `false`,
5262	kind: symdifference(
5263	span(`1`..`21`),
5264	itemset(item_ascii(alnum(span(`1`..`10`), `false`))),
5265	itemset(item_ascii(lower(span(`12`..`21`), `false`))),
5266	),
5267	}))
5268	);
5269
5270	assert_eq!(
5271	parser("[a]").parse(),
5272	Ok(Ast::class_bracketed(ast::ClassBracketed {
5273	span: span(`0`..`3`),
5274	negated: `false`,
5275	kind: itemset(lit(span(`1`..`2`), 'a')),
5276	}))
5277	);
5278	assert_eq!(
5279	parser(r"[a\]]").parse(),
5280	Ok(Ast::class_bracketed(ast::ClassBracketed {
5281	span: span(`0`..`5`),
5282	negated: `false`,
5283	kind: union(
5284	span(`1`..`4`),
5285	vec![
5286	lit(span(`1`..`2`), 'a'),
5287	ast::ClassSetItem::Literal(ast::Literal {
5288	span: span(`2`..`4`),
5289	kind: ast::LiteralKind::Meta,
5290	c: ']',
5291	}),
5292	]
5293	),
5294	}))
5295	);
5296	assert_eq!(
5297	parser(r"[a\-z]").parse(),
5298	Ok(Ast::class_bracketed(ast::ClassBracketed {
5299	span: span(`0`..`6`),
5300	negated: `false`,
5301	kind: union(
5302	span(`1`..`5`),
5303	vec![
5304	lit(span(`1`..`2`), 'a'),
5305	ast::ClassSetItem::Literal(ast::Literal {
5306	span: span(`2`..`4`),
5307	kind: ast::LiteralKind::Meta,
5308	c: '-',
5309	}),
5310	lit(span(`4`..`5`), 'z'),
5311	]
5312	),
5313	}))
5314	);
5315	assert_eq!(
5316	parser("[ab]").parse(),
5317	Ok(Ast::class_bracketed(ast::ClassBracketed {
5318	span: span(`0`..`4`),
5319	negated: `false`,
5320	kind: union(
5321	span(`1`..`3`),
5322	vec![lit(span(`1`..`2`), 'a'), lit(span(`2`..`3`), 'b'),]
5323	),
5324	}))
5325	);
5326	assert_eq!(
5327	parser("[a-]").parse(),
5328	Ok(Ast::class_bracketed(ast::ClassBracketed {
5329	span: span(`0`..`4`),
5330	negated: `false`,
5331	kind: union(
5332	span(`1`..`3`),
5333	vec![lit(span(`1`..`2`), 'a'), lit(span(`2`..`3`), '-'),]
5334	),
5335	}))
5336	);
5337	assert_eq!(
5338	parser("[-a]").parse(),
5339	Ok(Ast::class_bracketed(ast::ClassBracketed {
5340	span: span(`0`..`4`),
5341	negated: `false`,
5342	kind: union(
5343	span(`1`..`3`),
5344	vec![lit(span(`1`..`2`), '-'), lit(span(`2`..`3`), 'a'),]
5345	),
5346	}))
5347	);
5348	assert_eq!(
5349	parser(r"[\pL]").parse(),
5350	Ok(Ast::class_bracketed(ast::ClassBracketed {
5351	span: span(`0`..`5`),
5352	negated: `false`,
5353	kind: itemset(item_unicode(ast::ClassUnicode {
5354	span: span(`1`..`4`),
5355	negated: `false`,
5356	kind: ast::ClassUnicodeKind::OneLetter('L'),
5357	})),
5358	}))
5359	);
5360	assert_eq!(
5361	parser(r"[\w]").parse(),
5362	Ok(Ast::class_bracketed(ast::ClassBracketed {
5363	span: span(`0`..`4`),
5364	negated: `false`,
5365	kind: itemset(item_perl(ast::ClassPerl {
5366	span: span(`1`..`3`),
5367	kind: ast::ClassPerlKind::Word,
5368	negated: `false`,
5369	})),
5370	}))
5371	);
5372	assert_eq!(
5373	parser(r"[a\wz]").parse(),
5374	Ok(Ast::class_bracketed(ast::ClassBracketed {
5375	span: span(`0`..`6`),
5376	negated: `false`,
5377	kind: union(
5378	span(`1`..`5`),
5379	vec![
5380	lit(span(`1`..`2`), 'a'),
5381	item_perl(ast::ClassPerl {
5382	span: span(`2`..`4`),
5383	kind: ast::ClassPerlKind::Word,
5384	negated: `false`,
5385	}),
5386	lit(span(`4`..`5`), 'z'),
5387	]
5388	),
5389	}))
5390	);
5391
5392	assert_eq!(
5393	parser("[a-z]").parse(),
5394	Ok(Ast::class_bracketed(ast::ClassBracketed {
5395	span: span(`0`..`5`),
5396	negated: `false`,
5397	kind: itemset(range(span(`1`..`4`), 'a', 'z')),
5398	}))
5399	);
5400	assert_eq!(
5401	parser("[a-cx-z]").parse(),
5402	Ok(Ast::class_bracketed(ast::ClassBracketed {
5403	span: span(`0`..`8`),
5404	negated: `false`,
5405	kind: union(
5406	span(`1`..`7`),
5407	vec![
5408	range(span(`1`..`4`), 'a', 'c'),
5409	range(span(`4`..`7`), 'x', 'z'),
5410	]
5411	),
5412	}))
5413	);
5414	assert_eq!(
5415	parser(r"[\w&&a-cx-z]").parse(),
5416	Ok(Ast::class_bracketed(ast::ClassBracketed {
5417	span: span(`0`..`12`),
5418	negated: `false`,
5419	kind: intersection(
5420	span(`1`..`11`),
5421	itemset(item_perl(ast::ClassPerl {
5422	span: span(`1`..`3`),
5423	kind: ast::ClassPerlKind::Word,
5424	negated: `false`,
5425	})),
5426	union(
5427	span(`5`..`11`),
5428	vec![
5429	range(span(`5`..`8`), 'a', 'c'),
5430	range(span(`8`..`11`), 'x', 'z'),
5431	]
5432	),
5433	),
5434	}))
5435	);
5436	assert_eq!(
5437	parser(r"[a-cx-z&&\w]").parse(),
5438	Ok(Ast::class_bracketed(ast::ClassBracketed {
5439	span: span(`0`..`12`),
5440	negated: `false`,
5441	kind: intersection(
5442	span(`1`..`11`),
5443	union(
5444	span(`1`..`7`),
5445	vec![
5446	range(span(`1`..`4`), 'a', 'c'),
5447	range(span(`4`..`7`), 'x', 'z'),
5448	]
5449	),
5450	itemset(item_perl(ast::ClassPerl {
5451	span: span(`9`..`11`),
5452	kind: ast::ClassPerlKind::Word,
5453	negated: `false`,
5454	})),
5455	),
5456	}))
5457	);
5458	assert_eq!(
5459	parser(r"[a--b--c]").parse(),
5460	Ok(Ast::class_bracketed(ast::ClassBracketed {
5461	span: span(`0`..`9`),
5462	negated: `false`,
5463	kind: difference(
5464	span(`1`..`8`),
5465	difference(
5466	span(`1`..`5`),
5467	itemset(lit(span(`1`..`2`), 'a')),
5468	itemset(lit(span(`4`..`5`), 'b')),
5469	),
5470	itemset(lit(span(`7`..`8`), 'c')),
5471	),
5472	}))
5473	);
5474	assert_eq!(
5475	parser(r"[a~~b~~c]").parse(),
5476	Ok(Ast::class_bracketed(ast::ClassBracketed {
5477	span: span(`0`..`9`),
5478	negated: `false`,
5479	kind: symdifference(
5480	span(`1`..`8`),
5481	symdifference(
5482	span(`1`..`5`),
5483	itemset(lit(span(`1`..`2`), 'a')),
5484	itemset(lit(span(`4`..`5`), 'b')),
5485	),
5486	itemset(lit(span(`7`..`8`), 'c')),
5487	),
5488	}))
5489	);
5490	assert_eq!(
5491	parser(r"[\^&&^]").parse(),
5492	Ok(Ast::class_bracketed(ast::ClassBracketed {
5493	span: span(`0`..`7`),
5494	negated: `false`,
5495	kind: intersection(
5496	span(`1`..`6`),
5497	itemset(ast::ClassSetItem::Literal(ast::Literal {
5498	span: span(`1`..`3`),
5499	kind: ast::LiteralKind::Meta,
5500	c: '^',
5501	})),
5502	itemset(lit(span(`5`..`6`), '^')),
5503	),
5504	}))
5505	);
5506	assert_eq!(
5507	parser(r"[\&&&&]").parse(),
5508	Ok(Ast::class_bracketed(ast::ClassBracketed {
5509	span: span(`0`..`7`),
5510	negated: `false`,
5511	kind: intersection(
5512	span(`1`..`6`),
5513	itemset(ast::ClassSetItem::Literal(ast::Literal {
5514	span: span(`1`..`3`),
5515	kind: ast::LiteralKind::Meta,
5516	c: '&',
5517	})),
5518	itemset(lit(span(`5`..`6`), '&')),
5519	),
5520	}))
5521	);
5522	assert_eq!(
5523	parser(r"[&&&&]").parse(),
5524	Ok(Ast::class_bracketed(ast::ClassBracketed {
5525	span: span(`0`..`6`),
5526	negated: `false`,
5527	kind: intersection(
5528	span(`1`..`5`),
5529	intersection(
5530	span(`1`..`3`),
5531	itemset(empty(span(`1`..`1`))),
5532	itemset(empty(span(`3`..`3`))),
5533	),
5534	itemset(empty(span(`5`..`5`))),
5535	),
5536	}))
5537	);
5538
5539	let pat = "[☃-⛄]";
5540	assert_eq!(
5541	parser(pat).parse(),
5542	Ok(Ast::class_bracketed(ast::ClassBracketed {
5543	span: span_range(pat, `0`..`9`),
5544	negated: `false`,
5545	kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5546	span: span_range(pat, `1`..`8`),
5547	start: ast::Literal {
5548	span: span_range(pat, `1`..`4`),
5549	kind: ast::LiteralKind::Verbatim,
5550	c: '☃',
5551	},
5552	end: ast::Literal {
5553	span: span_range(pat, `5`..`8`),
5554	kind: ast::LiteralKind::Verbatim,
5555	c: '⛄',
5556	},
5557	})),
5558	}))
5559	);
5560
5561	assert_eq!(
5562	parser(r"[]]").parse(),
5563	Ok(Ast::class_bracketed(ast::ClassBracketed {
5564	span: span(`0`..`3`),
5565	negated: `false`,
5566	kind: itemset(lit(span(`1`..`2`), ']')),
5567	}))
5568	);
5569	assert_eq!(
5570	parser(r"[]\[]").parse(),
5571	Ok(Ast::class_bracketed(ast::ClassBracketed {
5572	span: span(`0`..`5`),
5573	negated: `false`,
5574	kind: union(
5575	span(`1`..`4`),
5576	vec![
5577	lit(span(`1`..`2`), ']'),
5578	ast::ClassSetItem::Literal(ast::Literal {
5579	span: span(`2`..`4`),
5580	kind: ast::LiteralKind::Meta,
5581	c: '[',
5582	}),
5583	]
5584	),
5585	}))
5586	);
5587	assert_eq!(
5588	parser(r"[\[]]").parse(),
5589	Ok(concat(
5590	`0`..`5`,
5591	vec![
5592	Ast::class_bracketed(ast::ClassBracketed {
5593	span: span(`0`..`4`),
5594	negated: `false`,
5595	kind: itemset(ast::ClassSetItem::Literal(
5596	ast::Literal {
5597	span: span(`1`..`3`),
5598	kind: ast::LiteralKind::Meta,
5599	c: '[',
5600	}
5601	)),
5602	}),
5603	Ast::literal(ast::Literal {
5604	span: span(`4`..`5`),
5605	kind: ast::LiteralKind::Verbatim,
5606	c: ']',
5607	}),
5608	]
5609	))
5610	);
5611
5612	assert_eq!(
5613	parser("[").parse().unwrap_err(),
5614	TestError {
5615	span: span(`0`..`1`),
5616	kind: ast::ErrorKind::ClassUnclosed,
5617	}
5618	);
5619	assert_eq!(
5620	parser("[[").parse().unwrap_err(),
5621	TestError {
5622	span: span(`1`..`2`),
5623	kind: ast::ErrorKind::ClassUnclosed,
5624	}
5625	);
5626	assert_eq!(
5627	parser("[[-]").parse().unwrap_err(),
5628	TestError {
5629	span: span(`0`..`1`),
5630	kind: ast::ErrorKind::ClassUnclosed,
5631	}
5632	);
5633	assert_eq!(
5634	parser("[[[:alnum:]").parse().unwrap_err(),
5635	TestError {
5636	span: span(`1`..`2`),
5637	kind: ast::ErrorKind::ClassUnclosed,
5638	}
5639	);
5640	assert_eq!(
5641	parser(r"[\b]").parse().unwrap_err(),
5642	TestError {
5643	span: span(`1`..`3`),
5644	kind: ast::ErrorKind::ClassEscapeInvalid,
5645	}
5646	);
5647	assert_eq!(
5648	parser(r"[\w-a]").parse().unwrap_err(),
5649	TestError {
5650	span: span(`1`..`3`),
5651	kind: ast::ErrorKind::ClassRangeLiteral,
5652	}
5653	);
5654	assert_eq!(
5655	parser(r"[a-\w]").parse().unwrap_err(),
5656	TestError {
5657	span: span(`3`..`5`),
5658	kind: ast::ErrorKind::ClassRangeLiteral,
5659	}
5660	);
5661	assert_eq!(
5662	parser(r"[z-a]").parse().unwrap_err(),
5663	TestError {
5664	span: span(`1`..`4`),
5665	kind: ast::ErrorKind::ClassRangeInvalid,
5666	}
5667	);
5668
5669	assert_eq!(
5670	parser_ignore_whitespace("[a ").parse().unwrap_err(),
5671	TestError {
5672	span: span(`0`..`1`),
5673	kind: ast::ErrorKind::ClassUnclosed,
5674	}
5675	);
5676	assert_eq!(
5677	parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5678	TestError {
5679	span: span(`0`..`1`),
5680	kind: ast::ErrorKind::ClassUnclosed,
5681	}
5682	);
5683	}
5684
// Exercises `parse_set_class_open` directly. Every successful case checks
// the returned pair: the bracketed class opened so far (its span, its
// negation flag and an empty union as its kind) and the union that already
// holds any leading literal `-` or `]`. Every failing case expects a
// `ClassUnclosed` error with a specific span.
#[test]
fn parse_set_class_open() {
    // The class returned for an opening: `whole` is its span and `kind_at`
    // is the span of the empty union stored as its kind.
    let opened = |whole, negated, kind_at| ast::ClassBracketed {
        span: span(whole),
        negated,
        kind: ast::ClassSet::union(ast::ClassSetUnion {
            span: span(kind_at),
            items: vec![],
        }),
    };
    // The union returned alongside the class.
    let started = |at, items| ast::ClassSetUnion { span: span(at), items };
    // A verbatim literal item covering `at`.
    let verbatim = |at, c| {
        ast::ClassSetItem::Literal(ast::Literal {
            span: span(at),
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };
    // The error expected when the class is never closed.
    let unclosed = |at| TestError {
        span: span(at),
        kind: ast::ErrorKind::ClassUnclosed,
    };

    // Plain and negated openings with nothing else before the first item.
    assert_eq!(
        parser("[a]").parse_set_class_open(),
        Ok((opened(0..1, false, 1..1), started(1..1, vec![])))
    );
    // The expected spans (0..4 and 4..4) require three spaces after `[`.
    assert_eq!(
        parser_ignore_whitespace("[   a]").parse_set_class_open(),
        Ok((opened(0..4, false, 4..4), started(4..4, vec![])))
    );
    assert_eq!(
        parser("[^a]").parse_set_class_open(),
        Ok((opened(0..2, true, 2..2), started(2..2, vec![])))
    );
    assert_eq!(
        parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
        Ok((opened(0..4, true, 4..4), started(4..4, vec![])))
    );

    // Leading `-` characters are returned as verbatim literals in the union.
    assert_eq!(
        parser("[-a]").parse_set_class_open(),
        Ok((
            opened(0..2, false, 1..1),
            started(1..2, vec![verbatim(1..2, '-')]),
        ))
    );
    assert_eq!(
        parser_ignore_whitespace("[ - a]").parse_set_class_open(),
        Ok((
            opened(0..4, false, 2..2),
            started(2..3, vec![verbatim(2..3, '-')]),
        ))
    );
    assert_eq!(
        parser("[^-a]").parse_set_class_open(),
        Ok((
            opened(0..3, true, 2..2),
            started(2..3, vec![verbatim(2..3, '-')]),
        ))
    );
    assert_eq!(
        parser("[--a]").parse_set_class_open(),
        Ok((
            opened(0..3, false, 1..1),
            started(
                1..3,
                vec![verbatim(1..2, '-'), verbatim(2..3, '-')],
            ),
        ))
    );

    // A `]` in first position is returned as a verbatim literal as well.
    assert_eq!(
        parser("[]a]").parse_set_class_open(),
        Ok((
            opened(0..2, false, 1..1),
            started(1..2, vec![verbatim(1..2, ']')]),
        ))
    );
    assert_eq!(
        parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
        Ok((
            opened(0..4, false, 2..2),
            started(2..3, vec![verbatim(2..3, ']')]),
        ))
    );
    assert_eq!(
        parser("[^]a]").parse_set_class_open(),
        Ok((
            opened(0..3, true, 2..2),
            started(2..3, vec![verbatim(2..3, ']')]),
        ))
    );
    // After a leading `-`, the following `]` is not part of the opening:
    // only the `-` is expected in the union.
    assert_eq!(
        parser("[-]a]").parse_set_class_open(),
        Ok((
            opened(0..2, false, 1..1),
            started(1..2, vec![verbatim(1..2, '-')]),
        ))
    );

    // Patterns that end before the class is closed.
    assert_eq!(
        parser("[").parse_set_class_open().unwrap_err(),
        unclosed(0..1)
    );
    // The expected span 0..5 requires four spaces after `[`.
    assert_eq!(
        parser_ignore_whitespace("[    ")
            .parse_set_class_open()
            .unwrap_err(),
        unclosed(0..5)
    );
    assert_eq!(
        parser("[^").parse_set_class_open().unwrap_err(),
        unclosed(0..2)
    );
    assert_eq!(
        parser("[]").parse_set_class_open().unwrap_err(),
        unclosed(0..2)
    );
    assert_eq!(
        parser("[-").parse_set_class_open().unwrap_err(),
        unclosed(0..0)
    );
    assert_eq!(
        parser("[--").parse_set_class_open().unwrap_err(),
        unclosed(0..0)
    );

    // See: https://github.com/rust-lang/regex/issues/792
    assert_eq!(
        parser("(?x)[-#]").parse_with_comments().unwrap_err(),
        unclosed(4..4)
    );
}
5963
// Exercises `maybe_parse_ascii_class`. A well-formed `[:alnum:]`, optionally
// negated with `^` and optionally followed by more input, yields the
// `Alnum` ASCII class. Every malformed spelling yields `None` and leaves the
// parser's offset at 0.
#[test]
fn maybe_parse_ascii_class() {
    // (pattern, end of the expected span, expected negation)
    let accepted = [
        (r"[:alnum:]", 9, false),
        (r"[:alnum:]A", 9, false),
        (r"[:^alnum:]", 10, true),
    ];
    for &(pattern, end, negated) in accepted.iter() {
        assert_eq!(
            parser(pattern).maybe_parse_ascii_class(),
            Some(ast::ClassAscii {
                span: span(0..end),
                kind: ast::ClassAsciiKind::Alnum,
                negated,
            }),
            "pattern: {:?}",
            pattern
        );
    }

    let rejected = [
        r"[:",
        r"[:^",
        r"[^:alnum:]",
        r"[:alnnum:]",
        r"[:alnum]",
        r"[:alnum:",
    ];
    for &pattern in rejected.iter() {
        let p = parser(pattern);
        assert_eq!(
            p.maybe_parse_ascii_class(),
            None,
            "pattern: {:?}",
            pattern
        );
        // A failed attempt must not have advanced the parser.
        assert_eq!(p.offset(), 0, "pattern: {:?}", pattern);
    }
}
6015
// Exercises Unicode class escapes (`\p` / `\P`): the one-letter form, the
// braced name form, the `name:value`, `name=value` and `name!=value` forms
// (including empty names and values), the errors for truncated escapes, and
// the class followed by another literal in a full parse.
#[test]
fn parse_unicode_class() {
    // A Unicode class whose span runs from offset 0 to `end`.
    let unicode = |end, negated, kind| ast::ClassUnicode {
        span: span(0..end),
        negated,
        kind,
    };
    // The `name<op>value` kind of a Unicode class.
    let named_value = |op, name, value| {
        ast::ClassUnicodeKind::NamedValue {
            op,
            name: s(name),
            value: s(value),
        }
    };
    // The verbatim literal `z` covering `at`.
    let letter_z = |at| {
        Ast::literal(ast::Literal {
            span: span(at),
            kind: ast::LiteralKind::Verbatim,
            c: 'z',
        })
    };

    // (pattern, class expected from `parse_escape`)
    let accepted = vec![
        (
            r"\pN",
            unicode(3, false, ast::ClassUnicodeKind::OneLetter('N')),
        ),
        (
            r"\PN",
            unicode(3, true, ast::ClassUnicodeKind::OneLetter('N')),
        ),
        (
            r"\p{N}",
            unicode(5, false, ast::ClassUnicodeKind::Named(s("N"))),
        ),
        (
            r"\P{N}",
            unicode(5, true, ast::ClassUnicodeKind::Named(s("N"))),
        ),
        (
            r"\p{Greek}",
            unicode(9, false, ast::ClassUnicodeKind::Named(s("Greek"))),
        ),
        (
            r"\p{scx:Katakana}",
            unicode(
                16,
                false,
                named_value(
                    ast::ClassUnicodeOpKind::Colon,
                    "scx",
                    "Katakana",
                ),
            ),
        ),
        (
            r"\p{scx=Katakana}",
            unicode(
                16,
                false,
                named_value(
                    ast::ClassUnicodeOpKind::Equal,
                    "scx",
                    "Katakana",
                ),
            ),
        ),
        (
            r"\p{scx!=Katakana}",
            unicode(
                17,
                false,
                named_value(
                    ast::ClassUnicodeOpKind::NotEqual,
                    "scx",
                    "Katakana",
                ),
            ),
        ),
        // An operator with nothing on either side still parses, with an
        // empty name and an empty value.
        (
            r"\p{:}",
            unicode(
                5,
                false,
                named_value(ast::ClassUnicodeOpKind::Colon, "", ""),
            ),
        ),
        (
            r"\p{=}",
            unicode(
                5,
                false,
                named_value(ast::ClassUnicodeOpKind::Equal, "", ""),
            ),
        ),
        (
            r"\p{!=}",
            unicode(
                6,
                false,
                named_value(ast::ClassUnicodeOpKind::NotEqual, "", ""),
            ),
        ),
    ];
    for (pattern, expected) in accepted {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Unicode(expected)),
            "pattern: {:?}",
            pattern
        );
    }

    // Truncated escapes: the error is an empty span at the end of input.
    let truncated =
        [(r"\p", 2), (r"\p{", 3), (r"\p{N", 4), (r"\p{Greek", 8)];
    for &(pattern, at) in truncated.iter() {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::EscapeUnexpectedEof,
            },
            "pattern: {:?}",
            pattern
        );
    }

    // In a full parse the class ends where its name ends; the `z` that
    // follows is a separate literal in the concatenation.
    assert_eq!(
        parser(r"\pNz").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::class_unicode(unicode(
                    3,
                    false,
                    ast::ClassUnicodeKind::OneLetter('N'),
                )),
                letter_z(3..4),
            ],
        }))
    );
    assert_eq!(
        parser(r"\p{Greek}z").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..10),
            asts: vec![
                Ast::class_unicode(unicode(
                    9,
                    false,
                    ast::ClassUnicodeKind::Named(s("Greek")),
                )),
                letter_z(9..10),
            ],
        }))
    );

    // A backslash right after `\p` / `\P` is rejected as a class name.
    for &pattern in [r"\p\{", r"\P\{"].iter() {
        assert_eq!(
            parser(pattern).parse().unwrap_err(),
            TestError {
                span: span(2..3),
                kind: ast::ErrorKind::UnicodeClassInvalid,
            },
            "pattern: {:?}",
            pattern
        );
    }
}
6213
// Exercises the Perl class escapes `\d`, `\s`, `\w` and their negated
// upper-case forms through `parse_escape`, then checks `\d` in a full
// parse, both alone and followed by a literal.
#[test]
fn parse_perl_class() {
    // Every escape tested here is two characters long, hence the 0..2 span.
    let perl = |kind, negated| ast::ClassPerl {
        span: span(0..2),
        kind,
        negated,
    };

    // (pattern, expected kind, expected negation)
    let escapes = vec![
        (r"\d", ast::ClassPerlKind::Digit, false),
        (r"\D", ast::ClassPerlKind::Digit, true),
        (r"\s", ast::ClassPerlKind::Space, false),
        (r"\S", ast::ClassPerlKind::Space, true),
        (r"\w", ast::ClassPerlKind::Word, false),
        (r"\W", ast::ClassPerlKind::Word, true),
    ];
    for (pattern, kind, negated) in escapes {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Perl(perl(kind, negated))),
            "pattern: {:?}",
            pattern
        );
    }

    assert_eq!(
        parser(r"\d").parse(),
        Ok(Ast::class_perl(perl(ast::ClassPerlKind::Digit, false)))
    );
    assert_eq!(
        parser(r"\dz").parse(),
        Ok(Ast::concat(ast::Concat {
            span: span(0..3),
            asts: vec![
                Ast::class_perl(perl(ast::ClassPerlKind::Digit, false)),
                Ast::literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );
}
6292
// This tests a bug fix where the nest limit checker wasn't decrementing
// its depth during post-traversal, which causes long regexes to trip
// the default limit too aggressively.
//
// The branches must be separated by a bare `|`. Written as `\|` they would
// be escaped literal pipes, and the pattern would contain no alternation
// at all.
#[test]
fn regression_454_nest_too_big() {
    // Everything between the raw-string quotes, line breaks included, is
    // part of the pattern, so its lines are kept flush-left.
    let pattern = r#"
2(?:
[45]\d{3}|
7(?:
1[0-267]|
2[0-289]|
3[0-29]|
4[01]|
5[1-3]|
6[013]|
7[0178]|
91
)|
8(?:
0[125]|
[139][1-6]|
2[0157-9]|
41|
6[1-35]|
7[1-5]|
8[1-8]|
90
)|
9(?:
0[0-2]|
1[0-4]|
2[568]|
3[3-6]|
5[5-7]|
6[0167]|
7[15]|
8[0146-9]
)
)\d{4}
"#;
    assert!(parser_nest_limit(pattern, 50).parse().is_ok());
}
6335
// Regression test for a trailing `-` in a character class under `(?x)`:
// the dash must still be accepted as a literal `-` when spaces, line breaks
// or `# ...` text sit between it and the closing `]`. The same classes must
// be rejected when the closing `]` never arrives.
#[test]
fn regression_455_trailing_dash_ignore_whitespace() {
    let closed = [
        "(?x)[ / - ]",
        "(?x)[ a - ]",
        "(?x)[\na\n- ]\n",
        "(?x)[\na # wat\n- ]\n",
    ];
    for &pattern in closed.iter() {
        assert!(parser(pattern).parse().is_ok());
    }

    let unclosed = [
        "(?x)[ / -",
        "(?x)[ / - ",
        "(?x)[\n/ -\n",
        "(?x)[\n/ - # wat\n",
    ];
    for &pattern in unclosed.iter() {
        assert!(parser(pattern).parse().is_err());
    }
}
6377	}
6378