parse.rs - Codebrowser

/*!
This module provides a regular expression parser.
*/
4
5	use std::borrow::Borrow;
6	use std::cell::{Cell, RefCell};
7	use std::mem;
8	use std::result;
9
10	use crate::ast::{self, Ast, Position, Span};
11	use crate::either::Either;
12
13	use crate::is_meta_character;
14
15	type Result<T> = result::Result<T, ast::Error>;
16
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal; the only primitive accepted as the endpoint of a class
    /// range (see `into_class_literal`).
    Literal(ast::Literal),
    /// An assertion; rejected inside character classes.
    Assertion(ast::Assertion),
    /// The `.` expression, carrying only its span; rejected inside character
    /// classes.
    Dot(Span),
    /// A Perl character class.
    Perl(ast::ClassPerl),
    /// A Unicode character class.
    Unicode(ast::ClassUnicode),
}
31
32	impl Primitive {
33	/// Return the span of this primitive.
34	fn span(&self) -> &Span {
35	match *self {
36	Primitive::Literal(ref x) => &x.span,
37	Primitive::Assertion(ref x) => &x.span,
38	Primitive::Dot(ref span) => span,
39	Primitive::Perl(ref x) => &x.span,
40	Primitive::Unicode(ref x) => &x.span,
41	}
42	}
43
44	/// Convert this primitive into a proper AST.
45	fn into_ast(self) -> Ast {
46	match self {
47	Primitive::Literal(lit) => Ast::Literal(lit),
48	Primitive::Assertion(assert) => Ast::Assertion(assert),
49	Primitive::Dot(span) => Ast::Dot(span),
50	Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51	Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52	}
53	}
54
55	/// Convert this primitive into an item in a character class.
56	///
57	/// If this primitive is not a legal item (i.e., an assertion or a dot),
58	/// then return an error.
59	fn into_class_set_item<P: Borrow<Parser>>(
60	self,
61	p: &ParserI<'_, P>,
62	) -> Result<ast::ClassSetItem> {
63	use self::Primitive::*;
64	use crate::ast::ClassSetItem;
65
66	match self {
67	Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68	Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69	Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70	x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71	}
72	}
73
74	/// Convert this primitive into a literal in a character class. In
75	/// particular, literals are the only valid items that can appear in
76	/// ranges.
77	///
78	/// If this primitive is not a legal item (i.e., a class, assertion or a
79	/// dot), then return an error.
80	fn into_class_literal<P: Borrow<Parser>>(
81	self,
82	p: &ParserI<'_, P>,
83	) -> Result<ast::Literal> {
84	use self::Primitive::*;
85
86	match self {
87	Literal(lit) => Ok(lit),
88	x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89	}
90	}
91	}
92
/// Returns true if the given character is an ASCII hexadecimal digit
/// (`0-9`, `a-f` or `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
97
/// Returns true if the given character is valid in a capture group name.
///
/// An underscore and the ASCII letters are always accepted. ASCII digits,
/// `.`, `[` and `]` are accepted only when `first` is false, i.e. when `c`
/// is not the first character of the group name.
fn is_capture_char(c: char, first: bool) -> bool {
    if c == '_' || c.is_ascii_alphabetic() {
        return true;
    }
    !first && (c.is_ascii_digit() || c == '.' || c == '[' || c == ']')
}
109
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether parsers built from this configuration start out ignoring
    /// whitespace (and permitting comments).
    ignore_whitespace: bool,
    /// The nesting limit handed to built parsers.
    nest_limit: u32,
    /// Whether built parsers support octal syntax.
    octal: bool,
}
119
120	impl Default for ParserBuilder {
121	fn default() -> ParserBuilder {
122	ParserBuilder::new()
123	}
124	}
125
126	impl ParserBuilder {
127	/// Create a new parser builder with a default configuration.
128	pub fn new() -> ParserBuilder {
129	ParserBuilder {
130	ignore_whitespace: `false`,
131	nest_limit: `250`,
132	octal: `false`,
133	}
134	}
135
136	/// Build a parser from this configuration with the given pattern.
137	pub fn build(&self) -> Parser {
138	Parser {
139	pos: Cell::new(Position { offset: `0`, line: `1`, column: `1` }),
140	capture_index: Cell::new(`0`),
141	nest_limit: self.nest_limit,
142	octal: self.octal,
143	initial_ignore_whitespace: self.ignore_whitespace,
144	ignore_whitespace: Cell::new(self.ignore_whitespace),
145	comments: RefCell::new(vec![]),
146	stack_group: RefCell::new(vec![]),
147	stack_class: RefCell::new(vec![]),
148	capture_names: RefCell::new(vec![]),
149	scratch: RefCell::new(String::new()),
150	}
151	}
152
153	/// Set the nesting limit for this parser.
154	///
155	/// The nesting limit controls how deep the abstract syntax tree is allowed
156	/// to be. If the AST exceeds the given limit (e.g., with too many nested
157	/// groups), then an error is returned by the parser.
158	///
159	/// The purpose of this limit is to act as a heuristic to prevent stack
160	/// overflow for consumers that do structural induction on an `Ast` using
161	/// explicit recursion. While this crate never does this (instead using
162	/// constant stack space and moving the call stack to the heap), other
163	/// crates may.
164	///
165	/// This limit is not checked until the entire Ast is parsed. Therefore,
166	/// if callers want to put a limit on the amount of heap space used, then
167	/// they should impose a limit on the length, in bytes, of the concrete
168	/// pattern string. In particular, this is viable since this parser
169	/// implementation will limit itself to heap space proportional to the
170	/// length of the pattern string.
171	///
172	/// Note that a nest limit of `0` will return a nest limit error for most
173	/// patterns but not all. For example, a nest limit of `0` permits `a` but
174	/// not `ab`, since `ab` requires a concatenation, which results in a nest
175	/// depth of `1`. In general, a nest limit is not something that manifests
176	/// in an obvious way in the concrete syntax, therefore, it should not be
177	/// used in a granular way.
178	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179	self.nest_limit = limit;
180	self
181	}
182
183	/// Whether to support octal syntax or not.
184	///
185	/// Octal syntax is a little-known way of uttering Unicode codepoints in
186	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
187	/// `\141` are all equivalent regular expressions, where the last example
188	/// shows octal syntax.
189	///
190	/// While supporting octal syntax isn't in and of itself a problem, it does
191	/// make good error messages harder. That is, in PCRE based regex engines,
192	/// syntax like `\0` invokes a backreference, which is explicitly
193	/// unsupported in Rust's regex engine. However, many users expect it to
194	/// be supported. Therefore, when octal support is disabled, the error
195	/// message will explicitly mention that backreferences aren't supported.
196	///
197	/// Octal syntax is disabled by default.
198	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199	self.octal = yes;
200	self
201	}
202
203	/// Enable verbose mode in the regular expression.
204	///
205	/// When enabled, verbose mode permits insignificant whitespace in many
206	/// places in the regular expression, as well as comments. Comments are
207	/// started using `#` and continue until the end of the line.
208	///
209	/// By default, this is disabled. It may be selectively enabled in the
210	/// regular expression by using the `x` flag regardless of this setting.
211	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212	self.ignore_whitespace = yes;
213	self
214	}
215	}
216
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a
/// [`ParserBuilder`](struct.ParserBuilder.html).
///
/// All mutable parse state lives in `Cell`/`RefCell` fields, so the internal
/// parser can update it through a shared reference.
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. It is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sorted sequence of capture names. This is used to detect duplicate
    /// capture names and report an error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
258
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration. The methods on this type require
    /// `P: Borrow<Parser>`.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
275
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternation operator `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// Whether the `x` flag was enabled when this group was opened, i.e.
        /// the setting that is restored once the group is closed.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
296
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the stored
    /// set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
323
324	impl Parser {
325	/// Create a new parser with a default configuration.
326	///
327	/// The parser can be run with either the `parse` or `parse_with_comments`
328	/// methods. The parse methods return an abstract syntax tree.
329	///
330	/// To set configuration options on the parser, use
331	/// [`ParserBuilder`](struct.ParserBuilder.html).
332	pub fn new() -> Parser {
333	ParserBuilder::new().build()
334	}
335
336	/// Parse the regular expression into an abstract syntax tree.
337	pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338	ParserI::new(self, pattern).parse()
339	}
340
341	/// Parse the regular expression and return an abstract syntax tree with
342	/// all of the comments found in the pattern.
343	pub fn parse_with_comments(
344	&mut self,
345	pattern: &str,
346	) -> Result<ast::WithComments> {
347	ParserI::new(self, pattern).parse_with_comments()
348	}
349
350	/// Reset the internal state of a parser.
351	///
352	/// This is called at the beginning of every parse. This prevents the
353	/// parser from running with inconsistent state (say, if a previous
354	/// invocation returned an error and the parser is reused).
355	fn reset(&self) {
356	// These settings should be in line with the construction
357	// in `ParserBuilder::build`.
358	self.pos.set(Position { offset: `0`, line: `1`, column: `1` });
359	self.ignore_whitespace.set(self.initial_ignore_whitespace);
360	self.comments.borrow_mut().clear();
361	self.stack_group.borrow_mut().clear();
362	self.stack_class.borrow_mut().clear();
363	}
364	}
365
366	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367	/// Build an internal parser from a parser configuration and a pattern.
368	fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369	ParserI { parser, pattern }
370	}
371
372	/// Return a reference to the parser state.
373	fn parser(&self) -> &Parser {
374	self.parser.borrow()
375	}
376
377	/// Return a reference to the pattern being parsed.
378	fn pattern(&self) -> &str {
379	self.pattern.borrow()
380	}
381
382	/// Create a new error with the given span and error type.
383	fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384	ast::Error { kind, pattern: self.pattern().to_string(), span }
385	}
386
387	/// Return the current offset of the parser.
388	///
389	/// The offset starts at `0` from the beginning of the regular expression
390	/// pattern string.
391	fn offset(&self) -> usize {
392	self.parser().pos.get().offset
393	}
394
395	/// Return the current line number of the parser.
396	///
397	/// The line number starts at `1`.
398	fn line(&self) -> usize {
399	self.parser().pos.get().line
400	}
401
402	/// Return the current column of the parser.
403	///
404	/// The column number starts at `1` and is reset whenever a `\n` is seen.
405	fn column(&self) -> usize {
406	self.parser().pos.get().column
407	}
408
409	/// Return the next capturing index. Each subsequent call increments the
410	/// internal index.
411	///
412	/// The span given should correspond to the location of the opening
413	/// parenthesis.
414	///
415	/// If the capture limit is exceeded, then an error is returned.
416	fn next_capture_index(&self, span: Span) -> Result<u32> {
417	let current = self.parser().capture_index.get();
418	let i = current.checked_add(`1`).ok_or_else(\|\| {
419	self.error(span, ast::ErrorKind::CaptureLimitExceeded)
420	})?;
421	self.parser().capture_index.set(i);
422	Ok(i)
423	}
424
425	/// Adds the given capture name to this parser. If this capture name has
426	/// already been used, then an error is returned.
427	fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
428	let mut names = self.parser().capture_names.borrow_mut();
429	match names
430	.binary_search_by_key(&cap.name.as_str(), \|c\| c.name.as_str())
431	{
432	Err(i) => {
433	names.insert(i, cap.clone());
434	Ok(())
435	}
436	Ok(i) => Err(self.error(
437	cap.span,
438	ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
439	)),
440	}
441	}
442
443	/// Return whether the parser should ignore whitespace or not.
444	fn ignore_whitespace(&self) -> bool {
445	self.parser().ignore_whitespace.get()
446	}
447
448	/// Return the character at the current position of the parser.
449	///
450	/// This panics if the current position does not point to a valid char.
451	fn char(&self) -> char {
452	self.char_at(self.offset())
453	}
454
455	/// Return the character at the given position.
456	///
457	/// This panics if the given position does not point to a valid char.
458	fn char_at(&self, i: usize) -> char {
459	self.pattern()[i..]
460	.chars()
461	.next()
462	.unwrap_or_else(\|\| panic!("expected char at offset {}", i))
463	}
464
465	/// Bump the parser to the next Unicode scalar value.
466	///
467	/// If the end of the input has been reached, then `false` is returned.
468	fn bump(&self) -> bool {
469	if self.is_eof() {
470	return `false`;
471	}
472	let Position { mut offset, mut line, mut column } = self.pos();
473	if self.char() == '`\n`' {
474	line = line.checked_add(`1`).unwrap();
475	column = `1`;
476	} else {
477	column = column.checked_add(`1`).unwrap();
478	}
479	offset += self.char().len_utf8();
480	self.parser().pos.set(Position { offset, line, column });
481	self.pattern()[self.offset()..].chars().next().is_some()
482	}
483
484	/// If the substring starting at the current position of the parser has
485	/// the given prefix, then bump the parser to the character immediately
486	/// following the prefix and return true. Otherwise, don't bump the parser
487	/// and return false.
488	fn bump_if(&self, prefix: &str) -> bool {
489	if self.pattern()[self.offset()..].starts_with(prefix) {
490	for _ in `0`..prefix.chars().count() {
491	self.bump();
492	}
493	`true`
494	} else {
495	`false`
496	}
497	}
498
499	/// Returns true if and only if the parser is positioned at a look-around
500	/// prefix. The conditions under which this returns true must always
501	/// correspond to a regular expression that would otherwise be consider
502	/// invalid.
503	///
504	/// This should only be called immediately after parsing the opening of
505	/// a group or a set of flags.
506	fn is_lookaround_prefix(&self) -> bool {
507	self.bump_if("?=")
508	\|\| self.bump_if("?!")
509	\|\| self.bump_if("?<=")
510	\|\| self.bump_if("?<!")
511	}
512
513	/// Bump the parser, and if the `x` flag is enabled, bump through any
514	/// subsequent spaces. Return true if and only if the parser is not at
515	/// EOF.
516	fn bump_and_bump_space(&self) -> bool {
517	if !self.bump() {
518	return `false`;
519	}
520	self.bump_space();
521	!self.is_eof()
522	}
523
524	/// If the `x` flag is enabled (i.e., whitespace insensitivity with
525	/// comments), then this will advance the parser through all whitespace
526	/// and comments to the next non-whitespace non-comment byte.
527	///
528	/// If the `x` flag is disabled, then this is a no-op.
529	///
530	/// This should be used selectively throughout the parser where
531	/// arbitrary whitespace is permitted when the `x` flag is enabled. For
532	/// example, `{ 5 , 6}` is equivalent to `{5,6}`.
533	fn bump_space(&self) {
534	if !self.ignore_whitespace() {
535	return;
536	}
537	while !self.is_eof() {
538	if self.char().is_whitespace() {
539	self.bump();
540	} else if self.char() == '#' {
541	let start = self.pos();
542	let mut comment_text = String::new();
543	self.bump();
544	while !self.is_eof() {
545	let c = self.char();
546	self.bump();
547	if c == '`\n`' {
548	break;
549	}
550	comment_text.push(c);
551	}
552	let comment = ast::Comment {
553	span: Span::new(start, self.pos()),
554	comment: comment_text,
555	};
556	self.parser().comments.borrow_mut().push(comment);
557	} else {
558	break;
559	}
560	}
561	}
562
563	/// Peek at the next character in the input without advancing the parser.
564	///
565	/// If the input has been exhausted, then this returns `None`.
566	fn peek(&self) -> Option<char> {
567	if self.is_eof() {
568	return None;
569	}
570	self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
571	}
572
573	/// Like peek, but will ignore spaces when the parser is in whitespace
574	/// insensitive mode.
575	fn peek_space(&self) -> Option<char> {
576	if !self.ignore_whitespace() {
577	return self.peek();
578	}
579	if self.is_eof() {
580	return None;
581	}
582	let mut start = self.offset() + self.char().len_utf8();
583	let mut in_comment = `false`;
584	for (i, c) in self.pattern()[start..].char_indices() {
585	if c.is_whitespace() {
586	continue;
587	} else if !in_comment && c == '#' {
588	in_comment = `true`;
589	} else if in_comment && c == '`\n`' {
590	in_comment = `false`;
591	} else {
592	start += i;
593	break;
594	}
595	}
596	self.pattern()[start..].chars().next()
597	}
598
599	/// Returns true if the next call to `bump` would return false.
600	fn is_eof(&self) -> bool {
601	self.offset() == self.pattern().len()
602	}
603
604	/// Return the current position of the parser, which includes the offset,
605	/// line and column.
606	fn pos(&self) -> Position {
607	self.parser().pos.get()
608	}
609
610	/// Create a span at the current position of the parser. Both the start
611	/// and end of the span are set.
612	fn span(&self) -> Span {
613	Span::splat(self.pos())
614	}
615
616	/// Create a span that covers the current character.
617	fn span_char(&self) -> Span {
618	let mut next = Position {
619	offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
620	line: self.line(),
621	column: self.column().checked_add(`1`).unwrap(),
622	};
623	if self.char() == '`\n`' {
624	next.line += `1`;
625	next.column = `1`;
626	}
627	Span::new(self.pos(), next)
628	}
629
630	/// Parse and push a single alternation on to the parser's internal stack.
631	/// If the top of the stack already has an alternation, then add to that
632	/// instead of pushing a new one.
633	///
634	/// The concatenation given corresponds to a single alternation branch.
635	/// The concatenation returned starts the next branch and is empty.
636	///
637	/// This assumes the parser is currently positioned at `\|` and will advance
638	/// the parser to the character following `\|`.
639	#[inline(never)]
640	fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
641	assert_eq!(self.char(), '\|');
642	concat.span.end = self.pos();
643	self.push_or_add_alternation(concat);
644	self.bump();
645	Ok(ast::Concat { span: self.span(), asts: vec![] })
646	}
647
648	/// Pushes or adds the given branch of an alternation to the parser's
649	/// internal stack of state.
650	fn push_or_add_alternation(&self, concat: ast::Concat) {
651	use self::GroupState::*;
652
653	let mut stack = self.parser().stack_group.borrow_mut();
654	if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
655	alts.asts.push(concat.into_ast());
656	return;
657	}
658	stack.push(Alternation(ast::Alternation {
659	span: Span::new(concat.span.start, self.pos()),
660	asts: vec![concat.into_ast()],
661	}));
662	}
663
664	/// Parse and push a group AST (and its parent concatenation) on to the
665	/// parser's internal stack. Return a fresh concatenation corresponding
666	/// to the group's sub-AST.
667	///
668	/// If a set of flags was found (with no group), then the concatenation
669	/// is returned with that set of flags added.
670	///
671	/// This assumes that the parser is currently positioned on the opening
672	/// parenthesis. It advances the parser to the character at the start
673	/// of the sub-expression (or adjoining expression).
674	///
675	/// If there was a problem parsing the start of the group, then an error
676	/// is returned.
677	#[inline(never)]
678	fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
679	assert_eq!(self.char(), '(');
680	match self.parse_group()? {
681	Either::Left(set) => {
682	let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
683	if let Some(v) = ignore {
684	self.parser().ignore_whitespace.set(v);
685	}
686
687	concat.asts.push(Ast::Flags(set));
688	Ok(concat)
689	}
690	Either::Right(group) => {
691	let old_ignore_whitespace = self.ignore_whitespace();
692	let new_ignore_whitespace = group
693	.flags()
694	.and_then(\|f\| f.flag_state(ast::Flag::IgnoreWhitespace))
695	.unwrap_or(old_ignore_whitespace);
696	self.parser().stack_group.borrow_mut().push(
697	GroupState::Group {
698	concat,
699	group,
700	ignore_whitespace: old_ignore_whitespace,
701	},
702	);
703	self.parser().ignore_whitespace.set(new_ignore_whitespace);
704	Ok(ast::Concat { span: self.span(), asts: vec![] })
705	}
706	}
707	}
708
709	/// Pop a group AST from the parser's internal stack and set the group's
710	/// AST to the given concatenation. Return the concatenation containing
711	/// the group.
712	///
713	/// This assumes that the parser is currently positioned on the closing
714	/// parenthesis and advances the parser to the character following the `)`.
715	///
716	/// If no such group could be popped, then an unopened group error is
717	/// returned.
718	#[inline(never)]
719	fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
720	use self::GroupState::*;
721
722	assert_eq!(self.char(), ')');
723	let mut stack = self.parser().stack_group.borrow_mut();
724	let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
725	.pop()
726	{
727	Some(Group { concat, group, ignore_whitespace }) => {
728	(concat, group, ignore_whitespace, None)
729	}
730	Some(Alternation(alt)) => match stack.pop() {
731	Some(Group { concat, group, ignore_whitespace }) => {
732	(concat, group, ignore_whitespace, Some(alt))
733	}
734	None \| Some(Alternation(_)) => {
735	return Err(self.error(
736	self.span_char(),
737	ast::ErrorKind::GroupUnopened,
738	));
739	}
740	},
741	None => {
742	return Err(self
743	.error(self.span_char(), ast::ErrorKind::GroupUnopened));
744	}
745	};
746	self.parser().ignore_whitespace.set(ignore_whitespace);
747	group_concat.span.end = self.pos();
748	self.bump();
749	group.span.end = self.pos();
750	match alt {
751	Some(mut alt) => {
752	alt.span.end = group_concat.span.end;
753	alt.asts.push(group_concat.into_ast());
754	group.ast = Box::new(alt.into_ast());
755	}
756	None => {
757	group.ast = Box::new(group_concat.into_ast());
758	}
759	}
760	prior_concat.asts.push(Ast::Group(group));
761	Ok(prior_concat)
762	}
763
764	/// Pop the last state from the parser's internal stack, if it exists, and
765	/// add the given concatenation to it. There either must be no state or a
766	/// single alternation item on the stack. Any other scenario produces an
767	/// error.
768	///
769	/// This assumes that the parser has advanced to the end.
770	#[inline(never)]
771	fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
772	concat.span.end = self.pos();
773	let mut stack = self.parser().stack_group.borrow_mut();
774	let ast = match stack.pop() {
775	None => Ok(concat.into_ast()),
776	Some(GroupState::Alternation(mut alt)) => {
777	alt.span.end = self.pos();
778	alt.asts.push(concat.into_ast());
779	Ok(Ast::Alternation(alt))
780	}
781	Some(GroupState::Group { group, .. }) => {
782	return Err(
783	self.error(group.span, ast::ErrorKind::GroupUnclosed)
784	);
785	}
786	};
787	// If we try to pop again, there should be nothing.
788	match stack.pop() {
789	None => ast,
790	Some(GroupState::Alternation(_)) => {
791	// This unreachable is unfortunate. This case can't happen
792	// because the only way we can be here is if there were two
793	// `GroupState::Alternation`s adjacent in the parser's stack,
794	// which we guarantee to never happen because we never push a
795	// `GroupState::Alternation` if one is already at the top of
796	// the stack.
797	unreachable!()
798	}
799	Some(GroupState::Group { group, .. }) => {
800	Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
801	}
802	}
803	}
804
805	/// Parse the opening of a character class and push the current class
806	/// parsing context onto the parser's stack. This assumes that the parser
807	/// is positioned at an opening `[`. The given union should correspond to
808	/// the union of set items built up before seeing the `[`.
809	///
810	/// If there was a problem parsing the opening of the class, then an error
811	/// is returned. Otherwise, a new union of set items for the class is
812	/// returned (which may be populated with either a `]` or a `-`).
813	#[inline(never)]
814	fn push_class_open(
815	&self,
816	parent_union: ast::ClassSetUnion,
817	) -> Result<ast::ClassSetUnion> {
818	assert_eq!(self.char(), '[');
819
820	let (nested_set, nested_union) = self.parse_set_class_open()?;
821	self.parser()
822	.stack_class
823	.borrow_mut()
824	.push(ClassState::Open { union: parent_union, set: nested_set });
825	Ok(nested_union)
826	}
827
828	/// Parse the end of a character class set and pop the character class
829	/// parser stack. The union given corresponds to the last union built
830	/// before seeing the closing `]`. The union returned corresponds to the
831	/// parent character class set with the nested class added to it.
832	///
833	/// This assumes that the parser is positioned at a `]` and will advance
834	/// the parser to the byte immediately following the `]`.
835	///
836	/// If the stack is empty after popping, then this returns the final
837	/// "top-level" character class AST (where a "top-level" character class
838	/// is one that is not nested inside any other character class).
839	///
840	/// If there is no corresponding opening bracket on the parser's stack,
841	/// then an error is returned.
842	#[inline(never)]
843	fn pop_class(
844	&self,
845	nested_union: ast::ClassSetUnion,
846	) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
847	assert_eq!(self.char(), ']');
848
849	let item = ast::ClassSet::Item(nested_union.into_item());
850	let prevset = self.pop_class_op(item);
851	let mut stack = self.parser().stack_class.borrow_mut();
852	match stack.pop() {
853	None => {
854	// We can never observe an empty stack:
855	//
856	// 1) We are guaranteed to start with a non-empty stack since
857	// the character class parser is only initiated when it sees
858	// a `[`.
859	// 2) If we ever observe an empty stack while popping after
860	// seeing a `]`, then we signal the character class parser
861	// to terminate.
862	panic!("unexpected empty character class stack")
863	}
864	Some(ClassState::Op { .. }) => {
865	// This panic is unfortunate, but this case is impossible
866	// since we already popped the Op state if one exists above.
867	// Namely, every push to the class parser stack is guarded by
868	// whether an existing Op is already on the top of the stack.
869	// If it is, the existing Op is modified. That is, the stack
870	// can never have consecutive Op states.
871	panic!("unexpected ClassState::Op")
872	}
873	Some(ClassState::Open { mut union, mut set }) => {
874	self.bump();
875	set.span.end = self.pos();
876	set.kind = prevset;
877	if stack.is_empty() {
878	Ok(Either::Right(ast::Class::Bracketed(set)))
879	} else {
880	union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
881	Ok(Either::Left(union))
882	}
883	}
884	}
885	}
886
887	/// Return an "unclosed class" error whose span points to the most
888	/// recently opened class.
889	///
890	/// This should only be called while parsing a character class.
891	#[inline(never)]
892	fn unclosed_class_error(&self) -> ast::Error {
893	for state in self.parser().stack_class.borrow().iter().rev() {
894	if let ClassState::Open { ref set, .. } = *state {
895	return self.error(set.span, ast::ErrorKind::ClassUnclosed);
896	}
897	}
898	// We are guaranteed to have a non-empty stack with at least
899	// one open bracket, so we should never get here.
900	panic!("no open character class found")
901	}
902
903	/// Push the current set of class items on to the class parser's stack as
904	/// the left hand side of the given operator.
905	///
906	/// A fresh set union is returned, which should be used to build the right
907	/// hand side of this operator.
908	#[inline(never)]
909	fn push_class_op(
910	&self,
911	next_kind: ast::ClassSetBinaryOpKind,
912	next_union: ast::ClassSetUnion,
913	) -> ast::ClassSetUnion {
914	let item = ast::ClassSet::Item(next_union.into_item());
915	let new_lhs = self.pop_class_op(item);
916	self.parser()
917	.stack_class
918	.borrow_mut()
919	.push(ClassState::Op { kind: next_kind, lhs: new_lhs });
920	ast::ClassSetUnion { span: self.span(), items: vec![] }
921	}
922
923	/// Pop a character class set from the character class parser stack. If the
924	/// top of the stack is just an item (not an operation), then return the
925	/// given set unchanged. If the top of the stack is an operation, then the
926	/// given set will be used as the rhs of the operation on the top of the
927	/// stack. In that case, the binary operation is returned as a set.
928	#[inline(never)]
929	fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
930	let mut stack = self.parser().stack_class.borrow_mut();
931	let (kind, lhs) = match stack.pop() {
932	Some(ClassState::Op { kind, lhs }) => (kind, lhs),
933	Some(state @ ClassState::Open { .. }) => {
934	stack.push(state);
935	return rhs;
936	}
937	None => unreachable!(),
938	};
939	let span = Span::new(lhs.span().start, rhs.span().end);
940	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
941	span,
942	kind,
943	lhs: Box::new(lhs),
944	rhs: Box::new(rhs),
945	})
946	}
947	}
948
949	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
950	/// Parse the regular expression into an abstract syntax tree.
951	fn parse(&self) -> Result<Ast> {
952	self.parse_with_comments().map(\|astc\| astc.ast)
953	}
954
955	/// Parse the regular expression and return an abstract syntax tree with
956	/// all of the comments found in the pattern.
957	fn parse_with_comments(&self) -> Result<ast::WithComments> {
958	assert_eq!(self.offset(), `0`, "parser can only be used once");
959	self.parser().reset();
960	let mut concat = ast::Concat { span: self.span(), asts: vec![] };
961	loop {
962	self.bump_space();
963	if self.is_eof() {
964	break;
965	}
966	match self.char() {
967	'(' => concat = self.push_group(concat)?,
968	')' => concat = self.pop_group(concat)?,
969	'\|' => concat = self.push_alternate(concat)?,
970	'[' => {
971	let class = self.parse_set_class()?;
972	concat.asts.push(Ast::Class(class));
973	}
974	'?' => {
975	concat = self.parse_uncounted_repetition(
976	concat,
977	ast::RepetitionKind::ZeroOrOne,
978	)?;
979	}
980	'*' => {
981	concat = self.parse_uncounted_repetition(
982	concat,
983	ast::RepetitionKind::ZeroOrMore,
984	)?;
985	}
986	'+' => {
987	concat = self.parse_uncounted_repetition(
988	concat,
989	ast::RepetitionKind::OneOrMore,
990	)?;
991	}
992	'{' => {
993	concat = self.parse_counted_repetition(concat)?;
994	}
995	_ => concat.asts.push(self.parse_primitive()?.into_ast()),
996	}
997	}
998	let ast = self.pop_group_end(concat)?;
999	NestLimiter::new(self).check(&ast)?;
1000	Ok(ast::WithComments {
1001	ast,
1002	comments: mem::replace(
1003	&mut *self.parser().comments.borrow_mut(),
1004	vec![],
1005	),
1006	})
1007	}
1008
1009	/// Parses an uncounted repetition operation. An uncounted repetition
1010	/// operator includes ?, and +, but does not include the {m,n} syntax.*
1011	/// The given `kind` should correspond to the operator observed by the
1012	/// caller.
1013	///
1014	/// This assumes that the parser is currently positioned at the repetition
1015	/// operator and advances the parser to the first character after the
1016	/// operator. (Note that the operator may include a single additional `?`,
1017	/// which makes the operator ungreedy.)
1018	///
1019	/// The caller should include the concatenation that is being built. The
1020	/// concatenation returned includes the repetition operator applied to the
1021	/// last expression in the given concatenation.
1022	#[inline(never)]
1023	fn parse_uncounted_repetition(
1024	&self,
1025	mut concat: ast::Concat,
1026	kind: ast::RepetitionKind,
1027	) -> Result<ast::Concat> {
1028	assert!(
1029	self.char() == '?' \|\| self.char() == '' \|\| self*.char() == '+'
1030	);
1031	let op_start = self.pos();
1032	let ast = match concat.asts.pop() {
1033	Some(ast) => ast,
1034	None => {
1035	return Err(
1036	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1037	)
1038	}
1039	};
1040	match ast {
1041	Ast::Empty(_) \| Ast::Flags(_) => {
1042	return Err(
1043	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044	)
1045	}
1046	_ => {}
1047	}
1048	let mut greedy = `true`;
1049	if self.bump() && self.char() == '?' {
1050	greedy = `false`;
1051	self.bump();
1052	}
1053	concat.asts.push(Ast::Repetition(ast::Repetition {
1054	span: ast.span().with_end(self.pos()),
1055	op: ast::RepetitionOp {
1056	span: Span::new(op_start, self.pos()),
1057	kind,
1058	},
1059	greedy,
1060	ast: Box::new(ast),
1061	}));
1062	Ok(concat)
1063	}
1064
1065	/// Parses a counted repetition operation. A counted repetition operator
1066	/// corresponds to the {m,n} syntax, and does not include the ?, or +*
1067	/// operators.
1068	///
1069	/// This assumes that the parser is currently positioned at the opening `{`
1070	/// and advances the parser to the first character after the operator.
1071	/// (Note that the operator may include a single additional `?`, which
1072	/// makes the operator ungreedy.)
1073	///
1074	/// The caller should include the concatenation that is being built. The
1075	/// concatenation returned includes the repetition operator applied to the
1076	/// last expression in the given concatenation.
1077	#[inline(never)]
1078	fn parse_counted_repetition(
1079	&self,
1080	mut concat: ast::Concat,
1081	) -> Result<ast::Concat> {
1082	assert!(self.char() == '{');
1083	let start = self.pos();
1084	let ast = match concat.asts.pop() {
1085	Some(ast) => ast,
1086	None => {
1087	return Err(
1088	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1089	)
1090	}
1091	};
1092	match ast {
1093	Ast::Empty(_) \| Ast::Flags(_) => {
1094	return Err(
1095	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096	)
1097	}
1098	_ => {}
1099	}
1100	if !self.bump_and_bump_space() {
1101	return Err(self.error(
1102	Span::new(start, self.pos()),
1103	ast::ErrorKind::RepetitionCountUnclosed,
1104	));
1105	}
1106	let count_start = specialize_err(
1107	self.parse_decimal(),
1108	ast::ErrorKind::DecimalEmpty,
1109	ast::ErrorKind::RepetitionCountDecimalEmpty,
1110	)?;
1111	let mut range = ast::RepetitionRange::Exactly(count_start);
1112	if self.is_eof() {
1113	return Err(self.error(
1114	Span::new(start, self.pos()),
1115	ast::ErrorKind::RepetitionCountUnclosed,
1116	));
1117	}
1118	if self.char() == ',' {
1119	if !self.bump_and_bump_space() {
1120	return Err(self.error(
1121	Span::new(start, self.pos()),
1122	ast::ErrorKind::RepetitionCountUnclosed,
1123	));
1124	}
1125	if self.char() != '}' {
1126	let count_end = specialize_err(
1127	self.parse_decimal(),
1128	ast::ErrorKind::DecimalEmpty,
1129	ast::ErrorKind::RepetitionCountDecimalEmpty,
1130	)?;
1131	range = ast::RepetitionRange::Bounded(count_start, count_end);
1132	} else {
1133	range = ast::RepetitionRange::AtLeast(count_start);
1134	}
1135	}
1136	if self.is_eof() \|\| self.char() != '}' {
1137	return Err(self.error(
1138	Span::new(start, self.pos()),
1139	ast::ErrorKind::RepetitionCountUnclosed,
1140	));
1141	}
1142
1143	let mut greedy = `true`;
1144	if self.bump_and_bump_space() && self.char() == '?' {
1145	greedy = `false`;
1146	self.bump();
1147	}
1148
1149	let op_span = Span::new(start, self.pos());
1150	if !range.is_valid() {
1151	return Err(
1152	self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1153	);
1154	}
1155	concat.asts.push(Ast::Repetition(ast::Repetition {
1156	span: ast.span().with_end(self.pos()),
1157	op: ast::RepetitionOp {
1158	span: op_span,
1159	kind: ast::RepetitionKind::Range(range),
1160	},
1161	greedy,
1162	ast: Box::new(ast),
1163	}));
1164	Ok(concat)
1165	}
1166
1167	/// Parse a group (which contains a sub-expression) or a set of flags.
1168	///
1169	/// If a group was found, then it is returned with an empty AST. If a set
1170	/// of flags is found, then that set is returned.
1171	///
1172	/// The parser should be positioned at the opening parenthesis.
1173	///
1174	/// This advances the parser to the character before the start of the
1175	/// sub-expression (in the case of a group) or to the closing parenthesis
1176	/// immediately following the set of flags.
1177	///
1178	/// # Errors
1179	///
1180	/// If flags are given and incorrectly specified, then a corresponding
1181	/// error is returned.
1182	///
1183	/// If a capture name is given and it is incorrectly specified, then a
1184	/// corresponding error is returned.
1185	#[inline(never)]
1186	fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1187	assert_eq!(self.char(), '(');
1188	let open_span = self.span_char();
1189	self.bump();
1190	self.bump_space();
1191	if self.is_lookaround_prefix() {
1192	return Err(self.error(
1193	Span::new(open_span.start, self.span().end),
1194	ast::ErrorKind::UnsupportedLookAround,
1195	));
1196	}
1197	let inner_span = self.span();
1198	if self.bump_if("?P<") {
1199	let capture_index = self.next_capture_index(open_span)?;
1200	let cap = self.parse_capture_name(capture_index)?;
1201	Ok(Either::Right(ast::Group {
1202	span: open_span,
1203	kind: ast::GroupKind::CaptureName(cap),
1204	ast: Box::new(Ast::Empty(self.span())),
1205	}))
1206	} else if self.bump_if("?") {
1207	if self.is_eof() {
1208	return Err(
1209	self.error(open_span, ast::ErrorKind::GroupUnclosed)
1210	);
1211	}
1212	let flags = self.parse_flags()?;
1213	let char_end = self.char();
1214	self.bump();
1215	if char_end == ')' {
1216	// We don't allow empty flags, e.g., `(?)`. We instead
1217	// interpret it as a repetition operator missing its argument.
1218	if flags.items.is_empty() {
1219	return Err(self.error(
1220	inner_span,
1221	ast::ErrorKind::RepetitionMissing,
1222	));
1223	}
1224	Ok(Either::Left(ast::SetFlags {
1225	span: Span { end: self.pos(), ..open_span },
1226	flags,
1227	}))
1228	} else {
1229	assert_eq!(char_end, ':');
1230	Ok(Either::Right(ast::Group {
1231	span: open_span,
1232	kind: ast::GroupKind::NonCapturing(flags),
1233	ast: Box::new(Ast::Empty(self.span())),
1234	}))
1235	}
1236	} else {
1237	let capture_index = self.next_capture_index(open_span)?;
1238	Ok(Either::Right(ast::Group {
1239	span: open_span,
1240	kind: ast::GroupKind::CaptureIndex(capture_index),
1241	ast: Box::new(Ast::Empty(self.span())),
1242	}))
1243	}
1244	}
1245
1246	/// Parses a capture group name. Assumes that the parser is positioned at
1247	/// the first character in the name following the opening `<` (and may
1248	/// possibly be EOF). This advances the parser to the first character
1249	/// following the closing `>`.
1250	///
1251	/// The caller must provide the capture index of the group for this name.
1252	#[inline(never)]
1253	fn parse_capture_name(
1254	&self,
1255	capture_index: u32,
1256	) -> Result<ast::CaptureName> {
1257	if self.is_eof() {
1258	return Err(self
1259	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1260	}
1261	let start = self.pos();
1262	loop {
1263	if self.char() == '>' {
1264	break;
1265	}
1266	if !is_capture_char(self.char(), self.pos() == start) {
1267	return Err(self.error(
1268	self.span_char(),
1269	ast::ErrorKind::GroupNameInvalid,
1270	));
1271	}
1272	if !self.bump() {
1273	break;
1274	}
1275	}
1276	let end = self.pos();
1277	if self.is_eof() {
1278	return Err(self
1279	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1280	}
1281	assert_eq!(self.char(), '>');
1282	self.bump();
1283	let name = &self.pattern()[start.offset..end.offset];
1284	if name.is_empty() {
1285	return Err(self.error(
1286	Span::new(start, start),
1287	ast::ErrorKind::GroupNameEmpty,
1288	));
1289	}
1290	let capname = ast::CaptureName {
1291	span: Span::new(start, end),
1292	name: name.to_string(),
1293	index: capture_index,
1294	};
1295	self.add_capture_name(&capname)?;
1296	Ok(capname)
1297	}
1298
1299	/// Parse a sequence of flags starting at the current character.
1300	///
1301	/// This advances the parser to the character immediately following the
1302	/// flags, which is guaranteed to be either `:` or `)`.
1303	///
1304	/// # Errors
1305	///
1306	/// If any flags are duplicated, then an error is returned.
1307	///
1308	/// If the negation operator is used more than once, then an error is
1309	/// returned.
1310	///
1311	/// If no flags could be found or if the negation operation is not followed
1312	/// by any flags, then an error is returned.
1313	#[inline(never)]
1314	fn parse_flags(&self) -> Result<ast::Flags> {
1315	let mut flags = ast::Flags { span: self.span(), items: vec![] };
1316	let mut last_was_negation = None;
1317	while self.char() != ':' && self.char() != ')' {
1318	if self.char() == '-' {
1319	last_was_negation = Some(self.span_char());
1320	let item = ast::FlagsItem {
1321	span: self.span_char(),
1322	kind: ast::FlagsItemKind::Negation,
1323	};
1324	if let Some(i) = flags.add_item(item) {
1325	return Err(self.error(
1326	self.span_char(),
1327	ast::ErrorKind::FlagRepeatedNegation {
1328	original: flags.items[i].span,
1329	},
1330	));
1331	}
1332	} else {
1333	last_was_negation = None;
1334	let item = ast::FlagsItem {
1335	span: self.span_char(),
1336	kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1337	};
1338	if let Some(i) = flags.add_item(item) {
1339	return Err(self.error(
1340	self.span_char(),
1341	ast::ErrorKind::FlagDuplicate {
1342	original: flags.items[i].span,
1343	},
1344	));
1345	}
1346	}
1347	if !self.bump() {
1348	return Err(
1349	self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1350	);
1351	}
1352	}
1353	if let Some(span) = last_was_negation {
1354	return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1355	}
1356	flags.span.end = self.pos();
1357	Ok(flags)
1358	}
1359
1360	/// Parse the current character as a flag. Do not advance the parser.
1361	///
1362	/// # Errors
1363	///
1364	/// If the flag is not recognized, then an error is returned.
1365	#[inline(never)]
1366	fn parse_flag(&self) -> Result<ast::Flag> {
1367	match self.char() {
1368	'i' => Ok(ast::Flag::CaseInsensitive),
1369	'm' => Ok(ast::Flag::MultiLine),
1370	's' => Ok(ast::Flag::DotMatchesNewLine),
1371	'U' => Ok(ast::Flag::SwapGreed),
1372	'u' => Ok(ast::Flag::Unicode),
1373	'x' => Ok(ast::Flag::IgnoreWhitespace),
1374	_ => {
1375	Err(self
1376	.error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1377	}
1378	}
1379	}
1380
1381	/// Parse a primitive AST. e.g., A literal, non-set character class or
1382	/// assertion.
1383	///
1384	/// This assumes that the parser expects a primitive at the current
1385	/// location. i.e., All other non-primitive cases have been handled.
1386	/// For example, if the parser's position is at `\|`, then `\|` will be
1387	/// treated as a literal (e.g., inside a character class).
1388	///
1389	/// This advances the parser to the first character immediately following
1390	/// the primitive.
1391	fn parse_primitive(&self) -> Result<Primitive> {
1392	match self.char() {
1393	'`\\`' => self.parse_escape(),
1394	'.' => {
1395	let ast = Primitive::Dot(self.span_char());
1396	self.bump();
1397	Ok(ast)
1398	}
1399	'^' => {
1400	let ast = Primitive::Assertion(ast::Assertion {
1401	span: self.span_char(),
1402	kind: ast::AssertionKind::StartLine,
1403	});
1404	self.bump();
1405	Ok(ast)
1406	}
1407	'$' => {
1408	let ast = Primitive::Assertion(ast::Assertion {
1409	span: self.span_char(),
1410	kind: ast::AssertionKind::EndLine,
1411	});
1412	self.bump();
1413	Ok(ast)
1414	}
1415	c => {
1416	let ast = Primitive::Literal(ast::Literal {
1417	span: self.span_char(),
1418	kind: ast::LiteralKind::Verbatim,
1419	c,
1420	});
1421	self.bump();
1422	Ok(ast)
1423	}
1424	}
1425	}
1426
1427	/// Parse an escape sequence as a primitive AST.
1428	///
1429	/// This assumes the parser is positioned at the start of the escape
1430	/// sequence, i.e., `\`. It advances the parser to the first position
1431	/// immediately following the escape sequence.
1432	#[inline(never)]
1433	fn parse_escape(&self) -> Result<Primitive> {
1434	assert_eq!(self.char(), '`\\`');
1435	let start = self.pos();
1436	if !self.bump() {
1437	return Err(self.error(
1438	Span::new(start, self.pos()),
1439	ast::ErrorKind::EscapeUnexpectedEof,
1440	));
1441	}
1442	let c = self.char();
1443	// Put some of the more complicated routines into helpers.
1444	match c {
1445	'0'..='7' => {
1446	if !self.parser().octal {
1447	return Err(self.error(
1448	Span::new(start, self.span_char().end),
1449	ast::ErrorKind::UnsupportedBackreference,
1450	));
1451	}
1452	let mut lit = self.parse_octal();
1453	lit.span.start = start;
1454	return Ok(Primitive::Literal(lit));
1455	}
1456	'8'..='9' if !self.parser().octal => {
1457	return Err(self.error(
1458	Span::new(start, self.span_char().end),
1459	ast::ErrorKind::UnsupportedBackreference,
1460	));
1461	}
1462	'x' \| 'u' \| 'U' => {
1463	let mut lit = self.parse_hex()?;
1464	lit.span.start = start;
1465	return Ok(Primitive::Literal(lit));
1466	}
1467	'p' \| 'P' => {
1468	let mut cls = self.parse_unicode_class()?;
1469	cls.span.start = start;
1470	return Ok(Primitive::Unicode(cls));
1471	}
1472	'd' \| 's' \| 'w' \| 'D' \| 'S' \| 'W' => {
1473	let mut cls = self.parse_perl_class();
1474	cls.span.start = start;
1475	return Ok(Primitive::Perl(cls));
1476	}
1477	_ => {}
1478	}
1479
1480	// Handle all of the one letter sequences inline.
1481	self.bump();
1482	let span = Span::new(start, self.pos());
1483	if is_meta_character(c) {
1484	return Ok(Primitive::Literal(ast::Literal {
1485	span,
1486	kind: ast::LiteralKind::Punctuation,
1487	c,
1488	}));
1489	}
1490	let special = \|kind, c\| {
1491	Ok(Primitive::Literal(ast::Literal {
1492	span,
1493	kind: ast::LiteralKind::Special(kind),
1494	c,
1495	}))
1496	};
1497	match c {
1498	'a' => special(ast::SpecialLiteralKind::Bell, '`\x07`'),
1499	'f' => special(ast::SpecialLiteralKind::FormFeed, '`\x0C`'),
1500	't' => special(ast::SpecialLiteralKind::Tab, '`\t`'),
1501	'n' => special(ast::SpecialLiteralKind::LineFeed, '`\n`'),
1502	'r' => special(ast::SpecialLiteralKind::CarriageReturn, '`\r`'),
1503	'v' => special(ast::SpecialLiteralKind::VerticalTab, '`\x0B`'),
1504	' ' if self.ignore_whitespace() => {
1505	special(ast::SpecialLiteralKind::Space, ' ')
1506	}
1507	'A' => Ok(Primitive::Assertion(ast::Assertion {
1508	span,
1509	kind: ast::AssertionKind::StartText,
1510	})),
1511	'z' => Ok(Primitive::Assertion(ast::Assertion {
1512	span,
1513	kind: ast::AssertionKind::EndText,
1514	})),
1515	'b' => Ok(Primitive::Assertion(ast::Assertion {
1516	span,
1517	kind: ast::AssertionKind::WordBoundary,
1518	})),
1519	'B' => Ok(Primitive::Assertion(ast::Assertion {
1520	span,
1521	kind: ast::AssertionKind::NotWordBoundary,
1522	})),
1523	_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1524	}
1525	}
1526
1527	/// Parse an octal representation of a Unicode codepoint up to 3 digits
1528	/// long. This expects the parser to be positioned at the first octal
1529	/// digit and advances the parser to the first character immediately
1530	/// following the octal number. This also assumes that parsing octal
1531	/// escapes is enabled.
1532	///
1533	/// Assuming the preconditions are met, this routine can never fail.
1534	#[inline(never)]
1535	fn parse_octal(&self) -> ast::Literal {
1536	use std::char;
1537	use std::u32;
1538
1539	assert!(self.parser().octal);
1540	assert!('0' <= self.char() && self.char() <= '7');
1541	let start = self.pos();
1542	// Parse up to two more digits.
1543	while self.bump()
1544	&& '0' <= self.char()
1545	&& self.char() <= '7'
1546	&& self.pos().offset - start.offset <= `2`
1547	{}
1548	let end = self.pos();
1549	let octal = &self.pattern()[start.offset..end.offset];
1550	// Parsing the octal should never fail since the above guarantees a
1551	// valid number.
1552	let codepoint =
1553	u32::from_str_radix(octal, `8`).expect("valid octal number");
1554	// The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1555	// invalid Unicode scalar values.
1556	let c = char::from_u32(codepoint).expect("Unicode scalar value");
1557	ast::Literal {
1558	span: Span::new(start, end),
1559	kind: ast::LiteralKind::Octal,
1560	c,
1561	}
1562	}
1563
1564	/// Parse a hex representation of a Unicode codepoint. This handles both
1565	/// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566	/// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1567	/// the first character immediately following the hexadecimal literal.
1568	#[inline(never)]
1569	fn parse_hex(&self) -> Result<ast::Literal> {
1570	assert!(
1571	self.char() == 'x' \|\| self.char() == 'u' \|\| self.char() == 'U'
1572	);
1573
1574	let hex_kind = match self.char() {
1575	'x' => ast::HexLiteralKind::X,
1576	'u' => ast::HexLiteralKind::UnicodeShort,
1577	_ => ast::HexLiteralKind::UnicodeLong,
1578	};
1579	if !self.bump_and_bump_space() {
1580	return Err(
1581	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1582	);
1583	}
1584	if self.char() == '{' {
1585	self.parse_hex_brace(hex_kind)
1586	} else {
1587	self.parse_hex_digits(hex_kind)
1588	}
1589	}
1590
1591	/// Parse an N-digit hex representation of a Unicode codepoint. This
1592	/// expects the parser to be positioned at the first digit and will advance
1593	/// the parser to the first character immediately following the escape
1594	/// sequence.
1595	///
1596	/// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1597	/// or 8 (for `\UNNNNNNNN`).
1598	#[inline(never)]
1599	fn parse_hex_digits(
1600	&self,
1601	kind: ast::HexLiteralKind,
1602	) -> Result<ast::Literal> {
1603	use std::char;
1604	use std::u32;
1605
1606	let mut scratch = self.parser().scratch.borrow_mut();
1607	scratch.clear();
1608
1609	let start = self.pos();
1610	for i in `0`..kind.digits() {
1611	if i > `0` && !self.bump_and_bump_space() {
1612	return Err(self
1613	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1614	}
1615	if !is_hex(self.char()) {
1616	return Err(self.error(
1617	self.span_char(),
1618	ast::ErrorKind::EscapeHexInvalidDigit,
1619	));
1620	}
1621	scratch.push(self.char());
1622	}
1623	// The final bump just moves the parser past the literal, which may
1624	// be EOF.
1625	self.bump_and_bump_space();
1626	let end = self.pos();
1627	let hex = scratch.as_str();
1628	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1629	None => Err(self.error(
1630	Span::new(start, end),
1631	ast::ErrorKind::EscapeHexInvalid,
1632	)),
1633	Some(c) => Ok(ast::Literal {
1634	span: Span::new(start, end),
1635	kind: ast::LiteralKind::HexFixed(kind),
1636	c,
1637	}),
1638	}
1639	}
1640
1641	/// Parse a hex representation of any Unicode scalar value. This expects
1642	/// the parser to be positioned at the opening brace `{` and will advance
1643	/// the parser to the first character following the closing brace `}`.
1644	#[inline(never)]
1645	fn parse_hex_brace(
1646	&self,
1647	kind: ast::HexLiteralKind,
1648	) -> Result<ast::Literal> {
1649	use std::char;
1650	use std::u32;
1651
1652	let mut scratch = self.parser().scratch.borrow_mut();
1653	scratch.clear();
1654
1655	let brace_pos = self.pos();
1656	let start = self.span_char().end;
1657	while self.bump_and_bump_space() && self.char() != '}' {
1658	if !is_hex(self.char()) {
1659	return Err(self.error(
1660	self.span_char(),
1661	ast::ErrorKind::EscapeHexInvalidDigit,
1662	));
1663	}
1664	scratch.push(self.char());
1665	}
1666	if self.is_eof() {
1667	return Err(self.error(
1668	Span::new(brace_pos, self.pos()),
1669	ast::ErrorKind::EscapeUnexpectedEof,
1670	));
1671	}
1672	let end = self.pos();
1673	let hex = scratch.as_str();
1674	assert_eq!(self.char(), '}');
1675	self.bump_and_bump_space();
1676
1677	if hex.is_empty() {
1678	return Err(self.error(
1679	Span::new(brace_pos, self.pos()),
1680	ast::ErrorKind::EscapeHexEmpty,
1681	));
1682	}
1683	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1684	None => Err(self.error(
1685	Span::new(start, end),
1686	ast::ErrorKind::EscapeHexInvalid,
1687	)),
1688	Some(c) => Ok(ast::Literal {
1689	span: Span::new(start, self.pos()),
1690	kind: ast::LiteralKind::HexBrace(kind),
1691	c,
1692	}),
1693	}
1694	}
1695
1696	/// Parse a decimal number into a u32 while trimming leading and trailing
1697	/// whitespace.
1698	///
1699	/// This expects the parser to be positioned at the first position where
1700	/// a decimal digit could occur. This will advance the parser to the byte
1701	/// immediately following the last contiguous decimal digit.
1702	///
1703	/// If no decimal digit could be found or if there was a problem parsing
1704	/// the complete set of digits into a u32, then an error is returned.
1705	fn parse_decimal(&self) -> Result<u32> {
1706	let mut scratch = self.parser().scratch.borrow_mut();
1707	scratch.clear();
1708
1709	while !self.is_eof() && self.char().is_whitespace() {
1710	self.bump();
1711	}
1712	let start = self.pos();
1713	while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1714	scratch.push(self.char());
1715	self.bump_and_bump_space();
1716	}
1717	let span = Span::new(start, self.pos());
1718	while !self.is_eof() && self.char().is_whitespace() {
1719	self.bump_and_bump_space();
1720	}
1721	let digits = scratch.as_str();
1722	if digits.is_empty() {
1723	return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1724	}
1725	match u32::from_str_radix(digits, `10`).ok() {
1726	Some(n) => Ok(n),
1727	None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1728	}
1729	}
1730
1731	/// Parse a standard character class consisting primarily of characters or
1732	/// character ranges, but can also contain nested character classes of
1733	/// any type (sans `.`).
1734	///
1735	/// This assumes the parser is positioned at the opening `[`. If parsing
1736	/// is successful, then the parser is advanced to the position immediately
1737	/// following the closing `]`.
1738	#[inline(never)]
1739	fn parse_set_class(&self) -> Result<ast::Class> {
1740	assert_eq!(self.char(), '[');
1741
1742	let mut union =
1743	ast::ClassSetUnion { span: self.span(), items: vec![] };
1744	loop {
1745	self.bump_space();
1746	if self.is_eof() {
1747	return Err(self.unclosed_class_error());
1748	}
1749	match self.char() {
1750	'[' => {
1751	// If we've already parsed the opening bracket, then
1752	// attempt to treat this as the beginning of an ASCII
1753	// class. If ASCII class parsing fails, then the parser
1754	// backs up to `[`.
1755	if !self.parser().stack_class.borrow().is_empty() {
1756	if let Some(cls) = self.maybe_parse_ascii_class() {
1757	union.push(ast::ClassSetItem::Ascii(cls));
1758	continue;
1759	}
1760	}
1761	union = self.push_class_open(union)?;
1762	}
1763	']' => match self.pop_class(union)? {
1764	Either::Left(nested_union) => {
1765	union = nested_union;
1766	}
1767	Either::Right(class) => return Ok(class),
1768	},
1769	'&' if self.peek() == Some('&') => {
1770	assert!(self.bump_if("&&"));
1771	union = self.push_class_op(
1772	ast::ClassSetBinaryOpKind::Intersection,
1773	union,
1774	);
1775	}
1776	'-' if self.peek() == Some('-') => {
1777	assert!(self.bump_if("--"));
1778	union = self.push_class_op(
1779	ast::ClassSetBinaryOpKind::Difference,
1780	union,
1781	);
1782	}
1783	'~' if self.peek() == Some('~') => {
1784	assert!(self.bump_if("~~"));
1785	union = self.push_class_op(
1786	ast::ClassSetBinaryOpKind::SymmetricDifference,
1787	union,
1788	);
1789	}
1790	_ => {
1791	union.push(self.parse_set_class_range()?);
1792	}
1793	}
1794	}
1795	}
1796
1797	/// Parse a single primitive item in a character class set. The item to
1798	/// be parsed can either be one of a simple literal character, a range
1799	/// between two simple literal characters or a "primitive" character
1800	/// class like \w or \p{Greek}.
1801	///
1802	/// If an invalid escape is found, or if a character class is found where
1803	/// a simple literal is expected (e.g., in a range), then an error is
1804	/// returned.
1805	#[inline(never)]
1806	fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1807	let prim1 = self.parse_set_class_item()?;
1808	self.bump_space();
1809	if self.is_eof() {
1810	return Err(self.unclosed_class_error());
1811	}
1812	// If the next char isn't a `-`, then we don't have a range.
1813	// There are two exceptions. If the char after a `-` is a `]`, then
1814	// `-` is interpreted as a literal `-`. Alternatively, if the char
1815	// after a `-` is a `-`, then `--` corresponds to a "difference"
1816	// operation.
1817	if self.char() != '-'
1818	\|\| self.peek_space() == Some(']')
1819	\|\| self.peek_space() == Some('-')
1820	{
1821	return prim1.into_class_set_item(self);
1822	}
1823	// OK, now we're parsing a range, so bump past the `-` and parse the
1824	// second half of the range.
1825	if !self.bump_and_bump_space() {
1826	return Err(self.unclosed_class_error());
1827	}
1828	let prim2 = self.parse_set_class_item()?;
1829	let range = ast::ClassSetRange {
1830	span: Span::new(prim1.span().start, prim2.span().end),
1831	start: prim1.into_class_literal(self)?,
1832	end: prim2.into_class_literal(self)?,
1833	};
1834	if !range.is_valid() {
1835	return Err(
1836	self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1837	);
1838	}
1839	Ok(ast::ClassSetItem::Range(range))
1840	}
1841
1842	/// Parse a single item in a character class as a primitive, where the
1843	/// primitive either consists of a verbatim literal or a single escape
1844	/// sequence.
1845	///
1846	/// This assumes the parser is positioned at the beginning of a primitive,
1847	/// and advances the parser to the first position after the primitive if
1848	/// successful.
1849	///
1850	/// Note that it is the caller's responsibility to report an error if an
1851	/// illegal primitive was parsed.
1852	#[inline(never)]
1853	fn parse_set_class_item(&self) -> Result<Primitive> {
1854	if self.char() == '`\\`' {
1855	self.parse_escape()
1856	} else {
1857	let x = Primitive::Literal(ast::Literal {
1858	span: self.span_char(),
1859	kind: ast::LiteralKind::Verbatim,
1860	c: self.char(),
1861	});
1862	self.bump();
1863	Ok(x)
1864	}
1865	}
1866
1867	/// Parses the opening of a character class set. This includes the opening
1868	/// bracket along with `^` if present to indicate negation. This also
1869	/// starts parsing the opening set of unioned items if applicable, since
1870	/// there are special rules applied to certain characters in the opening
1871	/// of a character class. For example, `[^]]` is the class of all
1872	/// characters not equal to `]`. (`]` would need to be escaped in any other
1873	/// position.) Similarly for `-`.
1874	///
1875	/// In all cases, the op inside the returned `ast::ClassBracketed` is an
1876	/// empty union. This empty union should be replaced with the actual item
1877	/// when it is popped from the parser's stack.
1878	///
1879	/// This assumes the parser is positioned at the opening `[` and advances
1880	/// the parser to the first non-special byte of the character class.
1881	///
1882	/// An error is returned if EOF is found.
1883	#[inline(never)]
1884	fn parse_set_class_open(
1885	&self,
1886	) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1887	assert_eq!(self.char(), '[');
1888	let start = self.pos();
1889	if !self.bump_and_bump_space() {
1890	return Err(self.error(
1891	Span::new(start, self.pos()),
1892	ast::ErrorKind::ClassUnclosed,
1893	));
1894	}
1895
1896	let negated = if self.char() != '^' {
1897	`false`
1898	} else {
1899	if !self.bump_and_bump_space() {
1900	return Err(self.error(
1901	Span::new(start, self.pos()),
1902	ast::ErrorKind::ClassUnclosed,
1903	));
1904	}
1905	`true`
1906	};
1907	// Accept any number of `-` as literal `-`.
1908	let mut union =
1909	ast::ClassSetUnion { span: self.span(), items: vec![] };
1910	while self.char() == '-' {
1911	union.push(ast::ClassSetItem::Literal(ast::Literal {
1912	span: self.span_char(),
1913	kind: ast::LiteralKind::Verbatim,
1914	c: '-',
1915	}));
1916	if !self.bump_and_bump_space() {
1917	return Err(self.error(
1918	Span::new(start, start),
1919	ast::ErrorKind::ClassUnclosed,
1920	));
1921	}
1922	}
1923	// If `]` is the first* char in a set, then interpret it as a literal*
1924	// `]`. That is, an empty class is impossible to write.
1925	if union.items.is_empty() && self.char() == ']' {
1926	union.push(ast::ClassSetItem::Literal(ast::Literal {
1927	span: self.span_char(),
1928	kind: ast::LiteralKind::Verbatim,
1929	c: ']',
1930	}));
1931	if !self.bump_and_bump_space() {
1932	return Err(self.error(
1933	Span::new(start, self.pos()),
1934	ast::ErrorKind::ClassUnclosed,
1935	));
1936	}
1937	}
1938	let set = ast::ClassBracketed {
1939	span: Span::new(start, self.pos()),
1940	negated,
1941	kind: ast::ClassSet::union(ast::ClassSetUnion {
1942	span: Span::new(union.span.start, union.span.start),
1943	items: vec![],
1944	}),
1945	};
1946	Ok((set, union))
1947	}
1948
1949	/// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1950	///
1951	/// This assumes the parser is positioned at the opening `[`.
1952	///
1953	/// If no valid ASCII character class could be found, then this does not
1954	/// advance the parser and `None` is returned. Otherwise, the parser is
1955	/// advanced to the first byte following the closing `]` and the
1956	/// corresponding ASCII class is returned.
1957	#[inline(never)]
1958	fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1959	// ASCII character classes are interesting from a parsing perspective
1960	// because parsing cannot fail with any interesting error. For example,
1961	// in order to use an ASCII character class, it must be enclosed in
1962	// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1963	// of it as "ASCII character characters have the syntax `[:NAME:]`
1964	// which can only appear within character brackets." This means that
1965	// things like `[[:lower:]A]` are legal constructs.
1966	//
1967	// However, if one types an incorrect ASCII character class, e.g.,
1968	// `[[:loower:]]`, then we treat that as a normal nested character
1969	// class containing the characters `:elorw`. One might argue that we
1970	// should return an error instead since the repeated colons give away
1971	// the intent to write an ASCII class. But what if the user typed
1972	// `[[:lower]]` instead? How can we tell that was intended to be an
1973	// ASCII class and not just a normal nested class?
1974	//
1975	// Reasonable people can probably disagree over this, but for better
1976	// or worse, we implement semantics that never fails at the expense
1977	// of better failure modes.
1978	assert_eq!(self.char(), '[');
1979	// If parsing fails, then we back up the parser to this starting point.
1980	let start = self.pos();
1981	let mut negated = `false`;
1982	if !self.bump() \|\| self.char() != ':' {
1983	self.parser().pos.set(start);
1984	return None;
1985	}
1986	if !self.bump() {
1987	self.parser().pos.set(start);
1988	return None;
1989	}
1990	if self.char() == '^' {
1991	negated = `true`;
1992	if !self.bump() {
1993	self.parser().pos.set(start);
1994	return None;
1995	}
1996	}
1997	let name_start = self.offset();
1998	while self.char() != ':' && self.bump() {}
1999	if self.is_eof() {
2000	self.parser().pos.set(start);
2001	return None;
2002	}
2003	let name = &self.pattern()[name_start..self.offset()];
2004	if !self.bump_if(":]") {
2005	self.parser().pos.set(start);
2006	return None;
2007	}
2008	let kind = match ast::ClassAsciiKind::from_name(name) {
2009	Some(kind) => kind,
2010	None => {
2011	self.parser().pos.set(start);
2012	return None;
2013	}
2014	};
2015	Some(ast::ClassAscii {
2016	span: Span::new(start, self.pos()),
2017	kind,
2018	negated,
2019	})
2020	}
2021
2022	/// Parse a Unicode class in either the single character notation, `\pN`
2023	/// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2024	/// the parser is positioned at the `p` (or `P` for negation) and will
2025	/// advance the parser to the character immediately following the class.
2026	///
2027	/// Note that this does not check whether the class name is valid or not.
2028	#[inline(never)]
2029	fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2030	assert!(self.char() == 'p' \|\| self.char() == 'P');
2031
2032	let mut scratch = self.parser().scratch.borrow_mut();
2033	scratch.clear();
2034
2035	let negated = self.char() == 'P';
2036	if !self.bump_and_bump_space() {
2037	return Err(
2038	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2039	);
2040	}
2041	let (start, kind) = if self.char() == '{' {
2042	let start = self.span_char().end;
2043	while self.bump_and_bump_space() && self.char() != '}' {
2044	scratch.push(self.char());
2045	}
2046	if self.is_eof() {
2047	return Err(self
2048	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2049	}
2050	assert_eq!(self.char(), '}');
2051	self.bump();
2052
2053	let name = scratch.as_str();
2054	if let Some(i) = name.find("!=") {
2055	(
2056	start,
2057	ast::ClassUnicodeKind::NamedValue {
2058	op: ast::ClassUnicodeOpKind::NotEqual,
2059	name: name[..i].to_string(),
2060	value: name[i + `2`..].to_string(),
2061	},
2062	)
2063	} else if let Some(i) = name.find(':') {
2064	(
2065	start,
2066	ast::ClassUnicodeKind::NamedValue {
2067	op: ast::ClassUnicodeOpKind::Colon,
2068	name: name[..i].to_string(),
2069	value: name[i + `1`..].to_string(),
2070	},
2071	)
2072	} else if let Some(i) = name.find('=') {
2073	(
2074	start,
2075	ast::ClassUnicodeKind::NamedValue {
2076	op: ast::ClassUnicodeOpKind::Equal,
2077	name: name[..i].to_string(),
2078	value: name[i + `1`..].to_string(),
2079	},
2080	)
2081	} else {
2082	(start, ast::ClassUnicodeKind::Named(name.to_string()))
2083	}
2084	} else {
2085	let start = self.pos();
2086	let c = self.char();
2087	if c == '`\\`' {
2088	return Err(self.error(
2089	self.span_char(),
2090	ast::ErrorKind::UnicodeClassInvalid,
2091	));
2092	}
2093	self.bump_and_bump_space();
2094	let kind = ast::ClassUnicodeKind::OneLetter(c);
2095	(start, kind)
2096	};
2097	Ok(ast::ClassUnicode {
2098	span: Span::new(start, self.pos()),
2099	negated,
2100	kind,
2101	})
2102	}
2103
2104	/// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2105	/// parser is currently at a valid character class name and will be
2106	/// advanced to the character immediately following the class.
2107	#[inline(never)]
2108	fn parse_perl_class(&self) -> ast::ClassPerl {
2109	let c = self.char();
2110	let span = self.span_char();
2111	self.bump();
2112	let (negated, kind) = match c {
2113	'd' => (`false`, ast::ClassPerlKind::Digit),
2114	'D' => (`true`, ast::ClassPerlKind::Digit),
2115	's' => (`false`, ast::ClassPerlKind::Space),
2116	'S' => (`true`, ast::ClassPerlKind::Space),
2117	'w' => (`false`, ast::ClassPerlKind::Word),
2118	'W' => (`true`, ast::ClassPerlKind::Word),
2119	c => panic!("expected valid Perl class but got '{}'", c),
2120	};
2121	ast::ClassPerl { span, kind, negated }
2122	}
2123	}
2124
2125	/// A type that traverses a fully parsed Ast and checks whether its depth
2126	/// exceeds the specified nesting limit. If it does, then an error is returned.
2127	#[derive(Debug)]
2128	struct NestLimiter<'p, 's, P> {
2129	/// The parser that is checking the nest limit.
2130	p: &'p ParserI<'s, P>,
2131	/// The current depth while walking an Ast.
2132	depth: u32,
2133	}
2134
2135	impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2136	fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2137	NestLimiter { p, depth: `0` }
2138	}
2139
2140	#[inline(never)]
2141	fn check(self, ast: &Ast) -> Result<()> {
2142	ast::visit(ast, self)
2143	}
2144
2145	fn increment_depth(&mut self, span: &Span) -> Result<()> {
2146	let new = self.depth.checked_add(`1`).ok_or_else(\|\| {
2147	self.p.error(
2148	span.clone(),
2149	ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2150	)
2151	})?;
2152	let limit = self.p.parser().nest_limit;
2153	if new > limit {
2154	return Err(self.p.error(
2155	span.clone(),
2156	ast::ErrorKind::NestLimitExceeded(limit),
2157	));
2158	}
2159	self.depth = new;
2160	Ok(())
2161	}
2162
2163	fn decrement_depth(&mut self) {
2164	// Assuming the correctness of the visitor, this should never drop
2165	// below 0.
2166	self.depth = self.depth.checked_sub(`1`).unwrap();
2167	}
2168	}
2169
2170	impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2171	type Output = ();
2172	type Err = ast::Error;
2173
2174	fn finish(self) -> Result<()> {
2175	Ok(())
2176	}
2177
2178	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2179	let span = match *ast {
2180	Ast::Empty(_)
2181	\| Ast::Flags(_)
2182	\| Ast::Literal(_)
2183	\| Ast::Dot(_)
2184	\| Ast::Assertion(_)
2185	\| Ast::Class(ast::Class::Unicode(_))
2186	\| Ast::Class(ast::Class::Perl(_)) => {
2187	// These are all base cases, so we don't increment depth.
2188	return Ok(());
2189	}
2190	Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2191	Ast::Repetition(ref x) => &x.span,
2192	Ast::Group(ref x) => &x.span,
2193	Ast::Alternation(ref x) => &x.span,
2194	Ast::Concat(ref x) => &x.span,
2195	};
2196	self.increment_depth(span)
2197	}
2198
2199	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2200	match *ast {
2201	Ast::Empty(_)
2202	\| Ast::Flags(_)
2203	\| Ast::Literal(_)
2204	\| Ast::Dot(_)
2205	\| Ast::Assertion(_)
2206	\| Ast::Class(ast::Class::Unicode(_))
2207	\| Ast::Class(ast::Class::Perl(_)) => {
2208	// These are all base cases, so we don't decrement depth.
2209	Ok(())
2210	}
2211	Ast::Class(ast::Class::Bracketed(_))
2212	\| Ast::Repetition(_)
2213	\| Ast::Group(_)
2214	\| Ast::Alternation(_)
2215	\| Ast::Concat(_) => {
2216	self.decrement_depth();
2217	Ok(())
2218	}
2219	}
2220	}
2221
2222	fn visit_class_set_item_pre(
2223	&mut self,
2224	ast: &ast::ClassSetItem,
2225	) -> Result<()> {
2226	let span = match *ast {
2227	ast::ClassSetItem::Empty(_)
2228	\| ast::ClassSetItem::Literal(_)
2229	\| ast::ClassSetItem::Range(_)
2230	\| ast::ClassSetItem::Ascii(_)
2231	\| ast::ClassSetItem::Unicode(_)
2232	\| ast::ClassSetItem::Perl(_) => {
2233	// These are all base cases, so we don't increment depth.
2234	return Ok(());
2235	}
2236	ast::ClassSetItem::Bracketed(ref x) => &x.span,
2237	ast::ClassSetItem::Union(ref x) => &x.span,
2238	};
2239	self.increment_depth(span)
2240	}
2241
2242	fn visit_class_set_item_post(
2243	&mut self,
2244	ast: &ast::ClassSetItem,
2245	) -> Result<()> {
2246	match *ast {
2247	ast::ClassSetItem::Empty(_)
2248	\| ast::ClassSetItem::Literal(_)
2249	\| ast::ClassSetItem::Range(_)
2250	\| ast::ClassSetItem::Ascii(_)
2251	\| ast::ClassSetItem::Unicode(_)
2252	\| ast::ClassSetItem::Perl(_) => {
2253	// These are all base cases, so we don't decrement depth.
2254	Ok(())
2255	}
2256	ast::ClassSetItem::Bracketed(_) \| ast::ClassSetItem::Union(_) => {
2257	self.decrement_depth();
2258	Ok(())
2259	}
2260	}
2261	}
2262
2263	fn visit_class_set_binary_op_pre(
2264	&mut self,
2265	ast: &ast::ClassSetBinaryOp,
2266	) -> Result<()> {
2267	self.increment_depth(&ast.span)
2268	}
2269
2270	fn visit_class_set_binary_op_post(
2271	&mut self,
2272	_ast: &ast::ClassSetBinaryOp,
2273	) -> Result<()> {
2274	self.decrement_depth();
2275	Ok(())
2276	}
2277	}
2278
2279	/// When the result is an error, transforms the ast::ErrorKind from the source
2280	/// Result into another one. This function is used to return clearer error
2281	/// messages when possible.
2282	fn specialize_err<T>(
2283	result: Result<T>,
2284	from: ast::ErrorKind,
2285	to: ast::ErrorKind,
2286	) -> Result<T> {
2287	if let Err(e) = result {
2288	if e.kind == from {
2289	Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2290	} else {
2291	Err(e)
2292	}
2293	} else {
2294	result
2295	}
2296	}
2297
2298	#[cfg(test)]
2299	mod tests {
2300	use std::ops::Range;
2301
2302	use super::{Parser, ParserBuilder, ParserI, Primitive};
2303	use crate::ast::{self, Ast, Position, Span};
2304
// A local replacement for the standard assert_eq, which prints each side on
// its own line (slightly better formatting, but honestly still kind of
// crappy).
macro_rules! assert_eq {
    ($left:expr, $right:expr) => {{
        // Matching on a tuple of borrows evaluates each operand exactly
        // once and keeps any temporaries alive for the comparison.
        match (&$left, &$right) {
            (lhs, rhs) => {
                if !(*lhs == *rhs) {
                    panic!(
                        "assertion failed: `(left == right)`\n\nleft: `{:?}`\nright: `{:?}`\n\n",
                        lhs, rhs
                    )
                }
            }
        }
    }};
}
2322
2323	// We create these errors to compare with real ast::Errors in the tests.
2324	// We define equality between TestError and ast::Error to disregard the
2325	// pattern string in ast::Error, which is annoying to provide in tests.
2326	#[derive(Clone, Debug)]
2327	struct TestError {
2328	span: Span,
2329	kind: ast::ErrorKind,
2330	}
2331
2332	impl PartialEq<ast::Error> for TestError {
2333	fn eq(&self, other: &ast::Error) -> bool {
2334	self.span == other.span && self.kind == other.kind
2335	}
2336	}
2337
2338	impl PartialEq<TestError> for ast::Error {
2339	fn eq(&self, other: &TestError) -> bool {
2340	self.span == other.span && self.kind == other.kind
2341	}
2342	}
2343
/// Short alias for turning a string slice into an owned `String`.
fn s(text: &str) -> String {
    String::from(text)
}
2347
2348	fn parser(pattern: &str) -> ParserI<'_, Parser> {
2349	ParserI::new(Parser::new(), pattern)
2350	}
2351
2352	fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2353	let parser = ParserBuilder::new().octal(`true`).build();
2354	ParserI::new(parser, pattern)
2355	}
2356
2357	fn parser_nest_limit(
2358	pattern: &str,
2359	nest_limit: u32,
2360	) -> ParserI<'_, Parser> {
2361	let p = ParserBuilder::new().nest_limit(nest_limit).build();
2362	ParserI::new(p, pattern)
2363	}
2364
2365	fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2366	let p = ParserBuilder::new().ignore_whitespace(`true`).build();
2367	ParserI::new(p, pattern)
2368	}
2369
2370	/// Short alias for creating a new span.
2371	fn nspan(start: Position, end: Position) -> Span {
2372	Span::new(start, end)
2373	}
2374
2375	/// Short alias for creating a new position.
2376	fn npos(offset: usize, line: usize, column: usize) -> Position {
2377	Position::new(offset, line, column)
2378	}
2379
2380	/// Create a new span from the given offset range. This assumes a single
2381	/// line and sets the columns based on the offsets. i.e., This only works
2382	/// out of the box for ASCII, which is fine for most tests.
2383	fn span(range: Range<usize>) -> Span {
2384	let start = Position::new(range.start, `1`, range.start + `1`);
2385	let end = Position::new(range.end, `1`, range.end + `1`);
2386	Span::new(start, end)
2387	}
2388
2389	/// Create a new span for the corresponding byte range in the given string.
2390	fn span_range(subject: &str, range: Range<usize>) -> Span {
2391	let start = Position {
2392	offset: range.start,
2393	line: `1` + subject[..range.start].matches('`\n`').count(),
2394	column: `1` + subject[..range.start]
2395	.chars()
2396	.rev()
2397	.position(\|c\| c == '`\n`')
2398	.unwrap_or(subject[..range.start].chars().count()),
2399	};
2400	let end = Position {
2401	offset: range.end,
2402	line: `1` + subject[..range.end].matches('`\n`').count(),
2403	column: `1` + subject[..range.end]
2404	.chars()
2405	.rev()
2406	.position(\|c\| c == '`\n`')
2407	.unwrap_or(subject[..range.end].chars().count()),
2408	};
2409	Span::new(start, end)
2410	}
2411
2412	/// Create a verbatim literal starting at the given position.
2413	fn lit(c: char, start: usize) -> Ast {
2414	lit_with(c, span(start..start + c.len_utf8()))
2415	}
2416
2417	/// Create a punctuation literal starting at the given position.
2418	fn punct_lit(c: char, span: Span) -> Ast {
2419	Ast::Literal(ast::Literal {
2420	span,
2421	kind: ast::LiteralKind::Punctuation,
2422	c,
2423	})
2424	}
2425
2426	/// Create a verbatim literal with the given span.
2427	fn lit_with(c: char, span: Span) -> Ast {
2428	Ast::Literal(ast::Literal {
2429	span,
2430	kind: ast::LiteralKind::Verbatim,
2431	c,
2432	})
2433	}
2434
2435	/// Create a concatenation with the given range.
2436	fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2437	concat_with(span(range), asts)
2438	}
2439
2440	/// Create a concatenation with the given span.
2441	fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2442	Ast::Concat(ast::Concat { span, asts })
2443	}
2444
2445	/// Create an alternation with the given span.
2446	fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2447	Ast::Alternation(ast::Alternation { span: span(range), asts })
2448	}
2449
2450	/// Create a capturing group with the given span.
2451	fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2452	Ast::Group(ast::Group {
2453	span: span(range),
2454	kind: ast::GroupKind::CaptureIndex(index),
2455	ast: Box::new(ast),
2456	})
2457	}
2458
2459	/// Create an ast::SetFlags.
2460	///
2461	/// The given pattern should be the full pattern string. The range given
2462	/// should correspond to the byte offsets where the flag set occurs.
2463	///
2464	/// If negated is true, then the set is interpreted as beginning with a
2465	/// negation.
2466	fn flag_set(
2467	pat: &str,
2468	range: Range<usize>,
2469	flag: ast::Flag,
2470	negated: bool,
2471	) -> Ast {
2472	let mut items = vec![ast::FlagsItem {
2473	span: span_range(pat, (range.end - `2`)..(range.end - `1`)),
2474	kind: ast::FlagsItemKind::Flag(flag),
2475	}];
2476	if negated {
2477	items.insert(
2478	`0`,
2479	ast::FlagsItem {
2480	span: span_range(pat, (range.start + `2`)..(range.end - `2`)),
2481	kind: ast::FlagsItemKind::Negation,
2482	},
2483	);
2484	}
2485	Ast::Flags(ast::SetFlags {
2486	span: span_range(pat, range.clone()),
2487	flags: ast::Flags {
2488	span: span_range(pat, (range.start + `2`)..(range.end - `1`)),
2489	items,
2490	},
2491	})
2492	}
2493
/// Checks which patterns parse, and which are rejected with
/// `NestLimitExceeded`, under small nest limits.
#[test]
fn parse_nest_limit() {
    // Asserts that parsing `pat` under `limit` fails with
    // `NestLimitExceeded(limit)` reported at the offsets `at`.
    let too_deep = |pat: &str, limit: u32, at: Range<usize>| {
        assert_eq!(
            parser_nest_limit(pat, limit).parse().unwrap_err(),
            TestError {
                span: span(at),
                kind: ast::ErrorKind::NestLimitExceeded(limit),
            }
        );
    };
    // Builds a greedy repetition of `inner`.
    let greedy_rep = |whole: Range<usize>,
                      op: Range<usize>,
                      kind: ast::RepetitionKind,
                      inner: Ast| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy: true,
            ast: Box::new(inner),
        })
    };

    // A nest limit of 0 still allows some types of regexes.
    assert_eq!(parser_nest_limit("", 0).parse(), Ok(Ast::Empty(span(0..0))));
    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

    // Test repetition operations, which require one level of nesting.
    too_deep("a+", 0, 0..2);
    assert_eq!(
        parser_nest_limit("a+", 1).parse(),
        Ok(greedy_rep(
            0..2,
            1..2,
            ast::RepetitionKind::OneOrMore,
            lit('a', 0)
        ))
    );
    too_deep("(a)+", 1, 0..3);
    too_deep("a+*", 1, 0..2);
    assert_eq!(
        parser_nest_limit("a+*", 2).parse(),
        Ok(greedy_rep(
            0..3,
            2..3,
            ast::RepetitionKind::ZeroOrMore,
            greedy_rep(
                0..2,
                1..2,
                ast::RepetitionKind::OneOrMore,
                lit('a', 0)
            )
        ))
    );

    // Test concatenations. A concatenation requires one level of nesting.
    too_deep("ab", 0, 0..2);
    assert_eq!(
        parser_nest_limit("ab", 1).parse(),
        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
    );
    assert_eq!(
        parser_nest_limit("abc", 1).parse(),
        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
    );

    // Test alternations. An alternation requires one level of nesting.
    too_deep("a|b", 0, 0..3);
    assert_eq!(
        parser_nest_limit("a|b", 1).parse(),
        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser_nest_limit("a|b|c", 1).parse(),
        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );

    // Test character classes. Classes form their own mini-recursive
    // syntax!
    too_deep("[a]", 0, 0..3);
    assert_eq!(
        parser_nest_limit("[a]", 1).parse(),
        Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
                ast::Literal {
                    span: span(1..2),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'a',
                }
            )),
        })))
    );
    too_deep("[ab]", 1, 1..3);
    too_deep("[ab[cd]]", 2, 3..7);
    too_deep("[ab[cd]]", 3, 4..6);
    too_deep("[a--b]", 1, 1..5);
    too_deep("[a--bc]", 2, 4..6);
}
2651
/// Checks that `parse_with_comments` in `x` mode returns both the AST with
/// comments stripped and the list of comments with their spans.
#[test]
fn parse_comments() {
    // The byte offsets asserted below depend on the exact whitespace in the
    // pattern, so it is assembled line by line instead of relying on the
    // layout of a multi-line literal. Note the two-space indent in front of
    // comment 3: it is what puts that comment at offset 53 and `bar` at 74.
    let pat = concat!(
        "(?x)\n",
        "# This is comment 1.\n",
        "foo # This is comment 2.\n",
        "  # This is comment 3.\n",
        "bar\n",
        "# This is comment 4.",
    );
    let astc = parser(pat).parse_with_comments().unwrap();
    assert_eq!(
        astc.ast,
        concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                lit_with('f', span_range(pat, 26..27)),
                lit_with('o', span_range(pat, 27..28)),
                lit_with('o', span_range(pat, 28..29)),
                lit_with('b', span_range(pat, 74..75)),
                lit_with('a', span_range(pat, 75..76)),
                lit_with('r', span_range(pat, 76..77)),
            ]
        )
    );
    // Each comment's span runs from its `#` through the terminating newline
    // (or to the end of the pattern for the last one); the text excludes
    // both the `#` and the newline.
    assert_eq!(
        astc.comments,
        vec![
            ast::Comment {
                span: span_range(pat, 5..26),
                comment: s(" This is comment 1."),
            },
            ast::Comment {
                span: span_range(pat, 30..51),
                comment: s(" This is comment 2."),
            },
            ast::Comment {
                span: span_range(pat, 53..74),
                comment: s(" This is comment 3."),
            },
            ast::Comment {
                span: span_range(pat, 78..98),
                comment: s(" This is comment 4."),
            },
        ]
    );
}
2698
/// Checks a lone `]` and a pattern made of every escaped punctuation
/// character.
#[test]
fn parse_holistic() {
    // Outside of a class, `]` is an ordinary literal.
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
    // Eighteen two-byte escapes, 36 bytes in total. The `\(\)` pair must be
    // present: the expected AST below has `(` at 10..12 and `)` at 12..14.
    assert_eq!(
        parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
        Ok(concat(
            0..36,
            vec![
                punct_lit('\\', span(0..2)),
                punct_lit('.', span(2..4)),
                punct_lit('+', span(4..6)),
                punct_lit('*', span(6..8)),
                punct_lit('?', span(8..10)),
                punct_lit('(', span(10..12)),
                punct_lit(')', span(12..14)),
                punct_lit('|', span(14..16)),
                punct_lit('[', span(16..18)),
                punct_lit(']', span(18..20)),
                punct_lit('{', span(20..22)),
                punct_lit('}', span(22..24)),
                punct_lit('^', span(24..26)),
                punct_lit('$', span(26..28)),
                punct_lit('#', span(28..30)),
                punct_lit('&', span(30..32)),
                punct_lit('-', span(32..34)),
                punct_lit('~', span(34..36)),
            ]
        ))
    );
}
2729
/// Checks how the `x` (ignore whitespace) flag affects parsing: toggling it,
/// scoping it to a group, and whitespace around group openers and escapes.
///
/// The byte offsets asserted here depend on the exact number of spaces in
/// each pattern, so the patterns must not be reformatted.
#[test]
fn parse_ignore_whitespace() {
    // Expected AST for the `(?x)` directive that opens most patterns.
    let x_flag =
        |pat: &str| flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false);

    // Test that basic whitespace insensitivity works.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(7, 1, 8)),
            vec![
                x_flag(pat),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
            ]
        ))
    );

    // Test that we can toggle whitespace insensitivity.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            nspan(npos(0, 1, 1), npos(15, 1, 16)),
            vec![
                x_flag(pat),
                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
            ]
        ))
    );

    // Test that nesting whitespace insensitive flags works.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                lit_with('a', span_range(pat, 0..1)),
                lit_with(' ', span_range(pat, 1..2)),
                Ast::Group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        },],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                }),
                lit_with('a', span_range(pat, 9..10)),
                lit_with(' ', span_range(pat, 10..11)),
            ]
        ))
    );

    // Test that whitespace after an opening paren is insignificant.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
                        span: span_range(pat, 9..12),
                        name: s("foo"),
                        index: 1,
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                }),
            ]
        ))
    );
    // Two spaces after the `(`, so that `a` sits at offset 7.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                }),
            ]
        ))
    );
    // Two spaces after the `(` and two after the `:`, so that the empty
    // flag set sits at offset 8 and `a` at offset 11.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                }),
            ]
        ))
    );
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // Test that whitespace after an escape is OK.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Special(
                        ast::SpecialLiteralKind::Space
                    ),
                    c: ' ',
                }),
            ]
        ))
    );
    // ... but only when `x` mode is enabled.
    let pat = r"\ ";
    assert_eq!(
        parser(pat).parse().unwrap_err(),
        TestError {
            span: span_range(pat, 0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
2891
/// Checks that newlines in a pattern are parsed as literals and that line
/// and column numbers advance correctly across them.
#[test]
fn parse_newlines() {
    let pat = ".\n.";
    let expected = vec![
        Ast::Dot(span_range(pat, 0..1)),
        lit_with('\n', span_range(pat, 1..2)),
        Ast::Dot(span_range(pat, 2..3)),
    ];
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(span_range(pat, 0..3), expected))
    );

    let pat = "foobar\nbaz\nquux\n";
    // One row per character: the character, then the (offset, line, column)
    // of its start and of its end. A newline literal ends at column 1 of
    // the following line.
    let rows = [
        ('f', (0, 1, 1), (1, 1, 2)),
        ('o', (1, 1, 2), (2, 1, 3)),
        ('o', (2, 1, 3), (3, 1, 4)),
        ('b', (3, 1, 4), (4, 1, 5)),
        ('a', (4, 1, 5), (5, 1, 6)),
        ('r', (5, 1, 6), (6, 1, 7)),
        ('\n', (6, 1, 7), (7, 2, 1)),
        ('b', (7, 2, 1), (8, 2, 2)),
        ('a', (8, 2, 2), (9, 2, 3)),
        ('z', (9, 2, 3), (10, 2, 4)),
        ('\n', (10, 2, 4), (11, 3, 1)),
        ('q', (11, 3, 1), (12, 3, 2)),
        ('u', (12, 3, 2), (13, 3, 3)),
        ('u', (13, 3, 3), (14, 3, 4)),
        ('x', (14, 3, 4), (15, 3, 5)),
        ('\n', (15, 3, 5), (16, 4, 1)),
    ];
    let expected: Vec<Ast> = rows
        .iter()
        .map(|&(c, (o1, l1, c1), (o2, l2, c2))| {
            lit_with(c, nspan(npos(o1, l1, c1), npos(o2, l2, c2)))
        })
        .collect();
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(span_range(pat, 0..pat.len()), expected))
    );
}
2933
#[test]
fn parse_uncounted_repetition() {
    use crate::ast::RepetitionKind::{OneOrMore, ZeroOrMore, ZeroOrOne};

    // Expected AST for `sub` followed by an uncounted repetition operator.
    // In every case checked here the operator span ends where the whole
    // repetition ends, so only its start offset is passed in.
    fn rep(
        start: usize,
        op_start: usize,
        end: usize,
        kind: ast::RepetitionKind,
        greedy: bool,
        sub: Ast,
    ) -> Ast {
        Ast::Repetition(ast::Repetition {
            span: span(start..end),
            op: ast::RepetitionOp { span: span(op_start..end), kind },
            greedy,
            ast: Box::new(sub),
        })
    }

    // A single literal followed by each operator, in greedy and lazy form.
    // The lazy forms carry a trailing `?`, which is part of the operator
    // span and flips `greedy` to false.
    let single_literal = vec![
        (r"a*", ZeroOrMore, true),
        (r"a+", OneOrMore, true),
        (r"a?", ZeroOrOne, true),
        (r"a*?", ZeroOrMore, false),
        (r"a+?", OneOrMore, false),
        (r"a??", ZeroOrOne, false),
    ];
    for (pat, kind, greedy) in single_literal {
        assert_eq!(
            parser(pat).parse(),
            Ok(rep(0, 1, pat.len(), kind, greedy, lit('a', 0))),
            "pattern: {:?}",
            pat
        );
    }

    // The operator binds only to the immediately preceding expression.
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(
            0..3,
            vec![rep(0, 1, 2, ZeroOrOne, true, lit('a', 0)), lit('b', 2)]
        ))
    );
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(
            0..4,
            vec![rep(0, 1, 3, ZeroOrOne, false, lit('a', 0)), lit('b', 3)]
        ))
    );
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(
            0..3,
            vec![lit('a', 0), rep(1, 2, 3, ZeroOrOne, true, lit('b', 1))]
        ))
    );
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(rep(
            0,
            4,
            5,
            ZeroOrOne,
            true,
            group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2)]))
        ))
    );
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(
            0..3,
            vec![
                Ast::Empty(span(0..0)),
                rep(1, 2, 3, ZeroOrOne, true, lit('a', 1)),
            ]
        ))
    );

    // An operator with nothing to repeat is rejected with an empty span
    // located at the operator itself.
    let missing_operand: Vec<(&str, usize)> = vec![
        (r"*", 0),
        (r"(?i)*", 4),
        (r"(*)", 1),
        (r"(?:?)", 3),
        (r"+", 0),
        (r"?", 0),
        (r"(?)", 1),
        (r"|*", 1),
        (r"|+", 1),
        (r"|?", 1),
    ];
    for (pat, at) in missing_operand {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::RepetitionMissing,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3157
#[test]
fn parse_counted_repetition() {
    // Expected AST for `sub` followed by a counted repetition operator. The
    // operator span always ends where the whole repetition ends.
    fn rep(
        start: usize,
        op_start: usize,
        end: usize,
        kind: ast::RepetitionKind,
        greedy: bool,
        sub: Ast,
    ) -> Ast {
        Ast::Repetition(ast::Repetition {
            span: span(start..end),
            op: ast::RepetitionOp { span: span(op_start..end), kind },
            greedy,
            ast: Box::new(sub),
        })
    }

    // Shorthand constructors for the three counted range forms.
    let exactly =
        |n| ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(n));
    let at_least =
        |n| ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(n));
    let bounded = |lo, hi| {
        ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(lo, hi))
    };

    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(rep(0, 1, 4, exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(rep(0, 1, 5, at_least(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(rep(0, 1, 6, bounded(5, 9), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(rep(0, 1, 5, exactly(5), false, lit('a', 0)))
    );
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(
            0..5,
            vec![lit('a', 0), rep(1, 2, 5, exactly(5), true, lit('b', 1))]
        ))
    );
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(
            0..6,
            vec![
                lit('a', 0),
                rep(1, 2, 5, exactly(5), true, lit('b', 1)),
                lit('c', 5),
            ]
        ))
    );

    // Whitespace inside the braces is accepted.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(rep(0, 1, 6, exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(rep(0, 1, 10, bounded(5, 9), true, lit('a', 0)))
    );
    // With whitespace ignored, the space before the lazy `?` is skipped and
    // the operator span extends through the `?`.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(rep(0, 1, 8, bounded(5, 9), false, lit('a', 0)))
    );

    // Malformed counted repetitions: (pattern, span start, span end, kind).
    let failures = vec![
        (r"(?i){0}", 4, 4, ast::ErrorKind::RepetitionMissing),
        (r"(?m){1,1}", 4, 4, ast::ErrorKind::RepetitionMissing),
        (r"a{]}", 2, 2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{1,]}", 4, 4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{", 1, 2, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{}", 2, 2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{a", 2, 2, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9999999999}", 2, 12, ast::ErrorKind::DecimalInvalid),
        (r"a{9", 1, 3, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,a", 4, 4, ast::ErrorKind::RepetitionCountDecimalEmpty),
        (r"a{9,9999999999}", 4, 14, ast::ErrorKind::DecimalInvalid),
        (r"a{9,", 1, 4, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{9,11", 1, 6, ast::ErrorKind::RepetitionCountUnclosed),
        (r"a{2,1}", 1, 6, ast::ErrorKind::RepetitionCountInvalid),
        (r"{5}", 0, 0, ast::ErrorKind::RepetitionMissing),
        (r"|{5}", 1, 1, ast::ErrorKind::RepetitionMissing),
    ];
    for (pat, start, end, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(start..end), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3414
#[test]
fn parse_alternate() {
    // Empty expression located at offset `at`.
    fn empty(at: usize) -> Ast {
        Ast::Empty(span(at..at))
    }

    // Concatenation of two single-byte literals starting at offset `at`.
    fn pair(at: usize, first: char, second: char) -> Ast {
        concat(at..at + 2, vec![lit(first, at), lit(second, at + 1)])
    }

    // Alternation node spelled out in full rather than via `alt`.
    fn alternation(start: usize, end: usize, asts: Vec<Ast>) -> Ast {
        Ast::Alternation(ast::Alternation { span: span(start..end), asts })
    }

    assert_eq!(
        parser(r"a|b").parse(),
        Ok(alternation(0, 3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser(r"(a|b)").parse(),
        Ok(group(
            0..5,
            1,
            alternation(1, 4, vec![lit('a', 1), lit('b', 3)])
        ))
    );

    assert_eq!(
        parser(r"a|b|c").parse(),
        Ok(alternation(0, 5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );
    assert_eq!(
        parser(r"ax|by|cz").parse(),
        Ok(alternation(
            0,
            8,
            vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]
        ))
    );
    assert_eq!(
        parser(r"(ax|by|cz)").parse(),
        Ok(group(
            0..10,
            1,
            alternation(
                1,
                9,
                vec![pair(1, 'a', 'x'), pair(4, 'b', 'y'), pair(7, 'c', 'z')]
            )
        ))
    );

    // Alternations nested inside successive capture groups.
    let innermost = group(8..12, 3, pair(9, 'c', 'z'));
    let middle = group(2 + 2..13, 2, alt(5..12, vec![pair(5, 'b', 'y'), innermost]));
    assert_eq!(
        parser(r"(ax|(by|(cz)))").parse(),
        Ok(group(0..14, 1, alt(1..13, vec![pair(1, 'a', 'x'), middle])))
    );

    // Empty branches are allowed on either side of `|`.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(0..1, vec![empty(0), empty(1)]))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
    );

    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
    );

    // Unbalanced parentheses around an alternation.
    assert_eq!(
        parser(r"a|b)").parse().unwrap_err(),
        TestError { span: span(3..4), kind: ast::ErrorKind::GroupUnopened }
    );
    assert_eq!(
        parser(r"(a|b").parse().unwrap_err(),
        TestError { span: span(0..1), kind: ast::ErrorKind::GroupUnclosed }
    );
}
3574
#[test]
fn parse_unsupported_lookaround() {
    // Each look-around opener is rejected; the error span starts at the
    // opening parenthesis and ends at the listed offset.
    let openers: Vec<(&str, usize)> = vec![
        (r"(?=a)", 3),
        (r"(?!a)", 3),
        (r"(?<=a)", 4),
        (r"(?<!a)", 4),
    ];
    for (pat, end) in openers {
        let err = parser(pat).parse().unwrap_err();
        assert_eq!(
            err,
            TestError {
                span: span(0..end),
                kind: ast::ErrorKind::UnsupportedLookAround,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3606
#[test]
fn parse_group() {
    use crate::ast::FlagsItemKind::{Flag, Negation};

    // Flag list whose items are each one byte wide and laid out back to
    // back starting at offset `start`; an empty list yields an empty span.
    fn flags(start: usize, kinds: Vec<ast::FlagsItemKind>) -> ast::Flags {
        ast::Flags {
            span: span(start..start + kinds.len()),
            items: kinds
                .into_iter()
                .enumerate()
                .map(|(i, kind)| ast::FlagsItem {
                    span: span(start + i..start + i + 1),
                    kind,
                })
                .collect(),
        }
    }

    // Group node with an explicit kind.
    fn grouped(end: usize, kind: ast::GroupKind, inner: Ast) -> Ast {
        Ast::Group(ast::Group {
            span: span(0..end),
            kind,
            ast: Box::new(inner),
        })
    }

    // Bare flag settings: `(?flags)`.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(Ast::Flags(ast::SetFlags {
            span: span(0..4),
            flags: flags(2, vec![Flag(ast::Flag::CaseInsensitive)]),
        }))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(Ast::Flags(ast::SetFlags {
            span: span(0..5),
            flags: flags(
                2,
                vec![
                    Flag(ast::Flag::CaseInsensitive),
                    Flag(ast::Flag::SwapGreed),
                ]
            ),
        }))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(Ast::Flags(ast::SetFlags {
            span: span(0..6),
            flags: flags(
                2,
                vec![
                    Flag(ast::Flag::CaseInsensitive),
                    Negation,
                    Flag(ast::Flag::SwapGreed),
                ]
            ),
        }))
    );

    // Capturing groups are numbered in order of their opening parenthesis.
    assert_eq!(
        parser("()").parse(),
        Ok(grouped(
            2,
            ast::GroupKind::CaptureIndex(1),
            Ast::Empty(span(1..1))
        ))
    );
    assert_eq!(
        parser("(a)").parse(),
        Ok(grouped(3, ast::GroupKind::CaptureIndex(1), lit('a', 1)))
    );
    assert_eq!(
        parser("(())").parse(),
        Ok(grouped(
            4,
            ast::GroupKind::CaptureIndex(1),
            Ast::Group(ast::Group {
                span: span(1..3),
                kind: ast::GroupKind::CaptureIndex(2),
                ast: Box::new(Ast::Empty(span(2..2))),
            })
        ))
    );

    // Non-capturing groups, with and without flags.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(grouped(
            5,
            ast::GroupKind::NonCapturing(flags(2, vec![])),
            lit('a', 3)
        ))
    );
    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(grouped(
            6,
            ast::GroupKind::NonCapturing(flags(
                2,
                vec![Flag(ast::Flag::CaseInsensitive)]
            )),
            lit('a', 4)
        ))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(grouped(
            8,
            ast::GroupKind::NonCapturing(flags(
                2,
                vec![
                    Flag(ast::Flag::CaseInsensitive),
                    Negation,
                    Flag(ast::Flag::SwapGreed),
                ]
            )),
            lit('a', 6)
        ))
    );

    // Malformed groups: (pattern, span start, span end, kind).
    let failures = vec![
        ("(", 0, 1, ast::ErrorKind::GroupUnclosed),
        ("(?", 0, 1, ast::ErrorKind::GroupUnclosed),
        ("(?P", 2, 3, ast::ErrorKind::FlagUnrecognized),
        ("(?P<", 4, 4, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(a", 0, 1, ast::ErrorKind::GroupUnclosed),
        ("(()", 0, 1, ast::ErrorKind::GroupUnclosed),
        (")", 0, 1, ast::ErrorKind::GroupUnopened),
        ("a)", 1, 2, ast::ErrorKind::GroupUnopened),
    ];
    for (pat, start, end, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(start..end), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3818
#[test]
fn parse_capture_name() {
    // Every accepted pattern has the shape `(?P<NAME>z)`: the name starts
    // at offset 4, is followed by `>`, then the literal `z`, then `)`.
    let accepted = vec![
        ("(?P<a>z)", "a"),
        ("(?P<abc>z)", "abc"),
        ("(?P<a_1>z)", "a_1"),
        ("(?P<a.1>z)", "a.1"),
        ("(?P<a[1]>z)", "a[1]"),
    ];
    for (pat, name) in accepted {
        let name_end = 4 + name.len();
        let expected = Ast::Group(ast::Group {
            span: span(0..pat.len()),
            kind: ast::GroupKind::CaptureName(ast::CaptureName {
                span: span(4..name_end),
                name: s(name),
                index: 1,
            }),
            ast: Box::new(lit('z', name_end + 1)),
        });
        assert_eq!(parser(pat).parse(), Ok(expected), "pattern: {:?}", pat);
    }

    // Rejected names: (pattern, span start, span end, kind).
    let rejected = vec![
        ("(?P<", 4, 4, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<>z)", 4, 4, ast::ErrorKind::GroupNameEmpty),
        ("(?P<a", 5, 5, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<ab", 6, 6, ast::ErrorKind::GroupNameUnexpectedEof),
        ("(?P<0a", 4, 5, ast::ErrorKind::GroupNameInvalid),
        ("(?P<~", 4, 5, ast::ErrorKind::GroupNameInvalid),
        ("(?P<abc~", 7, 8, ast::ErrorKind::GroupNameInvalid),
        // A reused name reports the second occurrence and points back at
        // the first one.
        (
            "(?P<a>y)(?P<a>z)",
            12,
            13,
            ast::ErrorKind::GroupNameDuplicate { original: span(4..5) },
        ),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(start..end), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3944
#[test]
fn parse_flags() {
    use crate::ast::FlagsItemKind::{Flag, Negation};

    // Flag list starting at offset 0 whose items are each one byte wide
    // and laid out back to back.
    fn flags(kinds: Vec<ast::FlagsItemKind>) -> ast::Flags {
        ast::Flags {
            span: span(0..kinds.len()),
            items: kinds
                .into_iter()
                .enumerate()
                .map(|(at, kind)| ast::FlagsItem {
                    span: span(at..at + 1),
                    kind,
                })
                .collect(),
        }
    }

    // Parsing stops at the terminating `:` or `)`, which is not part of
    // the resulting span.
    assert_eq!(
        parser("i:").parse_flags(),
        Ok(flags(vec![Flag(ast::Flag::CaseInsensitive)]))
    );
    assert_eq!(
        parser("i)").parse_flags(),
        Ok(flags(vec![Flag(ast::Flag::CaseInsensitive)]))
    );

    assert_eq!(
        parser("isU:").parse_flags(),
        Ok(flags(vec![
            Flag(ast::Flag::CaseInsensitive),
            Flag(ast::Flag::DotMatchesNewLine),
            Flag(ast::Flag::SwapGreed),
        ]))
    );

    assert_eq!(
        parser("-isU:").parse_flags(),
        Ok(flags(vec![
            Negation,
            Flag(ast::Flag::CaseInsensitive),
            Flag(ast::Flag::DotMatchesNewLine),
            Flag(ast::Flag::SwapGreed),
        ]))
    );
    assert_eq!(
        parser("i-sU:").parse_flags(),
        Ok(flags(vec![
            Flag(ast::Flag::CaseInsensitive),
            Negation,
            Flag(ast::Flag::DotMatchesNewLine),
            Flag(ast::Flag::SwapGreed),
        ]))
    );

    // Rejected flag lists: (input, span start, span end, kind).
    let rejected = vec![
        ("isU", 3, 3, ast::ErrorKind::FlagUnexpectedEof),
        ("isUa:", 3, 4, ast::ErrorKind::FlagUnrecognized),
        (
            "isUi:",
            3,
            4,
            ast::ErrorKind::FlagDuplicate { original: span(0..1) },
        ),
        (
            "i-sU-i:",
            4,
            5,
            ast::ErrorKind::FlagRepeatedNegation { original: span(1..2) },
        ),
        ("-)", 0, 1, ast::ErrorKind::FlagDanglingNegation),
        ("i-)", 1, 2, ast::ErrorKind::FlagDanglingNegation),
        ("iU-)", 2, 3, ast::ErrorKind::FlagDanglingNegation),
    ];
    for (input, start, end, kind) in rejected {
        assert_eq!(
            parser(input).parse_flags().unwrap_err(),
            TestError { span: span(start..end), kind },
            "input: {:?}",
            input
        );
    }
}
4102
#[test]
fn parse_flag() {
    // Every recognized flag letter and the flag it maps to.
    let known = vec![
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (letter, flag) in known {
        assert_eq!(parser(letter).parse_flag(), Ok(flag));
    }

    // Anything else is unrecognized; the span covers the whole character,
    // which is three bytes for the snowman.
    let unknown_ascii = parser("a").parse_flag().unwrap_err();
    assert_eq!(
        unknown_ascii,
        TestError { span: span(0..1), kind: ast::ErrorKind::FlagUnrecognized }
    );
    let unknown_wide = parser("☃").parse_flag().unwrap_err();
    assert_eq!(
        unknown_wide,
        TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
}
4127
#[test]
fn parse_primitive_non_escape() {
    // Verbatim literal `c` covering the span `at`.
    let verbatim = |c, at| {
        Primitive::Literal(ast::Literal {
            span: at,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };
    // One-byte assertion at the start of the pattern.
    let assertion = |kind| {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    };

    assert_eq!(
        parser(r".").parse_primitive(),
        Ok(Primitive::Dot(span(0..1)))
    );
    assert_eq!(
        parser(r"^").parse_primitive(),
        Ok(assertion(ast::AssertionKind::StartLine))
    );
    assert_eq!(
        parser(r"$").parse_primitive(),
        Ok(assertion(ast::AssertionKind::EndLine))
    );

    assert_eq!(
        parser(r"a").parse_primitive(),
        Ok(verbatim('a', span(0..1)))
    );
    // `parse_primitive` treats `|` as an ordinary literal.
    assert_eq!(
        parser(r"|").parse_primitive(),
        Ok(verbatim('|', span(0..1)))
    );
    // A multi-byte character yields a span covering all of its bytes.
    assert_eq!(
        parser(r"☃").parse_primitive(),
        Ok(verbatim('☃', span_range("☃", 0..3)))
    );
}
4174
#[test]
fn parse_escape() {
    // An escaped meta character is a punctuation literal.
    let escaped_pipe = Primitive::Literal(ast::Literal {
        span: span(0..2),
        kind: ast::LiteralKind::Punctuation,
        c: '|',
    });
    assert_eq!(parser(r"\|").parse_primitive(), Ok(escaped_pipe));

    // Single-letter escapes that denote one specific control character.
    let specials = vec![
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for (pat, c, special) in specials {
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Special(special),
            c,
        });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // Single-letter escapes that denote zero-width assertions.
    let assertions = vec![
        (r"\A", ast::AssertionKind::StartText),
        (r"\z", ast::AssertionKind::EndText),
        (r"\b", ast::AssertionKind::WordBoundary),
        (r"\B", ast::AssertionKind::NotWordBoundary),
    ];
    for (pat, kind) in assertions {
        let expected =
            Primitive::Assertion(ast::Assertion { span: span(0..2), kind });
        assert_eq!(parser(pat).parse_primitive(), Ok(expected));
    }

    // A lone backslash and an unknown escape letter are both errors.
    let dangling = parser(r"\").parse_escape().unwrap_err();
    assert_eq!(
        dangling,
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    let unknown = parser(r"\y").parse_escape().unwrap_err();
    assert_eq!(
        unknown,
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4247
#[test]
fn parse_unsupported_backreference() {
    // With the default parser, a backslash followed by a digit is reported
    // as an unsupported backreference; both ends of the digit range are
    // checked.
    for pat in vec![r"\0", r"\9"] {
        let err = parser(pat).parse_escape().unwrap_err();
        assert_eq!(
            err,
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            },
            "pattern: {:?}",
            pat
        );
    }
}
4265
#[test]
fn parse_octal() {
    // Octal literal starting at offset 0 and ending at `end`.
    let octal = |end: usize, c: char| ast::Literal {
        span: span(0..end),
        kind: ast::LiteralKind::Octal,
        c,
    };
    // Verbatim one-byte literal at offset `at`.
    let verbatim = |at: usize, c: char| ast::Literal {
        span: span(at..at + 1),
        kind: ast::LiteralKind::Verbatim,
        c,
    };

    // Every escape of one to three octal digits. The range is inclusive so
    // that the largest value, `\777`, is exercised as well.
    for i in 0..=0o777 {
        let pat = format!(r"\{:o}", i);
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(octal(
                pat.len(),
                ::std::char::from_u32(i).unwrap()
            ))),
            "pattern: {:?}",
            pat
        );
    }

    // `parse_escape` stops at the first non-octal digit...
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(octal(3, '?')))
    );
    // ...and after at most three octal digits.
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(octal(4, '\u{01FF}')))
    );

    // In a full parse the leftover character becomes a verbatim literal.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::Literal(octal(3, '?')),
                Ast::Literal(verbatim(3, '8')),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::Literal(octal(4, '\u{01FF}')),
                Ast::Literal(verbatim(4, '7')),
            ],
        }))
    );

    // `8` is not an octal digit, so `\8` is an unrecognized escape.
    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4340
#[test]
fn parse_hex_two() {
    // Every two-digit `\xNN` escape maps to the code point with that value.
    for code in 0..=0xFF {
        let pat = format!(r"\x{:02x}", code);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: ::std::char::from_u32(code).unwrap(),
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // Truncated or non-hex input: (pattern, span start, span end, kind).
    let failures = vec![
        (r"\xF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\xG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\xFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pat, start, end, kind) in failures {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(start..end), kind },
            "pattern: {:?}",
            pat
        );
    }
}
4377
#[test]
fn parse_hex_four() {
    // Parses `pat` as an escape and hands back the error it must fail with.
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    // Every four-digit `\uNNNN` escape that names a valid `char`; values
    // rejected by `char::from_u32` are skipped.
    let scalars = (0..65536)
        .filter_map(|n| ::std::char::from_u32(n).map(|c| (n, c)));
    for (code, c) in scalars {
        let pat = format!(r"\u{:04x}", code);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeShort,
            ),
            c,
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // The pattern ends before all four digits were read.
    assert_eq!(
        escape_err(r"\uF"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // A non-hex digit in any of the four positions (`\uG` .. `\uFFFG`) is
    // reported at the offending character.
    for valid_digits in 0..4 {
        let pat = format!(r"\u{}G", "F".repeat(valid_digits));
        assert_eq!(
            escape_err(&pat),
            TestError {
                span: span(valid_digits + 2..valid_digits + 3),
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }

    // Well-formed digits that do not name a valid `char`.
    assert_eq!(
        escape_err(r"\uD800"),
        TestError {
            span: span(2..6),
            kind: ast::ErrorKind::EscapeHexInvalid,
        }
    );
}
4441
#[test]
fn parse_hex_eight() {
    // Parses `pat` as an escape and hands back the error it must fail with.
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    // Eight-digit `\UNNNNNNNN` escapes for every valid `char` below 65536;
    // values rejected by `char::from_u32` are skipped.
    let scalars = (0..65536)
        .filter_map(|n| ::std::char::from_u32(n).map(|c| (n, c)));
    for (code, c) in scalars {
        let pat = format!(r"\U{:08x}", code);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(
                ast::HexLiteralKind::UnicodeLong,
            ),
            c,
        });
        assert_eq!(parser(&pat).parse_escape(), Ok(expected));
    }

    // The pattern ends before all eight digits were read.
    assert_eq!(
        escape_err(r"\UF"),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // A non-hex digit in any of the eight positions (`\UG` through
    // `\UFFFFFFFG`) is reported at the offending character.
    for valid_digits in 0..8 {
        let pat = format!(r"\U{}G", "F".repeat(valid_digits));
        assert_eq!(
            escape_err(&pat),
            TestError {
                span: span(valid_digits + 2..valid_digits + 3),
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
}
4526
#[test]
fn parse_hex_brace() {
    // Expected result of a successful brace escape that spans the first
    // `len` bytes of the pattern.
    let brace = |len: usize, kind: ast::HexLiteralKind, c: char| {
        Primitive::Literal(ast::Literal {
            span: span(0..len),
            kind: ast::LiteralKind::HexBrace(kind),
            c,
        })
    };
    // Parses `pat` as an escape and hands back the error it must fail with.
    let escape_err = |pat: &str| parser(pat).parse_escape().unwrap_err();

    // The three prefixes share the brace syntax and differ only in the
    // recorded literal kind.
    assert_eq!(
        parser(r"\u{26c4}").parse_escape(),
        Ok(brace(8, ast::HexLiteralKind::UnicodeShort, '⛄'))
    );
    assert_eq!(
        parser(r"\U{26c4}").parse_escape(),
        Ok(brace(8, ast::HexLiteralKind::UnicodeLong, '⛄'))
    );
    assert_eq!(
        parser(r"\x{26c4}").parse_escape(),
        Ok(brace(8, ast::HexLiteralKind::X, '⛄'))
    );
    // Upper-case and mixed-case digits are accepted.
    assert_eq!(
        parser(r"\x{26C4}").parse_escape(),
        Ok(brace(8, ast::HexLiteralKind::X, '⛄'))
    );
    assert_eq!(
        parser(r"\x{10fFfF}").parse_escape(),
        Ok(brace(10, ast::HexLiteralKind::X, '\u{10FFFF}'))
    );

    // Input that stops before the escape is complete.
    assert_eq!(
        escape_err(r"\x"),
        TestError {
            span: span(2..2),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    assert_eq!(
        escape_err(r"\x{"),
        TestError {
            span: span(2..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
    assert_eq!(
        escape_err(r"\x{FF"),
        TestError {
            span: span(2..5),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // Braces with nothing between them.
    assert_eq!(
        escape_err(r"\x{}"),
        TestError {
            span: span(2..4),
            kind: ast::ErrorKind::EscapeHexEmpty,
        }
    );
    // A non-hex character between the braces.
    assert_eq!(
        escape_err(r"\x{FGF}"),
        TestError {
            span: span(4..5),
            kind: ast::ErrorKind::EscapeHexInvalidDigit,
        }
    );

    // Well-formed digits that do not name a valid `char`; the error span
    // covers the digits only.
    assert_eq!(
        escape_err(r"\x{FFFFFF}"),
        TestError {
            span: span(3..9),
            kind: ast::ErrorKind::EscapeHexInvalid,
        }
    );
    assert_eq!(
        escape_err(r"\x{D800}"),
        TestError {
            span: span(3..7),
            kind: ast::ErrorKind::EscapeHexInvalid,
        }
    );
    assert_eq!(
        escape_err(r"\x{FFFFFFFFF}"),
        TestError {
            span: span(3..12),
            kind: ast::ErrorKind::EscapeHexInvalid,
        }
    );
}
4631
#[test]
fn parse_decimal() {
    let decimal = |pat: &str| parser(pat).parse_decimal();
    // Error expected when no digit is found at the very start of the input.
    let empty_at_start = || TestError {
        span: span(0..0),
        kind: ast::ErrorKind::DecimalEmpty,
    };

    // Plain numbers, including a leading zero.
    assert_eq!(decimal("123"), Ok(123));
    assert_eq!(decimal("0"), Ok(0));
    assert_eq!(decimal("01"), Ok(1));

    // A sign is not a digit, and neither is nothing at all.
    assert_eq!(decimal("-1").unwrap_err(), empty_at_start());
    assert_eq!(decimal("").unwrap_err(), empty_at_start());

    // Ten nines are rejected as an invalid decimal; the span covers all of
    // the digits.
    assert_eq!(
        decimal("9999999999").unwrap_err(),
        TestError {
            span: span(0..10),
            kind: ast::ErrorKind::DecimalInvalid,
        }
    );
}
4654
#[test]
fn parse_set_class() {
    // ---- builders for the expected AST ----------------------------------

    /// A non-negated bracketed class.
    fn class_set(span: Span, kind: ast::ClassSet) -> ast::ClassBracketed {
        ast::ClassBracketed { span, negated: false, kind }
    }

    /// A non-negated bracketed class as a top-level AST node.
    fn bracketed(span: Span, kind: ast::ClassSet) -> Ast {
        Ast::Class(ast::Class::Bracketed(class_set(span, kind)))
    }

    fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
        ast::ClassSet::union(ast::ClassSetUnion { span, items })
    }

    /// A binary set operation of the given kind.
    fn binop(
        span: Span,
        kind: ast::ClassSetBinaryOpKind,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    fn intersection(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binop(span, ast::ClassSetBinaryOpKind::Intersection, lhs, rhs)
    }

    fn difference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binop(span, ast::ClassSetBinaryOpKind::Difference, lhs, rhs)
    }

    fn symdifference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binop(span, ast::ClassSetBinaryOpKind::SymmetricDifference, lhs, rhs)
    }

    fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
        ast::ClassSet::Item(item)
    }

    /// A non-negated ASCII class item such as `[:alnum:]`.
    fn ascii(span: Span, kind: ast::ClassAsciiKind) -> ast::ClassSetItem {
        ast::ClassSetItem::Ascii(ast::ClassAscii {
            span,
            kind,
            negated: false,
        })
    }

    /// The non-negated Perl word class, `\w`.
    fn word(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Perl(ast::ClassPerl {
            span,
            kind: ast::ClassPerlKind::Word,
            negated: false,
        })
    }

    fn literal(span: Span, kind: ast::LiteralKind, c: char) -> ast::Literal {
        ast::Literal { span, kind, c }
    }

    /// A literal written verbatim.
    fn lit(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(literal(
            span,
            ast::LiteralKind::Verbatim,
            c,
        ))
    }

    /// A literal of kind `Punctuation` (written here as `\]`, `\-`, ...).
    fn punct(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(literal(
            span,
            ast::LiteralKind::Punctuation,
            c,
        ))
    }

    fn empty(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Empty(span)
    }

    /// A range `start-end` of verbatim literals filling `span`. The start
    /// literal occupies the first character of the span and the end
    /// literal the last one.
    fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
        let after_start = Position {
            offset: span.start.offset + start.len_utf8(),
            column: span.start.column + 1,
            ..span.start
        };
        let before_end = Position {
            offset: span.end.offset - end.len_utf8(),
            column: span.end.column - 1,
            ..span.end
        };
        ast::ClassSetItem::Range(ast::ClassSetRange {
            span,
            start: literal(
                Span { end: after_start, ..span },
                ast::LiteralKind::Verbatim,
                start,
            ),
            end: literal(
                Span { start: before_end, ..span },
                ast::LiteralKind::Verbatim,
                end,
            ),
        })
    }

    let parse_err = |pat: &str| parser(pat).parse().unwrap_err();
    let parse_err_ws =
        |pat: &str| parser_ignore_whitespace(pat).parse().unwrap_err();

    // ---- ASCII classes, nesting and set operators -----------------------

    assert_eq!(
        parser("[[:alnum:]]").parse(),
        Ok(bracketed(
            span(0..11),
            itemset(ascii(span(1..10), ast::ClassAsciiKind::Alnum)),
        ))
    );
    assert_eq!(
        parser("[[[:alnum:]]]").parse(),
        Ok(bracketed(
            span(0..13),
            itemset(ast::ClassSetItem::Bracketed(Box::new(class_set(
                span(1..12),
                itemset(ascii(span(2..11), ast::ClassAsciiKind::Alnum)),
            )))),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]&&[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            intersection(
                span(1..21),
                itemset(ascii(span(1..10), ast::ClassAsciiKind::Alnum)),
                itemset(ascii(span(12..21), ast::ClassAsciiKind::Lower)),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]--[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            difference(
                span(1..21),
                itemset(ascii(span(1..10), ast::ClassAsciiKind::Alnum)),
                itemset(ascii(span(12..21), ast::ClassAsciiKind::Lower)),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]~~[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            symdifference(
                span(1..21),
                itemset(ascii(span(1..10), ast::ClassAsciiKind::Alnum)),
                itemset(ascii(span(12..21), ast::ClassAsciiKind::Lower)),
            ),
        ))
    );

    // ---- literals, escapes and class escapes ----------------------------

    assert_eq!(
        parser("[a]").parse(),
        Ok(bracketed(span(0..3), itemset(lit(span(1..2), 'a'))))
    );
    assert_eq!(
        parser(r"[a\]]").parse(),
        Ok(bracketed(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), 'a'), punct(span(2..4), ']')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[a\-z]").parse(),
        Ok(bracketed(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    punct(span(2..4), '-'),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser("[ab]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), 'b')],
            ),
        ))
    );
    // A `-` next to either bracket is a plain literal, not a range.
    assert_eq!(
        parser("[a-]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), '-')],
            ),
        ))
    );
    assert_eq!(
        parser("[-a]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), '-'), lit(span(2..3), 'a')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\pL]").parse(),
        Ok(bracketed(
            span(0..5),
            itemset(ast::ClassSetItem::Unicode(ast::ClassUnicode {
                span: span(1..4),
                negated: false,
                kind: ast::ClassUnicodeKind::OneLetter('L'),
            })),
        ))
    );
    assert_eq!(
        parser(r"[\w]").parse(),
        Ok(bracketed(span(0..4), itemset(word(span(1..3)))))
    );
    assert_eq!(
        parser(r"[a\wz]").parse(),
        Ok(bracketed(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    word(span(2..4)),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );

    // ---- ranges and chained operators -----------------------------------

    assert_eq!(
        parser("[a-z]").parse(),
        Ok(bracketed(span(0..5), itemset(range(span(1..4), 'a', 'z'))))
    );
    assert_eq!(
        parser("[a-cx-z]").parse(),
        Ok(bracketed(
            span(0..8),
            union(
                span(1..7),
                vec![
                    range(span(1..4), 'a', 'c'),
                    range(span(4..7), 'x', 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\w&&a-cx-z]").parse(),
        Ok(bracketed(
            span(0..12),
            intersection(
                span(1..11),
                itemset(word(span(1..3))),
                union(
                    span(5..11),
                    vec![
                        range(span(5..8), 'a', 'c'),
                        range(span(8..11), 'x', 'z'),
                    ],
                ),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a-cx-z&&\w]").parse(),
        Ok(bracketed(
            span(0..12),
            intersection(
                span(1..11),
                union(
                    span(1..7),
                    vec![
                        range(span(1..4), 'a', 'c'),
                        range(span(4..7), 'x', 'z'),
                    ],
                ),
                itemset(word(span(9..11))),
            ),
        ))
    );
    // Repeated operators nest to the left.
    assert_eq!(
        parser(r"[a--b--c]").parse(),
        Ok(bracketed(
            span(0..9),
            difference(
                span(1..8),
                difference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a~~b~~c]").parse(),
        Ok(bracketed(
            span(0..9),
            symdifference(
                span(1..8),
                symdifference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\^&&^]").parse(),
        Ok(bracketed(
            span(0..7),
            intersection(
                span(1..6),
                itemset(punct(span(1..3), '^')),
                itemset(lit(span(5..6), '^')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\&&&&]").parse(),
        Ok(bracketed(
            span(0..7),
            intersection(
                span(1..6),
                itemset(punct(span(1..3), '&')),
                itemset(lit(span(5..6), '&')),
            ),
        ))
    );
    // Operators with nothing around them operate on empty items.
    assert_eq!(
        parser(r"[&&&&]").parse(),
        Ok(bracketed(
            span(0..6),
            intersection(
                span(1..5),
                intersection(
                    span(1..3),
                    itemset(empty(span(1..1))),
                    itemset(empty(span(3..3))),
                ),
                itemset(empty(span(5..5))),
            ),
        ))
    );

    // Multi-byte endpoints: expected spans are built with `span_range` and
    // spelled out by hand rather than through the `range` helper.
    let pat = "[☃-⛄]";
    assert_eq!(
        parser(pat).parse(),
        Ok(bracketed(
            span_range(pat, 0..9),
            itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
                span: span_range(pat, 1..8),
                start: literal(
                    span_range(pat, 1..4),
                    ast::LiteralKind::Verbatim,
                    '☃',
                ),
                end: literal(
                    span_range(pat, 5..8),
                    ast::LiteralKind::Verbatim,
                    '⛄',
                ),
            })),
        ))
    );

    // ---- `]` and `[` as members -----------------------------------------

    // A `]` directly after the opening bracket is a literal.
    assert_eq!(
        parser(r"[]]").parse(),
        Ok(bracketed(span(0..3), itemset(lit(span(1..2), ']'))))
    );
    assert_eq!(
        parser(r"[]\[]").parse(),
        Ok(bracketed(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), ']'), punct(span(2..4), '[')],
            ),
        ))
    );
    // Here the first `]` closes the class; the second is a literal after it.
    assert_eq!(
        parser(r"[\[]]").parse(),
        Ok(concat(
            0..5,
            vec![
                bracketed(span(0..4), itemset(punct(span(1..3), '['))),
                Ast::Literal(literal(
                    span(4..5),
                    ast::LiteralKind::Verbatim,
                    ']',
                )),
            ]
        ))
    );

    // ---- errors ---------------------------------------------------------

    assert_eq!(
        parse_err("["),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parse_err("[["),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parse_err("[[-]"),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parse_err("[[[:alnum:]"),
        TestError {
            span: span(1..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parse_err(r"[\b]"),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::ClassEscapeInvalid,
        }
    );
    // A class escape cannot be either endpoint of a range.
    assert_eq!(
        parse_err(r"[\w-a]"),
        TestError {
            span: span(1..3),
            kind: ast::ErrorKind::ClassRangeLiteral,
        }
    );
    assert_eq!(
        parse_err(r"[a-\w]"),
        TestError {
            span: span(3..5),
            kind: ast::ErrorKind::ClassRangeLiteral,
        }
    );
    assert_eq!(
        parse_err(r"[z-a]"),
        TestError {
            span: span(1..4),
            kind: ast::ErrorKind::ClassRangeInvalid,
        }
    );

    // Unclosed classes using the `parser_ignore_whitespace` helper.
    assert_eq!(
        parse_err_ws("[a "),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        parse_err_ws("[a- "),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
}
5237
#[test]
fn parse_set_class_open() {
    /// Builds the `(set, union)` pair expected from a successful
    /// `parse_set_class_open` call.
    ///
    /// `set_span` and `negated` describe the bracketed set; `union_span`
    /// and `items` describe the union of items consumed while opening. In
    /// every case below the set's own union is empty and sits at the start
    /// of `union_span`.
    fn opened(
        set_span: Span,
        negated: bool,
        union_span: Span,
        items: Vec<ast::ClassSetItem>,
    ) -> (ast::ClassBracketed, ast::ClassSetUnion) {
        let set = ast::ClassBracketed {
            span: set_span,
            negated,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: Span { end: union_span.start, ..union_span },
                items: vec![],
            }),
        };
        (set, ast::ClassSetUnion { span: union_span, items })
    }

    /// A literal written verbatim in the pattern.
    fn verbatim(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }

    let open_err =
        |pat: &str| parser(pat).parse_set_class_open().unwrap_err();

    // Plain and negated openings, with and without ignorable whitespace.
    assert_eq!(
        parser("[a]").parse_set_class_open(),
        Ok(opened(span(0..1), false, span(1..1), vec![]))
    );
    // Three spaces after `[`: the expected spans put the first item at
    // offset 4.
    assert_eq!(
        parser_ignore_whitespace("[   a]").parse_set_class_open(),
        Ok(opened(span(0..4), false, span(4..4), vec![]))
    );
    assert_eq!(
        parser("[^a]").parse_set_class_open(),
        Ok(opened(span(0..2), true, span(2..2), vec![]))
    );
    assert_eq!(
        parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
        Ok(opened(span(0..4), true, span(4..4), vec![]))
    );

    // Leading `-` characters come back as literal items of the union.
    assert_eq!(
        parser("[-a]").parse_set_class_open(),
        Ok(opened(
            span(0..2),
            false,
            span(1..2),
            vec![verbatim(span(1..2), '-')],
        ))
    );
    assert_eq!(
        parser_ignore_whitespace("[ - a]").parse_set_class_open(),
        Ok(opened(
            span(0..4),
            false,
            span(2..3),
            vec![verbatim(span(2..3), '-')],
        ))
    );
    assert_eq!(
        parser("[^-a]").parse_set_class_open(),
        Ok(opened(
            span(0..3),
            true,
            span(2..3),
            vec![verbatim(span(2..3), '-')],
        ))
    );
    assert_eq!(
        parser("[--a]").parse_set_class_open(),
        Ok(opened(
            span(0..3),
            false,
            span(1..3),
            vec![verbatim(span(1..2), '-'), verbatim(span(2..3), '-')],
        ))
    );

    // A leading `]` comes back as a literal item as well.
    assert_eq!(
        parser("[]a]").parse_set_class_open(),
        Ok(opened(
            span(0..2),
            false,
            span(1..2),
            vec![verbatim(span(1..2), ']')],
        ))
    );
    assert_eq!(
        parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
        Ok(opened(
            span(0..4),
            false,
            span(2..3),
            vec![verbatim(span(2..3), ']')],
        ))
    );
    assert_eq!(
        parser("[^]a]").parse_set_class_open(),
        Ok(opened(
            span(0..3),
            true,
            span(2..3),
            vec![verbatim(span(2..3), ']')],
        ))
    );
    // Only the `-` is consumed here; the `]` after it is not part of the
    // returned union.
    assert_eq!(
        parser("[-]a]").parse_set_class_open(),
        Ok(opened(
            span(0..2),
            false,
            span(1..2),
            vec![verbatim(span(1..2), '-')],
        ))
    );

    // Input that ends while the class is still being opened.
    assert_eq!(
        open_err("["),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    // Four spaces after `[`: the expected span `0..5` covers the whole
    // five-byte pattern.
    assert_eq!(
        parser_ignore_whitespace("[    ")
            .parse_set_class_open()
            .unwrap_err(),
        TestError {
            span: span(0..5),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        open_err("[^"),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        open_err("[]"),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        open_err("[-"),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
    assert_eq!(
        open_err("[--"),
        TestError {
            span: span(0..0),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );

    // See: https://github.com/rust-lang/regex/issues/792
    assert_eq!(
        parser("(?x)[-#]").parse_with_comments().unwrap_err(),
        TestError {
            span: span(4..4),
            kind: ast::ErrorKind::ClassUnclosed,
        }
    );
}
5516
#[test]
fn maybe_parse_ascii_class() {
    // Expected `[:alnum:]` class covering the first `len` bytes.
    let alnum = |len: usize, negated: bool| {
        Some(ast::ClassAscii {
            span: span(0..len),
            kind: ast::ClassAsciiKind::Alnum,
            negated,
        })
    };

    assert_eq!(
        parser(r"[:alnum:]").maybe_parse_ascii_class(),
        alnum(9, false)
    );
    // Trailing input is not part of the class.
    assert_eq!(
        parser(r"[:alnum:]A").maybe_parse_ascii_class(),
        alnum(9, false)
    );
    assert_eq!(
        parser(r"[:^alnum:]").maybe_parse_ascii_class(),
        alnum(10, true)
    );

    // Anything that is not a complete, known ASCII class yields `None` and
    // must leave the parser at offset 0.
    let rejected = [
        r"[:",
        r"[:^",
        r"[^:alnum:]",
        r"[:alnnum:]",
        r"[:alnum]",
        r"[:alnum:",
    ];
    for &pat in &rejected {
        let p = parser(pat);
        assert_eq!(p.maybe_parse_ascii_class(), None, "pattern: {}", pat);
        assert_eq!(p.offset(), 0, "pattern: {}", pat);
    }
}
5568
#[test]
fn parse_unicode_class() {
    // Builds the expected class node for a Unicode class that starts at
    // offset 0 of the pattern and ends at offset `end`.
    let class = |end, negated, kind| ast::ClassUnicode {
        span: span(0..end),
        negated,
        kind,
    };

    // One-letter and braced class names, both plain (`\p`) and negated (`\P`).
    let simple_cases = vec![
        (r"\pN", 3, false, ast::ClassUnicodeKind::OneLetter('N')),
        (r"\PN", 3, true, ast::ClassUnicodeKind::OneLetter('N')),
        (r"\p{N}", 5, false, ast::ClassUnicodeKind::Named(s("N"))),
        (r"\P{N}", 5, true, ast::ClassUnicodeKind::Named(s("N"))),
        (r"\p{Greek}", 9, false, ast::ClassUnicodeKind::Named(s("Greek"))),
    ];
    for (pattern, end, negated, kind) in simple_cases {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Unicode(class(end, negated, kind)))
        );
    }

    // `name<op>value` forms for each of the three operators, first with a
    // real name and value, then with both sides empty.
    let named_value_cases = vec![
        (
            r"\p{scx:Katakana}",
            16,
            ast::ClassUnicodeOpKind::Colon,
            "scx",
            "Katakana",
        ),
        (
            r"\p{scx=Katakana}",
            16,
            ast::ClassUnicodeOpKind::Equal,
            "scx",
            "Katakana",
        ),
        (
            r"\p{scx!=Katakana}",
            17,
            ast::ClassUnicodeOpKind::NotEqual,
            "scx",
            "Katakana",
        ),
        (r"\p{:}", 5, ast::ClassUnicodeOpKind::Colon, "", ""),
        (r"\p{=}", 5, ast::ClassUnicodeOpKind::Equal, "", ""),
        (r"\p{!=}", 6, ast::ClassUnicodeOpKind::NotEqual, "", ""),
    ];
    for (pattern, end, op, name, value) in named_value_cases {
        let kind = ast::ClassUnicodeKind::NamedValue {
            op,
            name: s(name),
            value: s(value),
        };
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Unicode(class(end, false, kind)))
        );
    }

    // Truncated escapes: the error is an empty span located at the end of
    // the pattern.
    let truncated_cases = [(r"\p", 2), (r"\p{", 3), (r"\p{N", 4), (r"\p{Greek", 8)];
    for &(pattern, eof) in truncated_cases.iter() {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                span: span(eof..eof),
                kind: ast::ErrorKind::EscapeUnexpectedEof,
            }
        );
    }

    // A class followed by a literal `z`: the full parse is a concatenation
    // in which the literal occupies the single offset right after the class.
    let concat_cases = vec![
        (r"\pNz", 3, ast::ClassUnicodeKind::OneLetter('N')),
        (r"\p{Greek}z", 9, ast::ClassUnicodeKind::Named(s("Greek"))),
    ];
    for (pattern, class_end, kind) in concat_cases {
        let expected = Ast::Concat(ast::Concat {
            span: span(0..class_end + 1),
            asts: vec![
                Ast::Class(ast::Class::Unicode(class(class_end, false, kind))),
                Ast::Literal(ast::Literal {
                    span: span(class_end..class_end + 1),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        });
        assert_eq!(parser(pattern).parse(), Ok(expected));
    }

    // A backslash directly after `\p` / `\P` is reported as an invalid
    // Unicode class covering just that backslash.
    let invalid_cases = [r"\p\{", r"\P\{"];
    for &pattern in invalid_cases.iter() {
        assert_eq!(
            parser(pattern).parse().unwrap_err(),
            TestError {
                span: span(2..3),
                kind: ast::ErrorKind::UnicodeClassInvalid,
            }
        );
    }
}
5766
#[test]
fn parse_perl_class() {
    // Every escape checked here is two characters long and sits at the start
    // of the pattern, so the expected class always spans offsets 0..2.
    let perl = |kind, negated| ast::ClassPerl {
        span: span(0..2),
        kind,
        negated,
    };

    // Lowercase escapes are the plain classes; uppercase ones are negated.
    let escape_cases = vec![
        (r"\d", ast::ClassPerlKind::Digit, false),
        (r"\D", ast::ClassPerlKind::Digit, true),
        (r"\s", ast::ClassPerlKind::Space, false),
        (r"\S", ast::ClassPerlKind::Space, true),
        (r"\w", ast::ClassPerlKind::Word, false),
        (r"\W", ast::ClassPerlKind::Word, true),
    ];
    for (pattern, kind, negated) in escape_cases {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Perl(perl(kind, negated)))
        );
    }

    // A full parse of a lone class yields the class node itself.
    let lone_digit =
        Ast::Class(ast::Class::Perl(perl(ast::ClassPerlKind::Digit, false)));
    assert_eq!(parser(r"\d").parse(), Ok(lone_digit));

    // A class followed by a literal parses as a two-element concatenation.
    let digit_then_z = Ast::Concat(ast::Concat {
        span: span(0..3),
        asts: vec![
            Ast::Class(ast::Class::Perl(perl(
                ast::ClassPerlKind::Digit,
                false,
            ))),
            Ast::Literal(ast::Literal {
                span: span(2..3),
                kind: ast::LiteralKind::Verbatim,
                c: 'z',
            }),
        ],
    });
    assert_eq!(parser(r"\dz").parse(), Ok(digit_then_z));
}
5845
5846	// Regression test: the nest limit checker used to skip decrementing its
5847	// depth during post-traversal, which caused long regexes to trip the limit
5848	// too aggressively. The pattern below must parse under a nest limit of 50.
5849	#[test]
5850	fn regression_454_nest_too_big() {
5851	let pattern = r#"
5852	2(?:
5853	[45]\d{3}\|
5854	7(?:
5855	1[0-267]\|
5856	2[0-289]\|
5857	3[0-29]\|
5858	4[01]\|
5859	5[1-3]\|
5860	6[013]\|
5861	7[0178]\|
5862	91
5863	)\|
5864	8(?:
5865	0[125]\|
5866	[139][1-6]\|
5867	2[0157-9]\|
5868	41\|
5869	6[1-35]\|
5870	7[1-5]\|
5871	8[1-8]\|
5872	90
5873	)\|
5874	9(?:
5875	0[0-2]\|
5876	1[0-4]\|
5877	2[568]\|
5878	3[3-6]\|
5879	5[5-7]\|
5880	6[0167]\|
5881	7[15]\|
5882	8[0146-9]
5883	)
5884	)\d{4}
5885	"#;
5886	assert!(parser_nest_limit(pattern, `50`).parse().is_ok()); // only success is checked, not the resulting AST
5887	}
5888
5889	// Checks that a trailing `-` in a character class is treated as a literal
5890	// `-` even when whitespace mode (`(?x)`) is enabled and whitespace follows
5891	// the `-`, including classes that span several lines or hold a `# wat` comment.
5892	#[test]
5893	fn regression_455_trailing_dash_ignore_whitespace() {
5894	assert!(parser("(?x)[ / - ]").parse().is_ok());
5895	assert!(parser("(?x)[ a - ]").parse().is_ok());
5896	assert!(parser(
5897	"(?x)[
5898	a
5899	- ]
5900	"
5901	)
5902	.parse()
5903	.is_ok());
5904	assert!(parser(
5905	"(?x)[
5906	a # wat
5907	- ]
5908	"
5909	)
5910	.parse()
5911	.is_ok());
5912	// The same kind of class without its closing `]` must still be rejected.
5913	assert!(parser("(?x)[ / -").parse().is_err());
5914	assert!(parser("(?x)[ / - ").parse().is_err());
5915	assert!(parser(
5916	"(?x)[
5917	/ -
5918	"
5919	)
5920	.parse()
5921	.is_err());
5922	assert!(parser(
5923	"(?x)[
5924	/ - # wat
5925	"
5926	)
5927	.parse()
5928	.is_err());
5929	}
5930	}
5931