parse.rs source code [crates/regex-syntax-0.7.2/src/ast/parse.rs]

/*!
This module provides a regular expression parser.
*/
4
5	use core::{
6	borrow::Borrow,
7	cell::{Cell, RefCell},
8	mem,
9	};
10
11	use alloc::{
12	boxed::Box,
13	string::{String, ToString},
14	vec,
15	vec::Vec,
16	};
17
18	use crate::{
19	ast::{self, Ast, Position, Span},
20	either::Either,
21	is_escapeable_character, is_meta_character,
22	};
23
24	type Result<T> = core::result::Result<T, ast::Error>;
25
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal; converts to `Ast::Literal`.
    Literal(ast::Literal),
    /// An assertion; converts to `Ast::Assertion`. Not permitted inside a
    /// character class.
    Assertion(ast::Assertion),
    /// The dot. Only its span is stored. Not permitted inside a character
    /// class.
    Dot(Span),
    /// A Perl character class; converts to `ast::Class::Perl`.
    Perl(ast::ClassPerl),
    /// A Unicode character class; converts to `ast::Class::Unicode`.
    Unicode(ast::ClassUnicode),
}
40
41	impl Primitive {
42	/// Return the span of this primitive.
43	fn span(&self) -> &Span {
44	match *self {
45	Primitive::Literal(ref x) => &x.span,
46	Primitive::Assertion(ref x) => &x.span,
47	Primitive::Dot(ref span) => span,
48	Primitive::Perl(ref x) => &x.span,
49	Primitive::Unicode(ref x) => &x.span,
50	}
51	}
52
53	/// Convert this primitive into a proper AST.
54	fn into_ast(self) -> Ast {
55	match self {
56	Primitive::Literal(lit) => Ast::Literal(lit),
57	Primitive::Assertion(assert) => Ast::Assertion(assert),
58	Primitive::Dot(span) => Ast::Dot(span),
59	Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
60	Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
61	}
62	}
63
64	/// Convert this primitive into an item in a character class.
65	///
66	/// If this primitive is not a legal item (i.e., an assertion or a dot),
67	/// then return an error.
68	fn into_class_set_item<P: Borrow<Parser>>(
69	self,
70	p: &ParserI<'_, P>,
71	) -> Result<ast::ClassSetItem> {
72	use self::Primitive::*;
73	use crate::ast::ClassSetItem;
74
75	match self {
76	Literal(lit) => Ok(ClassSetItem::Literal(lit)),
77	Perl(cls) => Ok(ClassSetItem::Perl(cls)),
78	Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
79	x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
80	}
81	}
82
83	/// Convert this primitive into a literal in a character class. In
84	/// particular, literals are the only valid items that can appear in
85	/// ranges.
86	///
87	/// If this primitive is not a legal item (i.e., a class, assertion or a
88	/// dot), then return an error.
89	fn into_class_literal<P: Borrow<Parser>>(
90	self,
91	p: &ParserI<'_, P>,
92	) -> Result<ast::Literal> {
93	use self::Primitive::*;
94
95	match self {
96	Literal(lit) => Ok(lit),
97	x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
98	}
99	}
100	}
101
/// Returns true if the given character is an ASCII hexadecimal digit, i.e.,
/// one of `0-9`, `a-f` or `A-F`.
///
/// Non-ASCII digits (for example, full-width digits) are never accepted.
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
106
/// Returns true if the given character is valid in a capture group name.
///
/// An underscore is always valid. When `first` is true, `c` is treated as
/// the first character of the name and must otherwise be alphabetic. In any
/// later position, `.`, `[`, `]` and alphanumeric characters are valid too.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        // Punctuation is only permitted after the first character.
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
118
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
/// Each field is copied into the `Parser` produced by `build`.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether verbose mode (insignificant whitespace and comments) is
    /// enabled at the start of a parse. Defaults to `false`.
    ignore_whitespace: bool,
    /// The nesting limit given to the parser. Defaults to `250`.
    nest_limit: u32,
    /// Whether octal syntax is supported. Defaults to `false`.
    octal: bool,
}
128
129	impl Default for ParserBuilder {
130	fn default() -> ParserBuilder {
131	ParserBuilder::new()
132	}
133	}
134
135	impl ParserBuilder {
136	/// Create a new parser builder with a default configuration.
137	pub fn new() -> ParserBuilder {
138	ParserBuilder {
139	ignore_whitespace: `false`,
140	nest_limit: `250`,
141	octal: `false`,
142	}
143	}
144
145	/// Build a parser from this configuration with the given pattern.
146	pub fn build(&self) -> Parser {
147	Parser {
148	pos: Cell::new(Position { offset: `0`, line: `1`, column: `1` }),
149	capture_index: Cell::new(`0`),
150	nest_limit: self.nest_limit,
151	octal: self.octal,
152	initial_ignore_whitespace: self.ignore_whitespace,
153	ignore_whitespace: Cell::new(self.ignore_whitespace),
154	comments: RefCell::new(vec![]),
155	stack_group: RefCell::new(vec![]),
156	stack_class: RefCell::new(vec![]),
157	capture_names: RefCell::new(vec![]),
158	scratch: RefCell::new(String::new()),
159	}
160	}
161
162	/// Set the nesting limit for this parser.
163	///
164	/// The nesting limit controls how deep the abstract syntax tree is allowed
165	/// to be. If the AST exceeds the given limit (e.g., with too many nested
166	/// groups), then an error is returned by the parser.
167	///
168	/// The purpose of this limit is to act as a heuristic to prevent stack
169	/// overflow for consumers that do structural induction on an `Ast` using
170	/// explicit recursion. While this crate never does this (instead using
171	/// constant stack space and moving the call stack to the heap), other
172	/// crates may.
173	///
174	/// This limit is not checked until the entire AST is parsed. Therefore,
175	/// if callers want to put a limit on the amount of heap space used, then
176	/// they should impose a limit on the length, in bytes, of the concrete
177	/// pattern string. In particular, this is viable since this parser
178	/// implementation will limit itself to heap space proportional to the
179	/// length of the pattern string.
180	///
181	/// Note that a nest limit of `0` will return a nest limit error for most
182	/// patterns but not all. For example, a nest limit of `0` permits `a` but
183	/// not `ab`, since `ab` requires a concatenation, which results in a nest
184	/// depth of `1`. In general, a nest limit is not something that manifests
185	/// in an obvious way in the concrete syntax, therefore, it should not be
186	/// used in a granular way.
187	pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
188	self.nest_limit = limit;
189	self
190	}
191
192	/// Whether to support octal syntax or not.
193	///
194	/// Octal syntax is a little-known way of uttering Unicode codepoints in
195	/// a regular expression. For example, `a`, `\x61`, `\u0061` and
196	/// `\141` are all equivalent regular expressions, where the last example
197	/// shows octal syntax.
198	///
199	/// While supporting octal syntax isn't in and of itself a problem, it does
200	/// make good error messages harder. That is, in PCRE based regex engines,
201	/// syntax like `\0` invokes a backreference, which is explicitly
202	/// unsupported in Rust's regex engine. However, many users expect it to
203	/// be supported. Therefore, when octal support is disabled, the error
204	/// message will explicitly mention that backreferences aren't supported.
205	///
206	/// Octal syntax is disabled by default.
207	pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
208	self.octal = yes;
209	self
210	}
211
212	/// Enable verbose mode in the regular expression.
213	///
214	/// When enabled, verbose mode permits insignificant whitespace in many
215	/// places in the regular expression, as well as comments. Comments are
216	/// started using `#` and continue until the end of the line.
217	///
218	/// By default, this is disabled. It may be selectively enabled in the
219	/// regular expression by using the `x` flag regardless of this setting.
220	pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
221	self.ignore_whitespace = yes;
222	self
223	}
224	}
225
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a [`ParserBuilder`].
///
/// All state that changes during a parse is stored in `Cell`/`RefCell`
/// fields, so the internal parser can update it through a shared reference.
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser.
    pos: Cell<Position>,
    /// The current capture index.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. It is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sequence of capture names, kept sorted by name. This is used to
    /// detect duplicate capture names (via binary search) and report an
    /// error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
266
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but a `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
///
/// The methods on this type require `P: Borrow<Parser>`, which is how both
/// of the forms above are supported.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration.
    parser: P,
    /// The full regular expression provided by the user.
    pattern: &'s str,
}
283
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening
/// parenthesis or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// The whitespace-insensitivity (`x` flag) setting that was in
        /// effect immediately before this group was opened. It is restored
        /// when the group is popped.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
}
304
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the
    /// stored set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
331
332	impl Parser {
333	/// Create a new parser with a default configuration.
334	///
335	/// The parser can be run with either the `parse` or `parse_with_comments`
336	/// methods. The parse methods return an abstract syntax tree.
337	///
338	/// To set configuration options on the parser, use [`ParserBuilder`].
339	pub fn new() -> Parser {
340	ParserBuilder::new().build()
341	}
342
343	/// Parse the regular expression into an abstract syntax tree.
344	pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
345	ParserI::new(self, pattern).parse()
346	}
347
348	/// Parse the regular expression and return an abstract syntax tree with
349	/// all of the comments found in the pattern.
350	pub fn parse_with_comments(
351	&mut self,
352	pattern: &str,
353	) -> Result<ast::WithComments> {
354	ParserI::new(self, pattern).parse_with_comments()
355	}
356
357	/// Reset the internal state of a parser.
358	///
359	/// This is called at the beginning of every parse. This prevents the
360	/// parser from running with inconsistent state (say, if a previous
361	/// invocation returned an error and the parser is reused).
362	fn reset(&self) {
363	// These settings should be in line with the construction
364	// in `ParserBuilder::build`.
365	self.pos.set(Position { offset: `0`, line: `1`, column: `1` });
366	self.ignore_whitespace.set(self.initial_ignore_whitespace);
367	self.comments.borrow_mut().clear();
368	self.stack_group.borrow_mut().clear();
369	self.stack_class.borrow_mut().clear();
370	}
371	}
372
373	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
374	/// Build an internal parser from a parser configuration and a pattern.
375	fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
376	ParserI { parser, pattern }
377	}
378
379	/// Return a reference to the parser state.
380	fn parser(&self) -> &Parser {
381	self.parser.borrow()
382	}
383
384	/// Return a reference to the pattern being parsed.
385	fn pattern(&self) -> &str {
386	self.pattern.borrow()
387	}
388
389	/// Create a new error with the given span and error type.
390	fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
391	ast::Error { kind, pattern: self.pattern().to_string(), span }
392	}
393
394	/// Return the current offset of the parser.
395	///
396	/// The offset starts at `0` from the beginning of the regular expression
397	/// pattern string.
398	fn offset(&self) -> usize {
399	self.parser().pos.get().offset
400	}
401
402	/// Return the current line number of the parser.
403	///
404	/// The line number starts at `1`.
405	fn line(&self) -> usize {
406	self.parser().pos.get().line
407	}
408
409	/// Return the current column of the parser.
410	///
411	/// The column number starts at `1` and is reset whenever a `\n` is seen.
412	fn column(&self) -> usize {
413	self.parser().pos.get().column
414	}
415
416	/// Return the next capturing index. Each subsequent call increments the
417	/// internal index.
418	///
419	/// The span given should correspond to the location of the opening
420	/// parenthesis.
421	///
422	/// If the capture limit is exceeded, then an error is returned.
423	fn next_capture_index(&self, span: Span) -> Result<u32> {
424	let current = self.parser().capture_index.get();
425	let i = current.checked_add(`1`).ok_or_else(\|\| {
426	self.error(span, ast::ErrorKind::CaptureLimitExceeded)
427	})?;
428	self.parser().capture_index.set(i);
429	Ok(i)
430	}
431
432	/// Adds the given capture name to this parser. If this capture name has
433	/// already been used, then an error is returned.
434	fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
435	let mut names = self.parser().capture_names.borrow_mut();
436	match names
437	.binary_search_by_key(&cap.name.as_str(), \|c\| c.name.as_str())
438	{
439	Err(i) => {
440	names.insert(i, cap.clone());
441	Ok(())
442	}
443	Ok(i) => Err(self.error(
444	cap.span,
445	ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
446	)),
447	}
448	}
449
450	/// Return whether the parser should ignore whitespace or not.
451	fn ignore_whitespace(&self) -> bool {
452	self.parser().ignore_whitespace.get()
453	}
454
455	/// Return the character at the current position of the parser.
456	///
457	/// This panics if the current position does not point to a valid char.
458	fn char(&self) -> char {
459	self.char_at(self.offset())
460	}
461
462	/// Return the character at the given position.
463	///
464	/// This panics if the given position does not point to a valid char.
465	fn char_at(&self, i: usize) -> char {
466	self.pattern()[i..]
467	.chars()
468	.next()
469	.unwrap_or_else(\|\| panic!("expected char at offset {}", i))
470	}
471
472	/// Bump the parser to the next Unicode scalar value.
473	///
474	/// If the end of the input has been reached, then `false` is returned.
475	fn bump(&self) -> bool {
476	if self.is_eof() {
477	return `false`;
478	}
479	let Position { mut offset, mut line, mut column } = self.pos();
480	if self.char() == '`\n`' {
481	line = line.checked_add(`1`).unwrap();
482	column = `1`;
483	} else {
484	column = column.checked_add(`1`).unwrap();
485	}
486	offset += self.char().len_utf8();
487	self.parser().pos.set(Position { offset, line, column });
488	self.pattern()[self.offset()..].chars().next().is_some()
489	}
490
491	/// If the substring starting at the current position of the parser has
492	/// the given prefix, then bump the parser to the character immediately
493	/// following the prefix and return true. Otherwise, don't bump the parser
494	/// and return false.
495	fn bump_if(&self, prefix: &str) -> bool {
496	if self.pattern()[self.offset()..].starts_with(prefix) {
497	for _ in `0`..prefix.chars().count() {
498	self.bump();
499	}
500	`true`
501	} else {
502	`false`
503	}
504	}
505
506	/// Returns true if and only if the parser is positioned at a look-around
507	/// prefix. The conditions under which this returns true must always
508	/// correspond to a regular expression that would otherwise be consider
509	/// invalid.
510	///
511	/// This should only be called immediately after parsing the opening of
512	/// a group or a set of flags.
513	fn is_lookaround_prefix(&self) -> bool {
514	self.bump_if("?=")
515	\|\| self.bump_if("?!")
516	\|\| self.bump_if("?<=")
517	\|\| self.bump_if("?<!")
518	}
519
520	/// Bump the parser, and if the `x` flag is enabled, bump through any
521	/// subsequent spaces. Return true if and only if the parser is not at
522	/// EOF.
523	fn bump_and_bump_space(&self) -> bool {
524	if !self.bump() {
525	return `false`;
526	}
527	self.bump_space();
528	!self.is_eof()
529	}
530
531	/// If the `x` flag is enabled (i.e., whitespace insensitivity with
532	/// comments), then this will advance the parser through all whitespace
533	/// and comments to the next non-whitespace non-comment byte.
534	///
535	/// If the `x` flag is disabled, then this is a no-op.
536	///
537	/// This should be used selectively throughout the parser where
538	/// arbitrary whitespace is permitted when the `x` flag is enabled. For
539	/// example, `{ 5 , 6}` is equivalent to `{5,6}`.
540	fn bump_space(&self) {
541	if !self.ignore_whitespace() {
542	return;
543	}
544	while !self.is_eof() {
545	if self.char().is_whitespace() {
546	self.bump();
547	} else if self.char() == '#' {
548	let start = self.pos();
549	let mut comment_text = String::new();
550	self.bump();
551	while !self.is_eof() {
552	let c = self.char();
553	self.bump();
554	if c == '`\n`' {
555	break;
556	}
557	comment_text.push(c);
558	}
559	let comment = ast::Comment {
560	span: Span::new(start, self.pos()),
561	comment: comment_text,
562	};
563	self.parser().comments.borrow_mut().push(comment);
564	} else {
565	break;
566	}
567	}
568	}
569
570	/// Peek at the next character in the input without advancing the parser.
571	///
572	/// If the input has been exhausted, then this returns `None`.
573	fn peek(&self) -> Option<char> {
574	if self.is_eof() {
575	return None;
576	}
577	self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
578	}
579
580	/// Like peek, but will ignore spaces when the parser is in whitespace
581	/// insensitive mode.
582	fn peek_space(&self) -> Option<char> {
583	if !self.ignore_whitespace() {
584	return self.peek();
585	}
586	if self.is_eof() {
587	return None;
588	}
589	let mut start = self.offset() + self.char().len_utf8();
590	let mut in_comment = `false`;
591	for (i, c) in self.pattern()[start..].char_indices() {
592	if c.is_whitespace() {
593	continue;
594	} else if !in_comment && c == '#' {
595	in_comment = `true`;
596	} else if in_comment && c == '`\n`' {
597	in_comment = `false`;
598	} else {
599	start += i;
600	break;
601	}
602	}
603	self.pattern()[start..].chars().next()
604	}
605
606	/// Returns true if the next call to `bump` would return false.
607	fn is_eof(&self) -> bool {
608	self.offset() == self.pattern().len()
609	}
610
611	/// Return the current position of the parser, which includes the offset,
612	/// line and column.
613	fn pos(&self) -> Position {
614	self.parser().pos.get()
615	}
616
617	/// Create a span at the current position of the parser. Both the start
618	/// and end of the span are set.
619	fn span(&self) -> Span {
620	Span::splat(self.pos())
621	}
622
623	/// Create a span that covers the current character.
624	fn span_char(&self) -> Span {
625	let mut next = Position {
626	offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
627	line: self.line(),
628	column: self.column().checked_add(`1`).unwrap(),
629	};
630	if self.char() == '`\n`' {
631	next.line += `1`;
632	next.column = `1`;
633	}
634	Span::new(self.pos(), next)
635	}
636
637	/// Parse and push a single alternation on to the parser's internal stack.
638	/// If the top of the stack already has an alternation, then add to that
639	/// instead of pushing a new one.
640	///
641	/// The concatenation given corresponds to a single alternation branch.
642	/// The concatenation returned starts the next branch and is empty.
643	///
644	/// This assumes the parser is currently positioned at `\|` and will advance
645	/// the parser to the character following `\|`.
646	#[inline(never)]
647	fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
648	assert_eq!(self.char(), '\|');
649	concat.span.end = self.pos();
650	self.push_or_add_alternation(concat);
651	self.bump();
652	Ok(ast::Concat { span: self.span(), asts: vec![] })
653	}
654
655	/// Pushes or adds the given branch of an alternation to the parser's
656	/// internal stack of state.
657	fn push_or_add_alternation(&self, concat: ast::Concat) {
658	use self::GroupState::*;
659
660	let mut stack = self.parser().stack_group.borrow_mut();
661	if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
662	alts.asts.push(concat.into_ast());
663	return;
664	}
665	stack.push(Alternation(ast::Alternation {
666	span: Span::new(concat.span.start, self.pos()),
667	asts: vec![concat.into_ast()],
668	}));
669	}
670
671	/// Parse and push a group AST (and its parent concatenation) on to the
672	/// parser's internal stack. Return a fresh concatenation corresponding
673	/// to the group's sub-AST.
674	///
675	/// If a set of flags was found (with no group), then the concatenation
676	/// is returned with that set of flags added.
677	///
678	/// This assumes that the parser is currently positioned on the opening
679	/// parenthesis. It advances the parser to the character at the start
680	/// of the sub-expression (or adjoining expression).
681	///
682	/// If there was a problem parsing the start of the group, then an error
683	/// is returned.
684	#[inline(never)]
685	fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
686	assert_eq!(self.char(), '(');
687	match self.parse_group()? {
688	Either::Left(set) => {
689	let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
690	if let Some(v) = ignore {
691	self.parser().ignore_whitespace.set(v);
692	}
693
694	concat.asts.push(Ast::Flags(set));
695	Ok(concat)
696	}
697	Either::Right(group) => {
698	let old_ignore_whitespace = self.ignore_whitespace();
699	let new_ignore_whitespace = group
700	.flags()
701	.and_then(\|f\| f.flag_state(ast::Flag::IgnoreWhitespace))
702	.unwrap_or(old_ignore_whitespace);
703	self.parser().stack_group.borrow_mut().push(
704	GroupState::Group {
705	concat,
706	group,
707	ignore_whitespace: old_ignore_whitespace,
708	},
709	);
710	self.parser().ignore_whitespace.set(new_ignore_whitespace);
711	Ok(ast::Concat { span: self.span(), asts: vec![] })
712	}
713	}
714	}
715
/// Pop a group AST from the parser's internal stack and set the group's
/// AST to the given concatenation. Return the concatenation containing
/// the group.
///
/// If an alternation sits on top of the group's stack frame, then the given
/// concatenation becomes the last branch of that alternation, and the
/// alternation becomes the group's AST.
///
/// The `x` flag setting saved when the group was opened is restored.
///
/// This assumes that the parser is currently positioned on the closing
/// parenthesis and advances the parser to the character following the `)`.
///
/// If no such group could be popped, then an unopened group error is
/// returned.
#[inline(never)]
fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
    use self::GroupState::*;

    assert_eq!(self.char(), ')');
    let mut stack = self.parser().stack_group.borrow_mut();
    // The top of the stack is either the group itself, or a pending
    // alternation with the group directly beneath it.
    let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
        .pop()
    {
        Some(Group { concat, group, ignore_whitespace }) => {
            (concat, group, ignore_whitespace, None)
        }
        Some(Alternation(alt)) => match stack.pop() {
            Some(Group { concat, group, ignore_whitespace }) => {
                (concat, group, ignore_whitespace, Some(alt))
            }
            None | Some(Alternation(_)) => {
                return Err(self.error(
                    self.span_char(),
                    ast::ErrorKind::GroupUnopened,
                ));
            }
        },
        None => {
            return Err(self
                .error(self.span_char(), ast::ErrorKind::GroupUnopened));
        }
    };
    // Restore the whitespace setting that was saved when the group opened.
    self.parser().ignore_whitespace.set(ignore_whitespace);
    // The group's contents end at the `)`, while the group itself ends just
    // after it. Hence the end positions are taken on either side of `bump`.
    group_concat.span.end = self.pos();
    self.bump();
    group.span.end = self.pos();
    match alt {
        Some(mut alt) => {
            alt.span.end = group_concat.span.end;
            alt.asts.push(group_concat.into_ast());
            group.ast = Box::new(alt.into_ast());
        }
        None => {
            group.ast = Box::new(group_concat.into_ast());
        }
    }
    prior_concat.asts.push(Ast::Group(group));
    Ok(prior_concat)
}
770
771	/// Pop the last state from the parser's internal stack, if it exists, and
772	/// add the given concatenation to it. There either must be no state or a
773	/// single alternation item on the stack. Any other scenario produces an
774	/// error.
775	///
776	/// This assumes that the parser has advanced to the end.
777	#[inline(never)]
778	fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
779	concat.span.end = self.pos();
780	let mut stack = self.parser().stack_group.borrow_mut();
781	let ast = match stack.pop() {
782	None => Ok(concat.into_ast()),
783	Some(GroupState::Alternation(mut alt)) => {
784	alt.span.end = self.pos();
785	alt.asts.push(concat.into_ast());
786	Ok(Ast::Alternation(alt))
787	}
788	Some(GroupState::Group { group, .. }) => {
789	return Err(
790	self.error(group.span, ast::ErrorKind::GroupUnclosed)
791	);
792	}
793	};
794	// If we try to pop again, there should be nothing.
795	match stack.pop() {
796	None => ast,
797	Some(GroupState::Alternation(_)) => {
798	// This unreachable is unfortunate. This case can't happen
799	// because the only way we can be here is if there were two
800	// `GroupState::Alternation`s adjacent in the parser's stack,
801	// which we guarantee to never happen because we never push a
802	// `GroupState::Alternation` if one is already at the top of
803	// the stack.
804	unreachable!()
805	}
806	Some(GroupState::Group { group, .. }) => {
807	Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
808	}
809	}
810	}
811
812	/// Parse the opening of a character class and push the current class
813	/// parsing context onto the parser's stack. This assumes that the parser
814	/// is positioned at an opening `[`. The given union should correspond to
815	/// the union of set items built up before seeing the `[`.
816	///
817	/// If there was a problem parsing the opening of the class, then an error
818	/// is returned. Otherwise, a new union of set items for the class is
819	/// returned (which may be populated with either a `]` or a `-`).
820	#[inline(never)]
821	fn push_class_open(
822	&self,
823	parent_union: ast::ClassSetUnion,
824	) -> Result<ast::ClassSetUnion> {
825	assert_eq!(self.char(), '[');
826
827	let (nested_set, nested_union) = self.parse_set_class_open()?;
828	self.parser()
829	.stack_class
830	.borrow_mut()
831	.push(ClassState::Open { union: parent_union, set: nested_set });
832	Ok(nested_union)
833	}
834
/// Parse the end of a character class set and pop the character class
/// parser stack. The union given corresponds to the last union built
/// before seeing the closing `]`. The union returned corresponds to the
/// parent character class set with the nested class added to it.
///
/// This assumes that the parser is positioned at a `]` and will advance
/// the parser to the byte immediately following the `]`.
///
/// If the stack is empty after popping, then this returns the final
/// "top-level" character class AST (where a "top-level" character class
/// is one that is not nested inside any other character class).
///
/// # Panics
///
/// This panics if the parser is not positioned at a `]`, or if the top of
/// the class stack (after resolving a pending operator) is not an open
/// class.
#[inline(never)]
fn pop_class(
    &self,
    nested_union: ast::ClassSetUnion,
) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
    assert_eq!(self.char(), ']');

    let item = ast::ClassSet::Item(nested_union.into_item());
    // Resolve a pending binary operator, if any. This borrows the class
    // stack internally, so it must run before the stack is borrowed below.
    let prevset = self.pop_class_op(item);
    let mut stack = self.parser().stack_class.borrow_mut();
    match stack.pop() {
        None => {
            // We can never observe an empty stack:
            //
            // 1) We are guaranteed to start with a non-empty stack since
            // the character class parser is only initiated when it sees
            // a `[`.
            // 2) If we ever observe an empty stack while popping after
            // seeing a `]`, then we signal the character class parser
            // to terminate.
            panic!("unexpected empty character class stack")
        }
        Some(ClassState::Op { .. }) => {
            // This panic is unfortunate, but this case is impossible
            // since we already popped the Op state if one exists above.
            // Namely, every push to the class parser stack is guarded by
            // whether an existing Op is already on the top of the stack.
            // If it is, the existing Op is modified. That is, the stack
            // can never have consecutive Op states.
            panic!("unexpected ClassState::Op")
        }
        Some(ClassState::Open { mut union, mut set }) => {
            // Consume the `]` first so that the class span includes it.
            self.bump();
            set.span.end = self.pos();
            set.kind = prevset;
            if stack.is_empty() {
                // This was the outermost class.
                Ok(Either::Right(ast::Class::Bracketed(set)))
            } else {
                // A nested class becomes an item of its parent's union.
                union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
                Ok(Either::Left(union))
            }
        }
    }
}
893
894	/// Return an "unclosed class" error whose span points to the most
895	/// recently opened class.
896	///
897	/// This should only be called while parsing a character class.
898	#[inline(never)]
899	fn unclosed_class_error(&self) -> ast::Error {
900	for state in self.parser().stack_class.borrow().iter().rev() {
901	if let ClassState::Open { ref set, .. } = *state {
902	return self.error(set.span, ast::ErrorKind::ClassUnclosed);
903	}
904	}
905	// We are guaranteed to have a non-empty stack with at least
906	// one open bracket, so we should never get here.
907	panic!("no open character class found")
908	}
909
910	/// Push the current set of class items on to the class parser's stack as
911	/// the left hand side of the given operator.
912	///
913	/// A fresh set union is returned, which should be used to build the right
914	/// hand side of this operator.
915	#[inline(never)]
916	fn push_class_op(
917	&self,
918	next_kind: ast::ClassSetBinaryOpKind,
919	next_union: ast::ClassSetUnion,
920	) -> ast::ClassSetUnion {
921	let item = ast::ClassSet::Item(next_union.into_item());
922	let new_lhs = self.pop_class_op(item);
923	self.parser()
924	.stack_class
925	.borrow_mut()
926	.push(ClassState::Op { kind: next_kind, lhs: new_lhs });
927	ast::ClassSetUnion { span: self.span(), items: vec![] }
928	}
929
930	/// Pop a character class set from the character class parser stack. If the
931	/// top of the stack is just an item (not an operation), then return the
932	/// given set unchanged. If the top of the stack is an operation, then the
933	/// given set will be used as the rhs of the operation on the top of the
934	/// stack. In that case, the binary operation is returned as a set.
935	#[inline(never)]
936	fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
937	let mut stack = self.parser().stack_class.borrow_mut();
938	let (kind, lhs) = match stack.pop() {
939	Some(ClassState::Op { kind, lhs }) => (kind, lhs),
940	Some(state @ ClassState::Open { .. }) => {
941	stack.push(state);
942	return rhs;
943	}
944	None => unreachable!(),
945	};
946	let span = Span::new(lhs.span().start, rhs.span().end);
947	ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
948	span,
949	kind,
950	lhs: Box::new(lhs),
951	rhs: Box::new(rhs),
952	})
953	}
954	}
955
956	impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
957	/// Parse the regular expression into an abstract syntax tree.
958	fn parse(&self) -> Result<Ast> {
959	self.parse_with_comments().map(\|astc\| astc.ast)
960	}
961
962	/// Parse the regular expression and return an abstract syntax tree with
963	/// all of the comments found in the pattern.
964	fn parse_with_comments(&self) -> Result<ast::WithComments> {
965	assert_eq!(self.offset(), `0`, "parser can only be used once");
966	self.parser().reset();
967	let mut concat = ast::Concat { span: self.span(), asts: vec![] };
968	loop {
969	self.bump_space();
970	if self.is_eof() {
971	break;
972	}
973	match self.char() {
974	'(' => concat = self.push_group(concat)?,
975	')' => concat = self.pop_group(concat)?,
976	'\|' => concat = self.push_alternate(concat)?,
977	'[' => {
978	let class = self.parse_set_class()?;
979	concat.asts.push(Ast::Class(class));
980	}
981	'?' => {
982	concat = self.parse_uncounted_repetition(
983	concat,
984	ast::RepetitionKind::ZeroOrOne,
985	)?;
986	}
987	'*' => {
988	concat = self.parse_uncounted_repetition(
989	concat,
990	ast::RepetitionKind::ZeroOrMore,
991	)?;
992	}
993	'+' => {
994	concat = self.parse_uncounted_repetition(
995	concat,
996	ast::RepetitionKind::OneOrMore,
997	)?;
998	}
999	'{' => {
1000	concat = self.parse_counted_repetition(concat)?;
1001	}
1002	_ => concat.asts.push(self.parse_primitive()?.into_ast()),
1003	}
1004	}
1005	let ast = self.pop_group_end(concat)?;
1006	NestLimiter::new(self).check(&ast)?;
1007	Ok(ast::WithComments {
1008	ast,
1009	comments: mem::replace(
1010	&mut *self.parser().comments.borrow_mut(),
1011	vec![],
1012	),
1013	})
1014	}
1015
1016	/// Parses an uncounted repetition operation. An uncounted repetition
1017	/// operator includes ?, and +, but does not include the {m,n} syntax.*
1018	/// The given `kind` should correspond to the operator observed by the
1019	/// caller.
1020	///
1021	/// This assumes that the parser is currently positioned at the repetition
1022	/// operator and advances the parser to the first character after the
1023	/// operator. (Note that the operator may include a single additional `?`,
1024	/// which makes the operator ungreedy.)
1025	///
1026	/// The caller should include the concatenation that is being built. The
1027	/// concatenation returned includes the repetition operator applied to the
1028	/// last expression in the given concatenation.
1029	#[inline(never)]
1030	fn parse_uncounted_repetition(
1031	&self,
1032	mut concat: ast::Concat,
1033	kind: ast::RepetitionKind,
1034	) -> Result<ast::Concat> {
1035	assert!(
1036	self.char() == '?' \|\| self.char() == '*' \|\| self.char() == '+'
1037	);
1038	let op_start = self.pos();
1039	let ast = match concat.asts.pop() {
1040	Some(ast) => ast,
1041	None => {
1042	return Err(
1043	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044	)
1045	}
1046	};
1047	match ast {
1048	Ast::Empty(_) \| Ast::Flags(_) => {
1049	return Err(
1050	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1051	)
1052	}
1053	_ => {}
1054	}
1055	let mut greedy = `true`;
1056	if self.bump() && self.char() == '?' {
1057	greedy = `false`;
1058	self.bump();
1059	}
1060	concat.asts.push(Ast::Repetition(ast::Repetition {
1061	span: ast.span().with_end(self.pos()),
1062	op: ast::RepetitionOp {
1063	span: Span::new(op_start, self.pos()),
1064	kind,
1065	},
1066	greedy,
1067	ast: Box::new(ast),
1068	}));
1069	Ok(concat)
1070	}
1071
1072	/// Parses a counted repetition operation. A counted repetition operator
1073	/// corresponds to the {m,n} syntax, and does not include the ?, or +*
1074	/// operators.
1075	///
1076	/// This assumes that the parser is currently positioned at the opening `{`
1077	/// and advances the parser to the first character after the operator.
1078	/// (Note that the operator may include a single additional `?`, which
1079	/// makes the operator ungreedy.)
1080	///
1081	/// The caller should include the concatenation that is being built. The
1082	/// concatenation returned includes the repetition operator applied to the
1083	/// last expression in the given concatenation.
1084	#[inline(never)]
1085	fn parse_counted_repetition(
1086	&self,
1087	mut concat: ast::Concat,
1088	) -> Result<ast::Concat> {
1089	assert!(self.char() == '{');
1090	let start = self.pos();
1091	let ast = match concat.asts.pop() {
1092	Some(ast) => ast,
1093	None => {
1094	return Err(
1095	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096	)
1097	}
1098	};
1099	match ast {
1100	Ast::Empty(_) \| Ast::Flags(_) => {
1101	return Err(
1102	self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1103	)
1104	}
1105	_ => {}
1106	}
1107	if !self.bump_and_bump_space() {
1108	return Err(self.error(
1109	Span::new(start, self.pos()),
1110	ast::ErrorKind::RepetitionCountUnclosed,
1111	));
1112	}
1113	let count_start = specialize_err(
1114	self.parse_decimal(),
1115	ast::ErrorKind::DecimalEmpty,
1116	ast::ErrorKind::RepetitionCountDecimalEmpty,
1117	)?;
1118	let mut range = ast::RepetitionRange::Exactly(count_start);
1119	if self.is_eof() {
1120	return Err(self.error(
1121	Span::new(start, self.pos()),
1122	ast::ErrorKind::RepetitionCountUnclosed,
1123	));
1124	}
1125	if self.char() == ',' {
1126	if !self.bump_and_bump_space() {
1127	return Err(self.error(
1128	Span::new(start, self.pos()),
1129	ast::ErrorKind::RepetitionCountUnclosed,
1130	));
1131	}
1132	if self.char() != '}' {
1133	let count_end = specialize_err(
1134	self.parse_decimal(),
1135	ast::ErrorKind::DecimalEmpty,
1136	ast::ErrorKind::RepetitionCountDecimalEmpty,
1137	)?;
1138	range = ast::RepetitionRange::Bounded(count_start, count_end);
1139	} else {
1140	range = ast::RepetitionRange::AtLeast(count_start);
1141	}
1142	}
1143	if self.is_eof() \|\| self.char() != '}' {
1144	return Err(self.error(
1145	Span::new(start, self.pos()),
1146	ast::ErrorKind::RepetitionCountUnclosed,
1147	));
1148	}
1149
1150	let mut greedy = `true`;
1151	if self.bump_and_bump_space() && self.char() == '?' {
1152	greedy = `false`;
1153	self.bump();
1154	}
1155
1156	let op_span = Span::new(start, self.pos());
1157	if !range.is_valid() {
1158	return Err(
1159	self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1160	);
1161	}
1162	concat.asts.push(Ast::Repetition(ast::Repetition {
1163	span: ast.span().with_end(self.pos()),
1164	op: ast::RepetitionOp {
1165	span: op_span,
1166	kind: ast::RepetitionKind::Range(range),
1167	},
1168	greedy,
1169	ast: Box::new(ast),
1170	}));
1171	Ok(concat)
1172	}
1173
1174	/// Parse a group (which contains a sub-expression) or a set of flags.
1175	///
1176	/// If a group was found, then it is returned with an empty AST. If a set
1177	/// of flags is found, then that set is returned.
1178	///
1179	/// The parser should be positioned at the opening parenthesis.
1180	///
1181	/// This advances the parser to the character before the start of the
1182	/// sub-expression (in the case of a group) or to the closing parenthesis
1183	/// immediately following the set of flags.
1184	///
1185	/// # Errors
1186	///
1187	/// If flags are given and incorrectly specified, then a corresponding
1188	/// error is returned.
1189	///
1190	/// If a capture name is given and it is incorrectly specified, then a
1191	/// corresponding error is returned.
1192	#[inline(never)]
1193	fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1194	assert_eq!(self.char(), '(');
1195	let open_span = self.span_char();
1196	self.bump();
1197	self.bump_space();
1198	if self.is_lookaround_prefix() {
1199	return Err(self.error(
1200	Span::new(open_span.start, self.span().end),
1201	ast::ErrorKind::UnsupportedLookAround,
1202	));
1203	}
1204	let inner_span = self.span();
1205	let mut starts_with_p = `true`;
1206	if self.bump_if("?P<") \|\| {
1207	starts_with_p = `false`;
1208	self.bump_if("?<")
1209	} {
1210	let capture_index = self.next_capture_index(open_span)?;
1211	let name = self.parse_capture_name(capture_index)?;
1212	Ok(Either::Right(ast::Group {
1213	span: open_span,
1214	kind: ast::GroupKind::CaptureName { starts_with_p, name },
1215	ast: Box::new(Ast::Empty(self.span())),
1216	}))
1217	} else if self.bump_if("?") {
1218	if self.is_eof() {
1219	return Err(
1220	self.error(open_span, ast::ErrorKind::GroupUnclosed)
1221	);
1222	}
1223	let flags = self.parse_flags()?;
1224	let char_end = self.char();
1225	self.bump();
1226	if char_end == ')' {
1227	// We don't allow empty flags, e.g., `(?)`. We instead
1228	// interpret it as a repetition operator missing its argument.
1229	if flags.items.is_empty() {
1230	return Err(self.error(
1231	inner_span,
1232	ast::ErrorKind::RepetitionMissing,
1233	));
1234	}
1235	Ok(Either::Left(ast::SetFlags {
1236	span: Span { end: self.pos(), ..open_span },
1237	flags,
1238	}))
1239	} else {
1240	assert_eq!(char_end, ':');
1241	Ok(Either::Right(ast::Group {
1242	span: open_span,
1243	kind: ast::GroupKind::NonCapturing(flags),
1244	ast: Box::new(Ast::Empty(self.span())),
1245	}))
1246	}
1247	} else {
1248	let capture_index = self.next_capture_index(open_span)?;
1249	Ok(Either::Right(ast::Group {
1250	span: open_span,
1251	kind: ast::GroupKind::CaptureIndex(capture_index),
1252	ast: Box::new(Ast::Empty(self.span())),
1253	}))
1254	}
1255	}
1256
1257	/// Parses a capture group name. Assumes that the parser is positioned at
1258	/// the first character in the name following the opening `<` (and may
1259	/// possibly be EOF). This advances the parser to the first character
1260	/// following the closing `>`.
1261	///
1262	/// The caller must provide the capture index of the group for this name.
1263	#[inline(never)]
1264	fn parse_capture_name(
1265	&self,
1266	capture_index: u32,
1267	) -> Result<ast::CaptureName> {
1268	if self.is_eof() {
1269	return Err(self
1270	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1271	}
1272	let start = self.pos();
1273	loop {
1274	if self.char() == '>' {
1275	break;
1276	}
1277	if !is_capture_char(self.char(), self.pos() == start) {
1278	return Err(self.error(
1279	self.span_char(),
1280	ast::ErrorKind::GroupNameInvalid,
1281	));
1282	}
1283	if !self.bump() {
1284	break;
1285	}
1286	}
1287	let end = self.pos();
1288	if self.is_eof() {
1289	return Err(self
1290	.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1291	}
1292	assert_eq!(self.char(), '>');
1293	self.bump();
1294	let name = &self.pattern()[start.offset..end.offset];
1295	if name.is_empty() {
1296	return Err(self.error(
1297	Span::new(start, start),
1298	ast::ErrorKind::GroupNameEmpty,
1299	));
1300	}
1301	let capname = ast::CaptureName {
1302	span: Span::new(start, end),
1303	name: name.to_string(),
1304	index: capture_index,
1305	};
1306	self.add_capture_name(&capname)?;
1307	Ok(capname)
1308	}
1309
1310	/// Parse a sequence of flags starting at the current character.
1311	///
1312	/// This advances the parser to the character immediately following the
1313	/// flags, which is guaranteed to be either `:` or `)`.
1314	///
1315	/// # Errors
1316	///
1317	/// If any flags are duplicated, then an error is returned.
1318	///
1319	/// If the negation operator is used more than once, then an error is
1320	/// returned.
1321	///
1322	/// If no flags could be found or if the negation operation is not followed
1323	/// by any flags, then an error is returned.
1324	#[inline(never)]
1325	fn parse_flags(&self) -> Result<ast::Flags> {
1326	let mut flags = ast::Flags { span: self.span(), items: vec![] };
1327	let mut last_was_negation = None;
1328	while self.char() != ':' && self.char() != ')' {
1329	if self.char() == '-' {
1330	last_was_negation = Some(self.span_char());
1331	let item = ast::FlagsItem {
1332	span: self.span_char(),
1333	kind: ast::FlagsItemKind::Negation,
1334	};
1335	if let Some(i) = flags.add_item(item) {
1336	return Err(self.error(
1337	self.span_char(),
1338	ast::ErrorKind::FlagRepeatedNegation {
1339	original: flags.items[i].span,
1340	},
1341	));
1342	}
1343	} else {
1344	last_was_negation = None;
1345	let item = ast::FlagsItem {
1346	span: self.span_char(),
1347	kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1348	};
1349	if let Some(i) = flags.add_item(item) {
1350	return Err(self.error(
1351	self.span_char(),
1352	ast::ErrorKind::FlagDuplicate {
1353	original: flags.items[i].span,
1354	},
1355	));
1356	}
1357	}
1358	if !self.bump() {
1359	return Err(
1360	self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1361	);
1362	}
1363	}
1364	if let Some(span) = last_was_negation {
1365	return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1366	}
1367	flags.span.end = self.pos();
1368	Ok(flags)
1369	}
1370
1371	/// Parse the current character as a flag. Do not advance the parser.
1372	///
1373	/// # Errors
1374	///
1375	/// If the flag is not recognized, then an error is returned.
1376	#[inline(never)]
1377	fn parse_flag(&self) -> Result<ast::Flag> {
1378	match self.char() {
1379	'i' => Ok(ast::Flag::CaseInsensitive),
1380	'm' => Ok(ast::Flag::MultiLine),
1381	's' => Ok(ast::Flag::DotMatchesNewLine),
1382	'U' => Ok(ast::Flag::SwapGreed),
1383	'u' => Ok(ast::Flag::Unicode),
1384	'R' => Ok(ast::Flag::CRLF),
1385	'x' => Ok(ast::Flag::IgnoreWhitespace),
1386	_ => {
1387	Err(self
1388	.error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1389	}
1390	}
1391	}
1392
1393	/// Parse a primitive AST. e.g., A literal, non-set character class or
1394	/// assertion.
1395	///
1396	/// This assumes that the parser expects a primitive at the current
1397	/// location. i.e., All other non-primitive cases have been handled.
1398	/// For example, if the parser's position is at `\|`, then `\|` will be
1399	/// treated as a literal (e.g., inside a character class).
1400	///
1401	/// This advances the parser to the first character immediately following
1402	/// the primitive.
1403	fn parse_primitive(&self) -> Result<Primitive> {
1404	match self.char() {
1405	'`\\`' => self.parse_escape(),
1406	'.' => {
1407	let ast = Primitive::Dot(self.span_char());
1408	self.bump();
1409	Ok(ast)
1410	}
1411	'^' => {
1412	let ast = Primitive::Assertion(ast::Assertion {
1413	span: self.span_char(),
1414	kind: ast::AssertionKind::StartLine,
1415	});
1416	self.bump();
1417	Ok(ast)
1418	}
1419	'$' => {
1420	let ast = Primitive::Assertion(ast::Assertion {
1421	span: self.span_char(),
1422	kind: ast::AssertionKind::EndLine,
1423	});
1424	self.bump();
1425	Ok(ast)
1426	}
1427	c => {
1428	let ast = Primitive::Literal(ast::Literal {
1429	span: self.span_char(),
1430	kind: ast::LiteralKind::Verbatim,
1431	c,
1432	});
1433	self.bump();
1434	Ok(ast)
1435	}
1436	}
1437	}
1438
1439	/// Parse an escape sequence as a primitive AST.
1440	///
1441	/// This assumes the parser is positioned at the start of the escape
1442	/// sequence, i.e., `\`. It advances the parser to the first position
1443	/// immediately following the escape sequence.
1444	#[inline(never)]
1445	fn parse_escape(&self) -> Result<Primitive> {
1446	assert_eq!(self.char(), '`\\`');
1447	let start = self.pos();
1448	if !self.bump() {
1449	return Err(self.error(
1450	Span::new(start, self.pos()),
1451	ast::ErrorKind::EscapeUnexpectedEof,
1452	));
1453	}
1454	let c = self.char();
1455	// Put some of the more complicated routines into helpers.
1456	match c {
1457	'0'..='7' => {
1458	if !self.parser().octal {
1459	return Err(self.error(
1460	Span::new(start, self.span_char().end),
1461	ast::ErrorKind::UnsupportedBackreference,
1462	));
1463	}
1464	let mut lit = self.parse_octal();
1465	lit.span.start = start;
1466	return Ok(Primitive::Literal(lit));
1467	}
1468	'8'..='9' if !self.parser().octal => {
1469	return Err(self.error(
1470	Span::new(start, self.span_char().end),
1471	ast::ErrorKind::UnsupportedBackreference,
1472	));
1473	}
1474	'x' \| 'u' \| 'U' => {
1475	let mut lit = self.parse_hex()?;
1476	lit.span.start = start;
1477	return Ok(Primitive::Literal(lit));
1478	}
1479	'p' \| 'P' => {
1480	let mut cls = self.parse_unicode_class()?;
1481	cls.span.start = start;
1482	return Ok(Primitive::Unicode(cls));
1483	}
1484	'd' \| 's' \| 'w' \| 'D' \| 'S' \| 'W' => {
1485	let mut cls = self.parse_perl_class();
1486	cls.span.start = start;
1487	return Ok(Primitive::Perl(cls));
1488	}
1489	_ => {}
1490	}
1491
1492	// Handle all of the one letter sequences inline.
1493	self.bump();
1494	let span = Span::new(start, self.pos());
1495	if is_meta_character(c) {
1496	return Ok(Primitive::Literal(ast::Literal {
1497	span,
1498	kind: ast::LiteralKind::Meta,
1499	c,
1500	}));
1501	}
1502	if is_escapeable_character(c) {
1503	return Ok(Primitive::Literal(ast::Literal {
1504	span,
1505	kind: ast::LiteralKind::Superfluous,
1506	c,
1507	}));
1508	}
1509	let special = \|kind, c\| {
1510	Ok(Primitive::Literal(ast::Literal {
1511	span,
1512	kind: ast::LiteralKind::Special(kind),
1513	c,
1514	}))
1515	};
1516	match c {
1517	'a' => special(ast::SpecialLiteralKind::Bell, '`\x07`'),
1518	'f' => special(ast::SpecialLiteralKind::FormFeed, '`\x0C`'),
1519	't' => special(ast::SpecialLiteralKind::Tab, '`\t`'),
1520	'n' => special(ast::SpecialLiteralKind::LineFeed, '`\n`'),
1521	'r' => special(ast::SpecialLiteralKind::CarriageReturn, '`\r`'),
1522	'v' => special(ast::SpecialLiteralKind::VerticalTab, '`\x0B`'),
1523	'A' => Ok(Primitive::Assertion(ast::Assertion {
1524	span,
1525	kind: ast::AssertionKind::StartText,
1526	})),
1527	'z' => Ok(Primitive::Assertion(ast::Assertion {
1528	span,
1529	kind: ast::AssertionKind::EndText,
1530	})),
1531	'b' => Ok(Primitive::Assertion(ast::Assertion {
1532	span,
1533	kind: ast::AssertionKind::WordBoundary,
1534	})),
1535	'B' => Ok(Primitive::Assertion(ast::Assertion {
1536	span,
1537	kind: ast::AssertionKind::NotWordBoundary,
1538	})),
1539	_ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1540	}
1541	}
1542
1543	/// Parse an octal representation of a Unicode codepoint up to 3 digits
1544	/// long. This expects the parser to be positioned at the first octal
1545	/// digit and advances the parser to the first character immediately
1546	/// following the octal number. This also assumes that parsing octal
1547	/// escapes is enabled.
1548	///
1549	/// Assuming the preconditions are met, this routine can never fail.
1550	#[inline(never)]
1551	fn parse_octal(&self) -> ast::Literal {
1552	assert!(self.parser().octal);
1553	assert!('0' <= self.char() && self.char() <= '7');
1554	let start = self.pos();
1555	// Parse up to two more digits.
1556	while self.bump()
1557	&& '0' <= self.char()
1558	&& self.char() <= '7'
1559	&& self.pos().offset - start.offset <= `2`
1560	{}
1561	let end = self.pos();
1562	let octal = &self.pattern()[start.offset..end.offset];
1563	// Parsing the octal should never fail since the above guarantees a
1564	// valid number.
1565	let codepoint =
1566	u32::from_str_radix(octal, `8`).expect("valid octal number");
1567	// The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1568	// invalid Unicode scalar values.
1569	let c = char::from_u32(codepoint).expect("Unicode scalar value");
1570	ast::Literal {
1571	span: Span::new(start, end),
1572	kind: ast::LiteralKind::Octal,
1573	c,
1574	}
1575	}
1576
1577	/// Parse a hex representation of a Unicode codepoint. This handles both
1578	/// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1579	/// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1580	/// the first character immediately following the hexadecimal literal.
1581	#[inline(never)]
1582	fn parse_hex(&self) -> Result<ast::Literal> {
1583	assert!(
1584	self.char() == 'x' \|\| self.char() == 'u' \|\| self.char() == 'U'
1585	);
1586
1587	let hex_kind = match self.char() {
1588	'x' => ast::HexLiteralKind::X,
1589	'u' => ast::HexLiteralKind::UnicodeShort,
1590	_ => ast::HexLiteralKind::UnicodeLong,
1591	};
1592	if !self.bump_and_bump_space() {
1593	return Err(
1594	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1595	);
1596	}
1597	if self.char() == '{' {
1598	self.parse_hex_brace(hex_kind)
1599	} else {
1600	self.parse_hex_digits(hex_kind)
1601	}
1602	}
1603
1604	/// Parse an N-digit hex representation of a Unicode codepoint. This
1605	/// expects the parser to be positioned at the first digit and will advance
1606	/// the parser to the first character immediately following the escape
1607	/// sequence.
1608	///
1609	/// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1610	/// or 8 (for `\UNNNNNNNN`).
1611	#[inline(never)]
1612	fn parse_hex_digits(
1613	&self,
1614	kind: ast::HexLiteralKind,
1615	) -> Result<ast::Literal> {
1616	let mut scratch = self.parser().scratch.borrow_mut();
1617	scratch.clear();
1618
1619	let start = self.pos();
1620	for i in `0`..kind.digits() {
1621	if i > `0` && !self.bump_and_bump_space() {
1622	return Err(self
1623	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1624	}
1625	if !is_hex(self.char()) {
1626	return Err(self.error(
1627	self.span_char(),
1628	ast::ErrorKind::EscapeHexInvalidDigit,
1629	));
1630	}
1631	scratch.push(self.char());
1632	}
1633	// The final bump just moves the parser past the literal, which may
1634	// be EOF.
1635	self.bump_and_bump_space();
1636	let end = self.pos();
1637	let hex = scratch.as_str();
1638	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1639	None => Err(self.error(
1640	Span::new(start, end),
1641	ast::ErrorKind::EscapeHexInvalid,
1642	)),
1643	Some(c) => Ok(ast::Literal {
1644	span: Span::new(start, end),
1645	kind: ast::LiteralKind::HexFixed(kind),
1646	c,
1647	}),
1648	}
1649	}
1650
1651	/// Parse a hex representation of any Unicode scalar value. This expects
1652	/// the parser to be positioned at the opening brace `{` and will advance
1653	/// the parser to the first character following the closing brace `}`.
1654	#[inline(never)]
1655	fn parse_hex_brace(
1656	&self,
1657	kind: ast::HexLiteralKind,
1658	) -> Result<ast::Literal> {
1659	let mut scratch = self.parser().scratch.borrow_mut();
1660	scratch.clear();
1661
1662	let brace_pos = self.pos();
1663	let start = self.span_char().end;
1664	while self.bump_and_bump_space() && self.char() != '}' {
1665	if !is_hex(self.char()) {
1666	return Err(self.error(
1667	self.span_char(),
1668	ast::ErrorKind::EscapeHexInvalidDigit,
1669	));
1670	}
1671	scratch.push(self.char());
1672	}
1673	if self.is_eof() {
1674	return Err(self.error(
1675	Span::new(brace_pos, self.pos()),
1676	ast::ErrorKind::EscapeUnexpectedEof,
1677	));
1678	}
1679	let end = self.pos();
1680	let hex = scratch.as_str();
1681	assert_eq!(self.char(), '}');
1682	self.bump_and_bump_space();
1683
1684	if hex.is_empty() {
1685	return Err(self.error(
1686	Span::new(brace_pos, self.pos()),
1687	ast::ErrorKind::EscapeHexEmpty,
1688	));
1689	}
1690	match u32::from_str_radix(hex, `16`).ok().and_then(char::from_u32) {
1691	None => Err(self.error(
1692	Span::new(start, end),
1693	ast::ErrorKind::EscapeHexInvalid,
1694	)),
1695	Some(c) => Ok(ast::Literal {
1696	span: Span::new(start, self.pos()),
1697	kind: ast::LiteralKind::HexBrace(kind),
1698	c,
1699	}),
1700	}
1701	}
1702
1703	/// Parse a decimal number into a u32 while trimming leading and trailing
1704	/// whitespace.
1705	///
1706	/// This expects the parser to be positioned at the first position where
1707	/// a decimal digit could occur. This will advance the parser to the byte
1708	/// immediately following the last contiguous decimal digit.
1709	///
1710	/// If no decimal digit could be found or if there was a problem parsing
1711	/// the complete set of digits into a u32, then an error is returned.
1712	fn parse_decimal(&self) -> Result<u32> {
1713	let mut scratch = self.parser().scratch.borrow_mut();
1714	scratch.clear();
1715
1716	while !self.is_eof() && self.char().is_whitespace() {
1717	self.bump();
1718	}
1719	let start = self.pos();
1720	while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1721	scratch.push(self.char());
1722	self.bump_and_bump_space();
1723	}
1724	let span = Span::new(start, self.pos());
1725	while !self.is_eof() && self.char().is_whitespace() {
1726	self.bump_and_bump_space();
1727	}
1728	let digits = scratch.as_str();
1729	if digits.is_empty() {
1730	return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1731	}
1732	match u32::from_str_radix(digits, `10`).ok() {
1733	Some(n) => Ok(n),
1734	None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1735	}
1736	}
1737
1738	/// Parse a standard character class consisting primarily of characters or
1739	/// character ranges, but can also contain nested character classes of
1740	/// any type (sans `.`).
1741	///
1742	/// This assumes the parser is positioned at the opening `[`. If parsing
1743	/// is successful, then the parser is advanced to the position immediately
1744	/// following the closing `]`.
1745	#[inline(never)]
1746	fn parse_set_class(&self) -> Result<ast::Class> {
1747	assert_eq!(self.char(), '[');
1748
1749	let mut union =
1750	ast::ClassSetUnion { span: self.span(), items: vec![] };
1751	loop {
1752	self.bump_space();
1753	if self.is_eof() {
1754	return Err(self.unclosed_class_error());
1755	}
1756	match self.char() {
1757	'[' => {
1758	// If we've already parsed the opening bracket, then
1759	// attempt to treat this as the beginning of an ASCII
1760	// class. If ASCII class parsing fails, then the parser
1761	// backs up to `[`.
1762	if !self.parser().stack_class.borrow().is_empty() {
1763	if let Some(cls) = self.maybe_parse_ascii_class() {
1764	union.push(ast::ClassSetItem::Ascii(cls));
1765	continue;
1766	}
1767	}
1768	union = self.push_class_open(union)?;
1769	}
1770	']' => match self.pop_class(union)? {
1771	Either::Left(nested_union) => {
1772	union = nested_union;
1773	}
1774	Either::Right(class) => return Ok(class),
1775	},
1776	'&' if self.peek() == Some('&') => {
1777	assert!(self.bump_if("&&"));
1778	union = self.push_class_op(
1779	ast::ClassSetBinaryOpKind::Intersection,
1780	union,
1781	);
1782	}
1783	'-' if self.peek() == Some('-') => {
1784	assert!(self.bump_if("--"));
1785	union = self.push_class_op(
1786	ast::ClassSetBinaryOpKind::Difference,
1787	union,
1788	);
1789	}
1790	'~' if self.peek() == Some('~') => {
1791	assert!(self.bump_if("~~"));
1792	union = self.push_class_op(
1793	ast::ClassSetBinaryOpKind::SymmetricDifference,
1794	union,
1795	);
1796	}
1797	_ => {
1798	union.push(self.parse_set_class_range()?);
1799	}
1800	}
1801	}
1802	}
1803
1804	/// Parse a single primitive item in a character class set. The item to
1805	/// be parsed can either be one of a simple literal character, a range
1806	/// between two simple literal characters or a "primitive" character
1807	/// class like \w or \p{Greek}.
1808	///
1809	/// If an invalid escape is found, or if a character class is found where
1810	/// a simple literal is expected (e.g., in a range), then an error is
1811	/// returned.
1812	#[inline(never)]
1813	fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1814	let prim1 = self.parse_set_class_item()?;
1815	self.bump_space();
1816	if self.is_eof() {
1817	return Err(self.unclosed_class_error());
1818	}
1819	// If the next char isn't a `-`, then we don't have a range.
1820	// There are two exceptions. If the char after a `-` is a `]`, then
1821	// `-` is interpreted as a literal `-`. Alternatively, if the char
1822	// after a `-` is a `-`, then `--` corresponds to a "difference"
1823	// operation.
1824	if self.char() != '-'
1825	\|\| self.peek_space() == Some(']')
1826	\|\| self.peek_space() == Some('-')
1827	{
1828	return prim1.into_class_set_item(self);
1829	}
1830	// OK, now we're parsing a range, so bump past the `-` and parse the
1831	// second half of the range.
1832	if !self.bump_and_bump_space() {
1833	return Err(self.unclosed_class_error());
1834	}
1835	let prim2 = self.parse_set_class_item()?;
1836	let range = ast::ClassSetRange {
1837	span: Span::new(prim1.span().start, prim2.span().end),
1838	start: prim1.into_class_literal(self)?,
1839	end: prim2.into_class_literal(self)?,
1840	};
1841	if !range.is_valid() {
1842	return Err(
1843	self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1844	);
1845	}
1846	Ok(ast::ClassSetItem::Range(range))
1847	}
1848
1849	/// Parse a single item in a character class as a primitive, where the
1850	/// primitive either consists of a verbatim literal or a single escape
1851	/// sequence.
1852	///
1853	/// This assumes the parser is positioned at the beginning of a primitive,
1854	/// and advances the parser to the first position after the primitive if
1855	/// successful.
1856	///
1857	/// Note that it is the caller's responsibility to report an error if an
1858	/// illegal primitive was parsed.
1859	#[inline(never)]
1860	fn parse_set_class_item(&self) -> Result<Primitive> {
1861	if self.char() == '`\\`' {
1862	self.parse_escape()
1863	} else {
1864	let x = Primitive::Literal(ast::Literal {
1865	span: self.span_char(),
1866	kind: ast::LiteralKind::Verbatim,
1867	c: self.char(),
1868	});
1869	self.bump();
1870	Ok(x)
1871	}
1872	}
1873
1874	/// Parses the opening of a character class set. This includes the opening
1875	/// bracket along with `^` if present to indicate negation. This also
1876	/// starts parsing the opening set of unioned items if applicable, since
1877	/// there are special rules applied to certain characters in the opening
1878	/// of a character class. For example, `[^]]` is the class of all
1879	/// characters not equal to `]`. (`]` would need to be escaped in any other
1880	/// position.) Similarly for `-`.
1881	///
1882	/// In all cases, the op inside the returned `ast::ClassBracketed` is an
1883	/// empty union. This empty union should be replaced with the actual item
1884	/// when it is popped from the parser's stack.
1885	///
1886	/// This assumes the parser is positioned at the opening `[` and advances
1887	/// the parser to the first non-special byte of the character class.
1888	///
1889	/// An error is returned if EOF is found.
1890	#[inline(never)]
1891	fn parse_set_class_open(
1892	&self,
1893	) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
1894	assert_eq!(self.char(), '[');
1895	let start = self.pos();
1896	if !self.bump_and_bump_space() {
1897	return Err(self.error(
1898	Span::new(start, self.pos()),
1899	ast::ErrorKind::ClassUnclosed,
1900	));
1901	}
1902
1903	let negated = if self.char() != '^' {
1904	`false`
1905	} else {
1906	if !self.bump_and_bump_space() {
1907	return Err(self.error(
1908	Span::new(start, self.pos()),
1909	ast::ErrorKind::ClassUnclosed,
1910	));
1911	}
1912	`true`
1913	};
1914	// Accept any number of `-` as literal `-`.
1915	let mut union =
1916	ast::ClassSetUnion { span: self.span(), items: vec![] };
1917	while self.char() == '-' {
1918	union.push(ast::ClassSetItem::Literal(ast::Literal {
1919	span: self.span_char(),
1920	kind: ast::LiteralKind::Verbatim,
1921	c: '-',
1922	}));
1923	if !self.bump_and_bump_space() {
1924	return Err(self.error(
1925	Span::new(start, start),
1926	ast::ErrorKind::ClassUnclosed,
1927	));
1928	}
1929	}
1930	// If `]` is the first* char in a set, then interpret it as a literal*
1931	// `]`. That is, an empty class is impossible to write.
1932	if union.items.is_empty() && self.char() == ']' {
1933	union.push(ast::ClassSetItem::Literal(ast::Literal {
1934	span: self.span_char(),
1935	kind: ast::LiteralKind::Verbatim,
1936	c: ']',
1937	}));
1938	if !self.bump_and_bump_space() {
1939	return Err(self.error(
1940	Span::new(start, self.pos()),
1941	ast::ErrorKind::ClassUnclosed,
1942	));
1943	}
1944	}
1945	let set = ast::ClassBracketed {
1946	span: Span::new(start, self.pos()),
1947	negated,
1948	kind: ast::ClassSet::union(ast::ClassSetUnion {
1949	span: Span::new(union.span.start, union.span.start),
1950	items: vec![],
1951	}),
1952	};
1953	Ok((set, union))
1954	}
1955
1956	/// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1957	///
1958	/// This assumes the parser is positioned at the opening `[`.
1959	///
1960	/// If no valid ASCII character class could be found, then this does not
1961	/// advance the parser and `None` is returned. Otherwise, the parser is
1962	/// advanced to the first byte following the closing `]` and the
1963	/// corresponding ASCII class is returned.
1964	#[inline(never)]
1965	fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1966	// ASCII character classes are interesting from a parsing perspective
1967	// because parsing cannot fail with any interesting error. For example,
1968	// in order to use an ASCII character class, it must be enclosed in
1969	// double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1970	// of it as "ASCII character characters have the syntax `[:NAME:]`
1971	// which can only appear within character brackets." This means that
1972	// things like `[[:lower:]A]` are legal constructs.
1973	//
1974	// However, if one types an incorrect ASCII character class, e.g.,
1975	// `[[:loower:]]`, then we treat that as a normal nested character
1976	// class containing the characters `:elorw`. One might argue that we
1977	// should return an error instead since the repeated colons give away
1978	// the intent to write an ASCII class. But what if the user typed
1979	// `[[:lower]]` instead? How can we tell that was intended to be an
1980	// ASCII class and not just a normal nested class?
1981	//
1982	// Reasonable people can probably disagree over this, but for better
1983	// or worse, we implement semantics that never fails at the expense
1984	// of better failure modes.
1985	assert_eq!(self.char(), '[');
1986	// If parsing fails, then we back up the parser to this starting point.
1987	let start = self.pos();
1988	let mut negated = `false`;
1989	if !self.bump() \|\| self.char() != ':' {
1990	self.parser().pos.set(start);
1991	return None;
1992	}
1993	if !self.bump() {
1994	self.parser().pos.set(start);
1995	return None;
1996	}
1997	if self.char() == '^' {
1998	negated = `true`;
1999	if !self.bump() {
2000	self.parser().pos.set(start);
2001	return None;
2002	}
2003	}
2004	let name_start = self.offset();
2005	while self.char() != ':' && self.bump() {}
2006	if self.is_eof() {
2007	self.parser().pos.set(start);
2008	return None;
2009	}
2010	let name = &self.pattern()[name_start..self.offset()];
2011	if !self.bump_if(":]") {
2012	self.parser().pos.set(start);
2013	return None;
2014	}
2015	let kind = match ast::ClassAsciiKind::from_name(name) {
2016	Some(kind) => kind,
2017	None => {
2018	self.parser().pos.set(start);
2019	return None;
2020	}
2021	};
2022	Some(ast::ClassAscii {
2023	span: Span::new(start, self.pos()),
2024	kind,
2025	negated,
2026	})
2027	}
2028
2029	/// Parse a Unicode class in either the single character notation, `\pN`
2030	/// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2031	/// the parser is positioned at the `p` (or `P` for negation) and will
2032	/// advance the parser to the character immediately following the class.
2033	///
2034	/// Note that this does not check whether the class name is valid or not.
2035	#[inline(never)]
2036	fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2037	assert!(self.char() == 'p' \|\| self.char() == 'P');
2038
2039	let mut scratch = self.parser().scratch.borrow_mut();
2040	scratch.clear();
2041
2042	let negated = self.char() == 'P';
2043	if !self.bump_and_bump_space() {
2044	return Err(
2045	self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2046	);
2047	}
2048	let (start, kind) = if self.char() == '{' {
2049	let start = self.span_char().end;
2050	while self.bump_and_bump_space() && self.char() != '}' {
2051	scratch.push(self.char());
2052	}
2053	if self.is_eof() {
2054	return Err(self
2055	.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2056	}
2057	assert_eq!(self.char(), '}');
2058	self.bump();
2059
2060	let name = scratch.as_str();
2061	if let Some(i) = name.find("!=") {
2062	(
2063	start,
2064	ast::ClassUnicodeKind::NamedValue {
2065	op: ast::ClassUnicodeOpKind::NotEqual,
2066	name: name[..i].to_string(),
2067	value: name[i + `2`..].to_string(),
2068	},
2069	)
2070	} else if let Some(i) = name.find(':') {
2071	(
2072	start,
2073	ast::ClassUnicodeKind::NamedValue {
2074	op: ast::ClassUnicodeOpKind::Colon,
2075	name: name[..i].to_string(),
2076	value: name[i + `1`..].to_string(),
2077	},
2078	)
2079	} else if let Some(i) = name.find('=') {
2080	(
2081	start,
2082	ast::ClassUnicodeKind::NamedValue {
2083	op: ast::ClassUnicodeOpKind::Equal,
2084	name: name[..i].to_string(),
2085	value: name[i + `1`..].to_string(),
2086	},
2087	)
2088	} else {
2089	(start, ast::ClassUnicodeKind::Named(name.to_string()))
2090	}
2091	} else {
2092	let start = self.pos();
2093	let c = self.char();
2094	if c == '`\\`' {
2095	return Err(self.error(
2096	self.span_char(),
2097	ast::ErrorKind::UnicodeClassInvalid,
2098	));
2099	}
2100	self.bump_and_bump_space();
2101	let kind = ast::ClassUnicodeKind::OneLetter(c);
2102	(start, kind)
2103	};
2104	Ok(ast::ClassUnicode {
2105	span: Span::new(start, self.pos()),
2106	negated,
2107	kind,
2108	})
2109	}
2110
2111	/// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2112	/// parser is currently at a valid character class name and will be
2113	/// advanced to the character immediately following the class.
2114	#[inline(never)]
2115	fn parse_perl_class(&self) -> ast::ClassPerl {
2116	let c = self.char();
2117	let span = self.span_char();
2118	self.bump();
2119	let (negated, kind) = match c {
2120	'd' => (`false`, ast::ClassPerlKind::Digit),
2121	'D' => (`true`, ast::ClassPerlKind::Digit),
2122	's' => (`false`, ast::ClassPerlKind::Space),
2123	'S' => (`true`, ast::ClassPerlKind::Space),
2124	'w' => (`false`, ast::ClassPerlKind::Word),
2125	'W' => (`true`, ast::ClassPerlKind::Word),
2126	c => panic!("expected valid Perl class but got '{}'", c),
2127	};
2128	ast::ClassPerl { span, kind, negated }
2129	}
2130	}
2131
2132	/// A type that traverses a fully parsed Ast and checks whether its depth
2133	/// exceeds the specified nesting limit. If it does, then an error is returned.
2134	#[derive(Debug)]
2135	struct NestLimiter<'p, 's, P> {
2136	/// The parser that is checking the nest limit.
2137	p: &'p ParserI<'s, P>,
2138	/// The current depth while walking an Ast.
2139	depth: u32,
2140	}
2141
2142	impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2143	fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2144	NestLimiter { p, depth: `0` }
2145	}
2146
2147	#[inline(never)]
2148	fn check(self, ast: &Ast) -> Result<()> {
2149	ast::visit(ast, self)
2150	}
2151
2152	fn increment_depth(&mut self, span: &Span) -> Result<()> {
2153	let new = self.depth.checked_add(`1`).ok_or_else(\|\| {
2154	self.p.error(
2155	span.clone(),
2156	ast::ErrorKind::NestLimitExceeded(u32::MAX),
2157	)
2158	})?;
2159	let limit = self.p.parser().nest_limit;
2160	if new > limit {
2161	return Err(self.p.error(
2162	span.clone(),
2163	ast::ErrorKind::NestLimitExceeded(limit),
2164	));
2165	}
2166	self.depth = new;
2167	Ok(())
2168	}
2169
2170	fn decrement_depth(&mut self) {
2171	// Assuming the correctness of the visitor, this should never drop
2172	// below 0.
2173	self.depth = self.depth.checked_sub(`1`).unwrap();
2174	}
2175	}
2176
2177	impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2178	type Output = ();
2179	type Err = ast::Error;
2180
2181	fn finish(self) -> Result<()> {
2182	Ok(())
2183	}
2184
2185	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2186	let span = match *ast {
2187	Ast::Empty(_)
2188	\| Ast::Flags(_)
2189	\| Ast::Literal(_)
2190	\| Ast::Dot(_)
2191	\| Ast::Assertion(_)
2192	\| Ast::Class(ast::Class::Unicode(_))
2193	\| Ast::Class(ast::Class::Perl(_)) => {
2194	// These are all base cases, so we don't increment depth.
2195	return Ok(());
2196	}
2197	Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2198	Ast::Repetition(ref x) => &x.span,
2199	Ast::Group(ref x) => &x.span,
2200	Ast::Alternation(ref x) => &x.span,
2201	Ast::Concat(ref x) => &x.span,
2202	};
2203	self.increment_depth(span)
2204	}
2205
2206	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2207	match *ast {
2208	Ast::Empty(_)
2209	\| Ast::Flags(_)
2210	\| Ast::Literal(_)
2211	\| Ast::Dot(_)
2212	\| Ast::Assertion(_)
2213	\| Ast::Class(ast::Class::Unicode(_))
2214	\| Ast::Class(ast::Class::Perl(_)) => {
2215	// These are all base cases, so we don't decrement depth.
2216	Ok(())
2217	}
2218	Ast::Class(ast::Class::Bracketed(_))
2219	\| Ast::Repetition(_)
2220	\| Ast::Group(_)
2221	\| Ast::Alternation(_)
2222	\| Ast::Concat(_) => {
2223	self.decrement_depth();
2224	Ok(())
2225	}
2226	}
2227	}
2228
2229	fn visit_class_set_item_pre(
2230	&mut self,
2231	ast: &ast::ClassSetItem,
2232	) -> Result<()> {
2233	let span = match *ast {
2234	ast::ClassSetItem::Empty(_)
2235	\| ast::ClassSetItem::Literal(_)
2236	\| ast::ClassSetItem::Range(_)
2237	\| ast::ClassSetItem::Ascii(_)
2238	\| ast::ClassSetItem::Unicode(_)
2239	\| ast::ClassSetItem::Perl(_) => {
2240	// These are all base cases, so we don't increment depth.
2241	return Ok(());
2242	}
2243	ast::ClassSetItem::Bracketed(ref x) => &x.span,
2244	ast::ClassSetItem::Union(ref x) => &x.span,
2245	};
2246	self.increment_depth(span)
2247	}
2248
2249	fn visit_class_set_item_post(
2250	&mut self,
2251	ast: &ast::ClassSetItem,
2252	) -> Result<()> {
2253	match *ast {
2254	ast::ClassSetItem::Empty(_)
2255	\| ast::ClassSetItem::Literal(_)
2256	\| ast::ClassSetItem::Range(_)
2257	\| ast::ClassSetItem::Ascii(_)
2258	\| ast::ClassSetItem::Unicode(_)
2259	\| ast::ClassSetItem::Perl(_) => {
2260	// These are all base cases, so we don't decrement depth.
2261	Ok(())
2262	}
2263	ast::ClassSetItem::Bracketed(_) \| ast::ClassSetItem::Union(_) => {
2264	self.decrement_depth();
2265	Ok(())
2266	}
2267	}
2268	}
2269
2270	fn visit_class_set_binary_op_pre(
2271	&mut self,
2272	ast: &ast::ClassSetBinaryOp,
2273	) -> Result<()> {
2274	self.increment_depth(&ast.span)
2275	}
2276
2277	fn visit_class_set_binary_op_post(
2278	&mut self,
2279	_ast: &ast::ClassSetBinaryOp,
2280	) -> Result<()> {
2281	self.decrement_depth();
2282	Ok(())
2283	}
2284	}
2285
2286	/// When the result is an error, transforms the ast::ErrorKind from the source
2287	/// Result into another one. This function is used to return clearer error
2288	/// messages when possible.
2289	fn specialize_err<T>(
2290	result: Result<T>,
2291	from: ast::ErrorKind,
2292	to: ast::ErrorKind,
2293	) -> Result<T> {
2294	if let Err(e: Error) = result {
2295	if e.kind == from {
2296	Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2297	} else {
2298	Err(e)
2299	}
2300	} else {
2301	result
2302	}
2303	}
2304
2305	#[cfg(test)]
2306	mod tests {
2307	use core::ops::Range;
2308
2309	use alloc::format;
2310
2311	use crate::ast::{self, Ast, Position, Span};
2312
2313	use super::*;
2314
// A local stand-in for `assert_eq!` that prints each side on its own line,
// which is somewhat easier to read for large ASTs (though still not great).
macro_rules! assert_eq {
    ($left:expr, $right:expr) => {{
        match (&$left, &$right) {
            (lhs, rhs) if lhs == rhs => {}
            (lhs, rhs) => {
                panic!(
                    "assertion failed: `(left == right)`\n\n\
                     left: `{:?}`\nright: `{:?}`\n\n",
                    lhs, rhs
                )
            }
        }
    }};
}
2332
2333	// We create these errors to compare with real ast::Errors in the tests.
2334	// We define equality between TestError and ast::Error to disregard the
2335	// pattern string in ast::Error, which is annoying to provide in tests.
2336	#[derive(Clone, Debug)]
2337	struct TestError {
2338	span: Span,
2339	kind: ast::ErrorKind,
2340	}
2341
2342	impl PartialEq<ast::Error> for TestError {
2343	fn eq(&self, other: &ast::Error) -> bool {
2344	self.span == other.span && self.kind == other.kind
2345	}
2346	}
2347
2348	impl PartialEq<TestError> for ast::Error {
2349	fn eq(&self, other: &TestError) -> bool {
2350	self.span == other.span && self.kind == other.kind
2351	}
2352	}
2353
/// Short alias for turning a string slice into an owned `String`.
fn s(str: &str) -> String {
    String::from(str)
}
2357
2358	fn parser(pattern: &str) -> ParserI<'_, Parser> {
2359	ParserI::new(Parser::new(), pattern)
2360	}
2361
2362	fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2363	let parser = ParserBuilder::new().octal(`true`).build();
2364	ParserI::new(parser, pattern)
2365	}
2366
2367	fn parser_nest_limit(
2368	pattern: &str,
2369	nest_limit: u32,
2370	) -> ParserI<'_, Parser> {
2371	let p = ParserBuilder::new().nest_limit(nest_limit).build();
2372	ParserI::new(p, pattern)
2373	}
2374
2375	fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2376	let p = ParserBuilder::new().ignore_whitespace(`true`).build();
2377	ParserI::new(p, pattern)
2378	}
2379
2380	/// Short alias for creating a new span.
2381	fn nspan(start: Position, end: Position) -> Span {
2382	Span::new(start, end)
2383	}
2384
2385	/// Short alias for creating a new position.
2386	fn npos(offset: usize, line: usize, column: usize) -> Position {
2387	Position::new(offset, line, column)
2388	}
2389
2390	/// Create a new span from the given offset range. This assumes a single
2391	/// line and sets the columns based on the offsets. i.e., This only works
2392	/// out of the box for ASCII, which is fine for most tests.
2393	fn span(range: Range<usize>) -> Span {
2394	let start = Position::new(range.start, `1`, range.start + `1`);
2395	let end = Position::new(range.end, `1`, range.end + `1`);
2396	Span::new(start, end)
2397	}
2398
2399	/// Create a new span for the corresponding byte range in the given string.
2400	fn span_range(subject: &str, range: Range<usize>) -> Span {
2401	let start = Position {
2402	offset: range.start,
2403	line: `1` + subject[..range.start].matches('`\n`').count(),
2404	column: `1` + subject[..range.start]
2405	.chars()
2406	.rev()
2407	.position(\|c\| c == '`\n`')
2408	.unwrap_or(subject[..range.start].chars().count()),
2409	};
2410	let end = Position {
2411	offset: range.end,
2412	line: `1` + subject[..range.end].matches('`\n`').count(),
2413	column: `1` + subject[..range.end]
2414	.chars()
2415	.rev()
2416	.position(\|c\| c == '`\n`')
2417	.unwrap_or(subject[..range.end].chars().count()),
2418	};
2419	Span::new(start, end)
2420	}
2421
2422	/// Create a verbatim literal starting at the given position.
2423	fn lit(c: char, start: usize) -> Ast {
2424	lit_with(c, span(start..start + c.len_utf8()))
2425	}
2426
2427	/// Create a meta literal starting at the given position.
2428	fn meta_lit(c: char, span: Span) -> Ast {
2429	Ast::Literal(ast::Literal { span, kind: ast::LiteralKind::Meta, c })
2430	}
2431
2432	/// Create a verbatim literal with the given span.
2433	fn lit_with(c: char, span: Span) -> Ast {
2434	Ast::Literal(ast::Literal {
2435	span,
2436	kind: ast::LiteralKind::Verbatim,
2437	c,
2438	})
2439	}
2440
2441	/// Create a concatenation with the given range.
2442	fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2443	concat_with(span(range), asts)
2444	}
2445
2446	/// Create a concatenation with the given span.
2447	fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2448	Ast::Concat(ast::Concat { span, asts })
2449	}
2450
2451	/// Create an alternation with the given span.
2452	fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2453	Ast::Alternation(ast::Alternation { span: span(range), asts })
2454	}
2455
2456	/// Create a capturing group with the given span.
2457	fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2458	Ast::Group(ast::Group {
2459	span: span(range),
2460	kind: ast::GroupKind::CaptureIndex(index),
2461	ast: Box::new(ast),
2462	})
2463	}
2464
2465	/// Create an ast::SetFlags.
2466	///
2467	/// The given pattern should be the full pattern string. The range given
2468	/// should correspond to the byte offsets where the flag set occurs.
2469	///
2470	/// If negated is true, then the set is interpreted as beginning with a
2471	/// negation.
2472	fn flag_set(
2473	pat: &str,
2474	range: Range<usize>,
2475	flag: ast::Flag,
2476	negated: bool,
2477	) -> Ast {
2478	let mut items = vec![ast::FlagsItem {
2479	span: span_range(pat, (range.end - `2`)..(range.end - `1`)),
2480	kind: ast::FlagsItemKind::Flag(flag),
2481	}];
2482	if negated {
2483	items.insert(
2484	`0`,
2485	ast::FlagsItem {
2486	span: span_range(pat, (range.start + `2`)..(range.end - `2`)),
2487	kind: ast::FlagsItemKind::Negation,
2488	},
2489	);
2490	}
2491	Ast::Flags(ast::SetFlags {
2492	span: span_range(pat, range.clone()),
2493	flags: ast::Flags {
2494	span: span_range(pat, (range.start + `2`)..(range.end - `1`)),
2495	items,
2496	},
2497	})
2498	}
2499
#[test]
fn parse_nest_limit() {
    // The error expected when `limit` is exceeded by the node at `range`.
    let exceeded = |range: Range<usize>, limit: u32| TestError {
        span: span(range),
        kind: ast::ErrorKind::NestLimitExceeded(limit),
    };
    // A greedy repetition covering `whole`, with its operator at `op`.
    let greedy = |whole: Range<usize>,
                  op: Range<usize>,
                  kind: ast::RepetitionKind,
                  inner: Ast| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy: true,
            ast: Box::new(inner),
        })
    };

    // A nest limit of 0 still allows some types of regexes.
    assert_eq!(
        parser_nest_limit("", 0).parse(),
        Ok(Ast::Empty(span(0..0)))
    );
    assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));

    // Test repetition operations, which require one level of nesting.
    assert_eq!(
        parser_nest_limit("a+", 0).parse().unwrap_err(),
        exceeded(0..2, 0)
    );
    assert_eq!(
        parser_nest_limit("a+", 1).parse(),
        Ok(greedy(
            0..2,
            1..2,
            ast::RepetitionKind::OneOrMore,
            lit('a', 0)
        ))
    );
    assert_eq!(
        parser_nest_limit("(a)+", 1).parse().unwrap_err(),
        exceeded(0..3, 1)
    );
    assert_eq!(
        parser_nest_limit("a+*", 1).parse().unwrap_err(),
        exceeded(0..2, 1)
    );
    assert_eq!(
        parser_nest_limit("a+*", 2).parse(),
        Ok(greedy(
            0..3,
            2..3,
            ast::RepetitionKind::ZeroOrMore,
            greedy(0..2, 1..2, ast::RepetitionKind::OneOrMore, lit('a', 0))
        ))
    );

    // Test concatenations. A concatenation requires one level of nesting.
    assert_eq!(
        parser_nest_limit("ab", 0).parse().unwrap_err(),
        exceeded(0..2, 0)
    );
    assert_eq!(
        parser_nest_limit("ab", 1).parse(),
        Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
    );
    assert_eq!(
        parser_nest_limit("abc", 1).parse(),
        Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
    );

    // Test alternations. An alternation requires one level of nesting.
    assert_eq!(
        parser_nest_limit("a|b", 0).parse().unwrap_err(),
        exceeded(0..3, 0)
    );
    assert_eq!(
        parser_nest_limit("a|b", 1).parse(),
        Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser_nest_limit("a|b|c", 1).parse(),
        Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
    );

    // Test character classes. Classes form their own mini-recursive
    // syntax!
    assert_eq!(
        parser_nest_limit("[a]", 0).parse().unwrap_err(),
        exceeded(0..3, 0)
    );
    let only_a = ast::ClassSetItem::Literal(ast::Literal {
        span: span(1..2),
        kind: ast::LiteralKind::Verbatim,
        c: 'a',
    });
    assert_eq!(
        parser_nest_limit("[a]", 1).parse(),
        Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
            span: span(0..3),
            negated: false,
            kind: ast::ClassSet::Item(only_a),
        })))
    );
    assert_eq!(
        parser_nest_limit("[ab]", 1).parse().unwrap_err(),
        exceeded(1..3, 1)
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
        exceeded(3..7, 2)
    );
    assert_eq!(
        parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
        exceeded(4..6, 3)
    );
    assert_eq!(
        parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
        exceeded(1..5, 1)
    );
    assert_eq!(
        parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
        exceeded(4..6, 2)
    );
}
2657
#[test]
fn parse_comments() {
    // The pattern is spelled out line by line with explicit newlines and
    // explicit leading whitespace, because every offset asserted below
    // depends on the exact bytes. In particular, the third comment is
    // indented by two spaces: it starts at byte 53 while its line starts
    // at byte 51.
    let pat = concat!(
        "(?x)\n",
        "# This is comment 1.\n",
        "foo # This is comment 2.\n",
        "  # This is comment 3.\n",
        "bar\n",
        "# This is comment 4.",
    );
    let astc = parser(pat).parse_with_comments().unwrap();

    // Only the flag directive and the six literal characters make it into
    // the AST; whitespace and comments do not.
    let at = |c: char, range: Range<usize>| lit_with(c, span_range(pat, range));
    assert_eq!(
        astc.ast,
        concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                at('f', 26..27),
                at('o', 27..28),
                at('o', 28..29),
                at('b', 74..75),
                at('a', 75..76),
                at('r', 76..77),
            ]
        )
    );

    // Each comment is reported separately. Its text excludes the leading
    // `#`, and its span includes the terminating newline when there is one
    // (the last comment runs to the end of the pattern instead).
    let comment = |range: Range<usize>, text: &str| ast::Comment {
        span: span_range(pat, range),
        comment: s(text),
    };
    assert_eq!(
        astc.comments,
        vec![
            comment(5..26, " This is comment 1."),
            comment(30..51, " This is comment 2."),
            comment(53..74, " This is comment 3."),
            comment(78..98, " This is comment 4."),
        ]
    );
}
2704
#[test]
fn parse_holistic() {
    // A lone `]` outside of a class is an ordinary literal.
    assert_eq!(parser("]").parse(), Ok(lit(']', 0)));

    // Every meta character can be escaped, and each escape parses to a
    // two-byte meta literal. The pattern below escapes exactly the
    // characters listed in `metas`, in the same order, so escape number
    // `i` occupies bytes `2 * i..2 * i + 2` and the whole pattern is 36
    // bytes long.
    let escaped = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~";
    let metas = [
        '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^',
        '$', '#', '&', '-', '~',
    ];
    let expected: Vec<Ast> = metas
        .iter()
        .enumerate()
        .map(|(i, &c)| meta_lit(c, span(2 * i..2 * i + 2)))
        .collect();
    assert_eq!(parser(escaped).parse(), Ok(concat(0..36, expected)));
}
2735
#[test]
fn parse_ignore_whitespace() {
    // The leading `(?x)` directive that most patterns below start with.
    let x_flag = |pat: &str| {
        flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false)
    };
    // A verbatim literal occupying `range` of `pat`.
    let at = |pat: &str, c: char, range: Range<usize>| {
        lit_with(c, span_range(pat, range))
    };

    // Test that basic whitespace insensitivity works.
    let pat = "(?x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..7),
            vec![x_flag(pat), at(pat, 'a', 4..5), at(pat, 'b', 6..7)]
        ))
    );

    // Test that we can toggle whitespace insensitivity.
    let pat = "(?x)a b(?-x)a b";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..15),
            vec![
                x_flag(pat),
                at(pat, 'a', 4..5),
                at(pat, 'b', 6..7),
                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                at(pat, 'a', 12..13),
                at(pat, ' ', 13..14),
                at(pat, 'b', 14..15),
            ]
        ))
    );

    // Test that nesting whitespace insensitive flags works.
    let pat = "a (?x:a )a ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..11),
            vec![
                at(pat, 'a', 0..1),
                at(pat, ' ', 1..2),
                Ast::Group(ast::Group {
                    span: span_range(pat, 2..9),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 4..5),
                        items: vec![ast::FlagsItem {
                            span: span_range(pat, 4..5),
                            kind: ast::FlagsItemKind::Flag(
                                ast::Flag::IgnoreWhitespace
                            ),
                        }],
                    }),
                    ast: Box::new(at(pat, 'a', 6..7)),
                }),
                at(pat, 'a', 9..10),
                at(pat, ' ', 10..11),
            ]
        ))
    );

    // Test that whitespace after an opening paren is insignificant.
    let pat = "(?x)( ?P<foo> a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureName {
                        starts_with_p: true,
                        name: ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }
                    },
                    ast: Box::new(at(pat, 'a', 14..15)),
                }),
            ]
        ))
    );
    // Two spaces follow the `(` here, which puts `a` at byte 7.
    let pat = "(?x)(  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::CaptureIndex(1),
                    ast: Box::new(at(pat, 'a', 7..8)),
                }),
            ]
        ))
    );
    // Two spaces follow both the `(` and the `?:` here, which puts the
    // `:` at byte 8 and `a` at byte 11.
    let pat = "(?x)(  ?:  a )";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Group(ast::Group {
                    span: span_range(pat, 4..pat.len()),
                    kind: ast::GroupKind::NonCapturing(ast::Flags {
                        span: span_range(pat, 8..8),
                        items: vec![],
                    }),
                    ast: Box::new(at(pat, 'a', 11..12)),
                }),
            ]
        ))
    );
    let pat = r"(?x)\x { 53 }";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span(4..13),
                    kind: ast::LiteralKind::HexBrace(
                        ast::HexLiteralKind::X
                    ),
                    c: 'S',
                }),
            ]
        ))
    );

    // Test that whitespace after an escape is OK.
    let pat = r"(?x)\ ";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..pat.len()),
            vec![
                x_flag(pat),
                Ast::Literal(ast::Literal {
                    span: span_range(pat, 4..6),
                    kind: ast::LiteralKind::Superfluous,
                    c: ' ',
                }),
            ]
        ))
    );
}
2889
#[test]
fn parse_newlines() {
    // A newline between two dots is an ordinary literal.
    let pat = ".\n.";
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(
            span_range(pat, 0..3),
            vec![
                Ast::Dot(span_range(pat, 0..1)),
                lit_with('\n', span_range(pat, 1..2)),
                Ast::Dot(span_range(pat, 2..3)),
            ]
        ))
    );

    // Line and column tracking across several lines. Every character of
    // the pattern becomes one literal, and each literal ends exactly where
    // the next one begins, so the expectation is written as the list of
    // (offset, line, column) boundaries between consecutive characters.
    // A newline ends at column 1 of the following line.
    let pat = "foobar\nbaz\nquux\n";
    let boundaries: Vec<Position> = [
        (0, 1, 1),
        (1, 1, 2),
        (2, 1, 3),
        (3, 1, 4),
        (4, 1, 5),
        (5, 1, 6),
        (6, 1, 7),
        (7, 2, 1),
        (8, 2, 2),
        (9, 2, 3),
        (10, 2, 4),
        (11, 3, 1),
        (12, 3, 2),
        (13, 3, 3),
        (14, 3, 4),
        (15, 3, 5),
        (16, 4, 1),
    ]
    .iter()
    .map(|&(offset, line, column)| npos(offset, line, column))
    .collect();
    let literals: Vec<Ast> = pat
        .chars()
        .zip(boundaries.windows(2))
        .map(|(c, edge)| lit_with(c, nspan(edge[0], edge[1])))
        .collect();
    assert_eq!(
        parser(pat).parse(),
        Ok(concat_with(span_range(pat, 0..pat.len()), literals))
    );
}
2931
// Exercises the uncounted repetition operators `*`, `+` and `?` (including
// the lazy `??` form), then every position in which such an operator shows
// up with nothing in front of it to repeat.
#[test]
fn parse_uncounted_repetition() {
    type Kind = ast::RepetitionKind;

    // Expected repetition node: `whole` covers operand plus operator, `op`
    // covers the operator alone.
    let rep = |whole: core::ops::Range<usize>,
               op: core::ops::Range<usize>,
               kind: Kind,
               greedy: bool,
               inner: Ast| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp { span: span(op), kind },
            greedy,
            ast: Box::new(inner),
        })
    };

    // A lone literal followed by each operator.
    assert_eq!(
        parser(r"a*").parse(),
        Ok(rep(0..2, 1..2, Kind::ZeroOrMore, true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a+").parse(),
        Ok(rep(0..2, 1..2, Kind::OneOrMore, true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a?").parse(),
        Ok(rep(0..2, 1..2, Kind::ZeroOrOne, true, lit('a', 0)))
    );
    // A second `?` is folded into the operator span and clears `greedy`.
    assert_eq!(
        parser(r"a??").parse(),
        Ok(rep(0..3, 1..3, Kind::ZeroOrOne, false, lit('a', 0)))
    );

    // Repetitions as members of a concatenation.
    assert_eq!(
        parser(r"a?b").parse(),
        Ok(concat(
            0..3,
            vec![
                rep(0..2, 1..2, Kind::ZeroOrOne, true, lit('a', 0)),
                lit('b', 2),
            ]
        ))
    );
    assert_eq!(
        parser(r"a??b").parse(),
        Ok(concat(
            0..4,
            vec![
                rep(0..3, 1..3, Kind::ZeroOrOne, false, lit('a', 0)),
                lit('b', 3),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab?").parse(),
        Ok(concat(
            0..3,
            vec![
                lit('a', 0),
                rep(1..3, 2..3, Kind::ZeroOrOne, true, lit('b', 1)),
            ]
        ))
    );

    // A whole group as the operand.
    assert_eq!(
        parser(r"(ab)?").parse(),
        Ok(rep(
            0..5,
            4..5,
            Kind::ZeroOrOne,
            true,
            group(0..4, 1, concat(1..3, vec![lit('a', 1), lit('b', 2),]))
        ))
    );

    // A repetition in the second branch of an alternation whose first
    // branch is empty.
    assert_eq!(
        parser(r"|a?").parse(),
        Ok(alt(
            0..3,
            vec![
                Ast::Empty(span(0..0)),
                rep(1..3, 2..3, Kind::ZeroOrOne, true, lit('a', 1)),
            ]
        ))
    );

    // Operators with no operand. Each entry is the pattern and the offset
    // of the empty span the error is expected to carry.
    let missing = [
        (r"*", 0),
        (r"(?i)*", 4),
        (r"(*)", 1),
        (r"(?:?)", 3),
        (r"+", 0),
        (r"?", 0),
        (r"(?)", 1),
        (r"|*", 1),
        (r"|+", 1),
        (r"|?", 1),
    ];
    for (pat, at) in missing {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::RepetitionMissing,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3155
// Exercises counted repetitions (`{n}`, `{n,}`, `{n,m}`), including the lazy
// suffix and whitespace inside the braces, followed by the malformed forms
// and the error each one is expected to produce.
#[test]
fn parse_counted_repetition() {
    type Count = ast::RepetitionRange;
    type Kind = ast::ErrorKind;

    // Expected counted-repetition node: `whole` covers operand plus
    // operator, `op` covers the `{...}` operator (and a trailing `?`).
    let counted = |whole: core::ops::Range<usize>,
                   op: core::ops::Range<usize>,
                   count: Count,
                   greedy: bool,
                   inner: Ast| {
        Ast::Repetition(ast::Repetition {
            span: span(whole),
            op: ast::RepetitionOp {
                span: span(op),
                kind: ast::RepetitionKind::Range(count),
            },
            greedy,
            ast: Box::new(inner),
        })
    };

    // The three range shapes, then the lazy variant.
    assert_eq!(
        parser(r"a{5}").parse(),
        Ok(counted(0..4, 1..4, Count::Exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,}").parse(),
        Ok(counted(0..5, 1..5, Count::AtLeast(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5,9}").parse(),
        Ok(counted(0..6, 1..6, Count::Bounded(5, 9), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{5}?").parse(),
        Ok(counted(0..5, 1..5, Count::Exactly(5), false, lit('a', 0)))
    );

    // Counted repetitions as members of a concatenation.
    assert_eq!(
        parser(r"ab{5}").parse(),
        Ok(concat(
            0..5,
            vec![
                lit('a', 0),
                counted(1..5, 2..5, Count::Exactly(5), true, lit('b', 1)),
            ]
        ))
    );
    assert_eq!(
        parser(r"ab{5}c").parse(),
        Ok(concat(
            0..6,
            vec![
                lit('a', 0),
                counted(1..5, 2..5, Count::Exactly(5), true, lit('b', 1)),
                lit('c', 5),
            ]
        ))
    );

    // Whitespace inside the braces is accepted by the default parser.
    assert_eq!(
        parser(r"a{ 5 }").parse(),
        Ok(counted(0..6, 1..6, Count::Exactly(5), true, lit('a', 0)))
    );
    assert_eq!(
        parser(r"a{ 5 , 9 }").parse(),
        Ok(counted(0..10, 1..10, Count::Bounded(5, 9), true, lit('a', 0)))
    );
    // With the whitespace-insensitive parser, a space may separate the
    // closing brace from the lazy `?`.
    assert_eq!(
        parser_ignore_whitespace(r"a{5,9} ?").parse(),
        Ok(counted(0..8, 1..8, Count::Bounded(5, 9), false, lit('a', 0)))
    );

    // Malformed input: (pattern, expected error span, expected error kind).
    let failures = [
        (r"(?i){0}", 4..4, Kind::RepetitionMissing),
        (r"(?m){1,1}", 4..4, Kind::RepetitionMissing),
        (r"a{]}", 2..2, Kind::RepetitionCountDecimalEmpty),
        (r"a{1,]}", 4..4, Kind::RepetitionCountDecimalEmpty),
        (r"a{", 1..2, Kind::RepetitionCountUnclosed),
        (r"a{}", 2..2, Kind::RepetitionCountDecimalEmpty),
        (r"a{a", 2..2, Kind::RepetitionCountDecimalEmpty),
        (r"a{9999999999}", 2..12, Kind::DecimalInvalid),
        (r"a{9", 1..3, Kind::RepetitionCountUnclosed),
        (r"a{9,a", 4..4, Kind::RepetitionCountDecimalEmpty),
        (r"a{9,9999999999}", 4..14, Kind::DecimalInvalid),
        (r"a{9,", 1..4, Kind::RepetitionCountUnclosed),
        (r"a{9,11", 1..6, Kind::RepetitionCountUnclosed),
        (r"a{2,1}", 1..6, Kind::RepetitionCountInvalid),
        (r"{5}", 0..0, Kind::RepetitionMissing),
        (r"|{5}", 1..1, Kind::RepetitionMissing),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3412
// Exercises alternation: plain and grouped branches, nested groups, empty
// branches on either side of `|`, and unbalanced parentheses around an
// alternation.
#[test]
fn parse_alternate() {
    // Empty AST node located at offset `at`.
    let empty = |at: usize| Ast::Empty(span(at..at));
    // Alternation node spelled out directly rather than via `alt`.
    let alternation = |range: core::ops::Range<usize>, asts| {
        Ast::Alternation(ast::Alternation { span: span(range), asts })
    };
    // Concatenation of two adjacent literals beginning at `start`.
    let pair = |start: usize, first: char, second: char| {
        concat(
            start..start + 2,
            vec![lit(first, start), lit(second, start + 1)],
        )
    };

    // Single-literal branches.
    assert_eq!(
        parser(r"a|b").parse(),
        Ok(alternation(0..3, vec![lit('a', 0), lit('b', 2)]))
    );
    assert_eq!(
        parser(r"(a|b)").parse(),
        Ok(group(
            0..5,
            1,
            alternation(1..4, vec![lit('a', 1), lit('b', 3)])
        ))
    );
    assert_eq!(
        parser(r"a|b|c").parse(),
        Ok(alternation(
            0..5,
            vec![lit('a', 0), lit('b', 2), lit('c', 4)]
        ))
    );

    // Multi-literal branches, bare and grouped.
    assert_eq!(
        parser(r"ax|by|cz").parse(),
        Ok(alternation(
            0..8,
            vec![pair(0, 'a', 'x'), pair(3, 'b', 'y'), pair(6, 'c', 'z')]
        ))
    );
    assert_eq!(
        parser(r"(ax|by|cz)").parse(),
        Ok(group(
            0..10,
            1,
            alternation(
                1..9,
                vec![pair(1, 'a', 'x'), pair(4, 'b', 'y'), pair(7, 'c', 'z')]
            )
        ))
    );

    // Groups nested in the last branch; capture indices count up from the
    // outermost group.
    assert_eq!(
        parser(r"(ax|(by|(cz)))").parse(),
        Ok(group(
            0..14,
            1,
            alt(
                1..13,
                vec![
                    pair(1, 'a', 'x'),
                    group(
                        4..13,
                        2,
                        alt(
                            5..12,
                            vec![
                                pair(5, 'b', 'y'),
                                group(8..12, 3, pair(9, 'c', 'z')),
                            ]
                        )
                    ),
                ]
            )
        ))
    );

    // Empty branches at the top level.
    assert_eq!(
        parser(r"|").parse(),
        Ok(alt(0..1, vec![empty(0), empty(1)]))
    );
    assert_eq!(
        parser(r"||").parse(),
        Ok(alt(0..2, vec![empty(0), empty(1), empty(2)]))
    );
    assert_eq!(
        parser(r"a|").parse(),
        Ok(alt(0..2, vec![lit('a', 0), empty(2)]))
    );
    assert_eq!(
        parser(r"|a").parse(),
        Ok(alt(0..2, vec![empty(0), lit('a', 1)]))
    );

    // Empty branches inside a capture group.
    assert_eq!(
        parser(r"(|)").parse(),
        Ok(group(0..3, 1, alt(1..2, vec![empty(1), empty(2)])))
    );
    assert_eq!(
        parser(r"(a|)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![lit('a', 1), empty(3)])))
    );
    assert_eq!(
        parser(r"(|a)").parse(),
        Ok(group(0..4, 1, alt(1..3, vec![empty(1), lit('a', 2)])))
    );

    // Unbalanced parentheses around an alternation.
    assert_eq!(
        parser(r"a|b)").parse().unwrap_err(),
        TestError {
            span: span(3..4),
            kind: ast::ErrorKind::GroupUnopened,
        }
    );
    assert_eq!(
        parser(r"(a|b").parse().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::GroupUnclosed,
        }
    );
}
3572
// Look-ahead and look-behind groups must be rejected with
// `UnsupportedLookAround`, with the error span covering the group opener.
#[test]
fn parse_unsupported_lookaround() {
    // (pattern, end offset of the opener that the error span must cover).
    let openers = [
        (r"(?=a)", 3),
        (r"(?!a)", 3),
        (r"(?<=a)", 4),
        (r"(?<!a)", 4),
    ];
    for (pat, end) in openers {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span(0..end),
                kind: ast::ErrorKind::UnsupportedLookAround,
            },
            "pattern: {:?}",
            pat
        );
    }
}
3604
// Exercises group syntax: bare flag directives such as `(?i)`, capturing
// groups, non-capturing groups with and without flags, and the errors
// raised for unclosed or unopened groups.
#[test]
fn parse_group() {
    type Kind = ast::ErrorKind;

    // One-character flag item at offset `at`.
    let flag = |at: usize, flag: ast::Flag| ast::FlagsItem {
        span: span(at..at + 1),
        kind: ast::FlagsItemKind::Flag(flag),
    };
    // One-character negation (`-`) item at offset `at`.
    let negation = |at: usize| ast::FlagsItem {
        span: span(at..at + 1),
        kind: ast::FlagsItemKind::Negation,
    };
    let flag_set = |range: core::ops::Range<usize>, items| ast::Flags {
        span: span(range),
        items,
    };
    // Flag directive that is not attached to a group body, e.g. `(?i)`.
    let directive = |range: core::ops::Range<usize>, flags: ast::Flags| {
        Ast::Flags(ast::SetFlags { span: span(range), flags })
    };
    let group_node = |range: core::ops::Range<usize>,
                      kind: ast::GroupKind,
                      inner: Ast| {
        Ast::Group(ast::Group {
            span: span(range),
            kind,
            ast: Box::new(inner),
        })
    };

    // Flag directives.
    assert_eq!(
        parser("(?i)").parse(),
        Ok(directive(
            0..4,
            flag_set(2..3, vec![flag(2, ast::Flag::CaseInsensitive)])
        ))
    );
    assert_eq!(
        parser("(?iU)").parse(),
        Ok(directive(
            0..5,
            flag_set(
                2..4,
                vec![
                    flag(2, ast::Flag::CaseInsensitive),
                    flag(3, ast::Flag::SwapGreed),
                ]
            )
        ))
    );
    assert_eq!(
        parser("(?i-U)").parse(),
        Ok(directive(
            0..6,
            flag_set(
                2..5,
                vec![
                    flag(2, ast::Flag::CaseInsensitive),
                    negation(3),
                    flag(4, ast::Flag::SwapGreed),
                ]
            )
        ))
    );

    // Capturing groups are numbered from 1 in order of their opening
    // parenthesis.
    assert_eq!(
        parser("()").parse(),
        Ok(group_node(
            0..2,
            ast::GroupKind::CaptureIndex(1),
            Ast::Empty(span(1..1))
        ))
    );
    assert_eq!(
        parser("(a)").parse(),
        Ok(group_node(0..3, ast::GroupKind::CaptureIndex(1), lit('a', 1)))
    );
    assert_eq!(
        parser("(())").parse(),
        Ok(group_node(
            0..4,
            ast::GroupKind::CaptureIndex(1),
            group_node(
                1..3,
                ast::GroupKind::CaptureIndex(2),
                Ast::Empty(span(2..2))
            )
        ))
    );

    // Non-capturing groups; `(?:` carries an empty flag set positioned
    // right after the `?`.
    assert_eq!(
        parser("(?:a)").parse(),
        Ok(group_node(
            0..5,
            ast::GroupKind::NonCapturing(flag_set(2..2, vec![])),
            lit('a', 3)
        ))
    );
    assert_eq!(
        parser("(?i:a)").parse(),
        Ok(group_node(
            0..6,
            ast::GroupKind::NonCapturing(flag_set(
                2..3,
                vec![flag(2, ast::Flag::CaseInsensitive),]
            )),
            lit('a', 4)
        ))
    );
    assert_eq!(
        parser("(?i-U:a)").parse(),
        Ok(group_node(
            0..8,
            ast::GroupKind::NonCapturing(flag_set(
                2..5,
                vec![
                    flag(2, ast::Flag::CaseInsensitive),
                    negation(3),
                    flag(4, ast::Flag::SwapGreed),
                ]
            )),
            lit('a', 6)
        ))
    );

    // Malformed input: (pattern, expected error span, expected error kind).
    let failures = [
        ("(", 0..1, Kind::GroupUnclosed),
        ("(?", 0..1, Kind::GroupUnclosed),
        ("(?P", 2..3, Kind::FlagUnrecognized),
        ("(?P<", 4..4, Kind::GroupNameUnexpectedEof),
        ("(a", 0..1, Kind::GroupUnclosed),
        ("(()", 0..1, Kind::GroupUnclosed),
        (")", 0..1, Kind::GroupUnopened),
        ("a)", 1..2, Kind::GroupUnopened),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
3816
// Exercises named capture groups in both the `(?<name>...)` and
// `(?P<name>...)` spellings, including non-ASCII names, followed by the
// truncated, empty, invalid and duplicate name errors.
#[test]
fn parse_capture_name() {
    type Kind = ast::ErrorKind;

    // Expected named group. Every successful case here contains exactly
    // one group, so the capture index is always 1.
    let named = |whole: Span,
                 starts_with_p: bool,
                 name_span: Span,
                 name: &'static str,
                 inner: Ast| {
        Ast::Group(ast::Group {
            span: whole,
            kind: ast::GroupKind::CaptureName {
                starts_with_p,
                name: ast::CaptureName {
                    span: name_span,
                    name: s(name),
                    index: 1,
                },
            },
            ast: Box::new(inner),
        })
    };

    // ASCII names; `starts_with_p` records which spelling was used.
    assert_eq!(
        parser("(?<a>z)").parse(),
        Ok(named(span(0..7), false, span(3..4), "a", lit('z', 5)))
    );
    assert_eq!(
        parser("(?P<a>z)").parse(),
        Ok(named(span(0..8), true, span(4..5), "a", lit('z', 6)))
    );
    assert_eq!(
        parser("(?P<abc>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "abc", lit('z', 8)))
    );

    // Underscores, dots and square brackets are accepted after the first
    // character of a name.
    assert_eq!(
        parser("(?P<a_1>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "a_1", lit('z', 8)))
    );
    assert_eq!(
        parser("(?P<a.1>z)").parse(),
        Ok(named(span(0..10), true, span(4..7), "a.1", lit('z', 8)))
    );
    assert_eq!(
        parser("(?P<a[1]>z)").parse(),
        Ok(named(span(0..11), true, span(4..8), "a[1]", lit('z', 9)))
    );

    // Non-ASCII names: the first `Position` field advances by bytes while
    // the third advances by characters, so the spans are written out in
    // full.
    assert_eq!(
        parser("(?P<a¾>)").parse(),
        Ok(named(
            Span::new(Position::new(0, 1, 1), Position::new(9, 1, 9)),
            true,
            Span::new(Position::new(4, 1, 5), Position::new(7, 1, 7)),
            "a¾",
            Ast::Empty(Span::new(
                Position::new(8, 1, 8),
                Position::new(8, 1, 8),
            ))
        ))
    );
    assert_eq!(
        parser("(?P<名字>)").parse(),
        Ok(named(
            Span::new(Position::new(0, 1, 1), Position::new(12, 1, 9)),
            true,
            Span::new(Position::new(4, 1, 5), Position::new(10, 1, 7)),
            "名字",
            Ast::Empty(Span::new(
                Position::new(11, 1, 8),
                Position::new(11, 1, 8),
            ))
        ))
    );

    // Malformed input: (pattern, expected error span, expected error kind).
    let failures = [
        ("(?P<", span(4..4), Kind::GroupNameUnexpectedEof),
        ("(?P<>z)", span(4..4), Kind::GroupNameEmpty),
        ("(?P<a", span(5..5), Kind::GroupNameUnexpectedEof),
        ("(?P<ab", span(6..6), Kind::GroupNameUnexpectedEof),
        ("(?P<0a", span(4..5), Kind::GroupNameInvalid),
        ("(?P<~", span(4..5), Kind::GroupNameInvalid),
        ("(?P<abc~", span(7..8), Kind::GroupNameInvalid),
        (
            "(?P<a>y)(?P<a>z)",
            span(12..13),
            Kind::GroupNameDuplicate { original: span(4..5) },
        ),
        ("(?P<5>)", span(4..5), Kind::GroupNameInvalid),
        ("(?P<5a>)", span(4..5), Kind::GroupNameInvalid),
        (
            "(?P<¾>)",
            Span::new(Position::new(4, 1, 5), Position::new(6, 1, 6)),
            Kind::GroupNameInvalid,
        ),
        (
            "(?P<¾a>)",
            Span::new(Position::new(4, 1, 5), Position::new(6, 1, 6)),
            Kind::GroupNameInvalid,
        ),
        (
            "(?P<☃>)",
            Span::new(Position::new(4, 1, 5), Position::new(7, 1, 6)),
            Kind::GroupNameInvalid,
        ),
        (
            "(?P<a☃>)",
            Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
            Kind::GroupNameInvalid,
        ),
    ];
    for (pat, at, kind) in failures {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: at, kind },
            "pattern: {:?}",
            pat
        );
    }
}
4075
// Exercises `parse_flags` directly on flag sequences terminated by `:` or
// `)`, with and without a negation, followed by the flag-specific errors.
#[test]
fn parse_flags() {
    type Kind = ast::ErrorKind;

    // One-character flag item at offset `at`.
    let flag = |at: usize, flag: ast::Flag| ast::FlagsItem {
        span: span(at..at + 1),
        kind: ast::FlagsItemKind::Flag(flag),
    };
    // One-character negation (`-`) item at offset `at`.
    let negation = |at: usize| ast::FlagsItem {
        span: span(at..at + 1),
        kind: ast::FlagsItemKind::Negation,
    };
    let flag_set = |range: core::ops::Range<usize>, items| ast::Flags {
        span: span(range),
        items,
    };

    // Either terminator ends the set and is excluded from its span.
    assert_eq!(
        parser("i:").parse_flags(),
        Ok(flag_set(0..1, vec![flag(0, ast::Flag::CaseInsensitive)]))
    );
    assert_eq!(
        parser("i)").parse_flags(),
        Ok(flag_set(0..1, vec![flag(0, ast::Flag::CaseInsensitive)]))
    );

    assert_eq!(
        parser("isU:").parse_flags(),
        Ok(flag_set(
            0..3,
            vec![
                flag(0, ast::Flag::CaseInsensitive),
                flag(1, ast::Flag::DotMatchesNewLine),
                flag(2, ast::Flag::SwapGreed),
            ]
        ))
    );

    // A negation is recorded as its own item, wherever it appears.
    assert_eq!(
        parser("-isU:").parse_flags(),
        Ok(flag_set(
            0..4,
            vec![
                negation(0),
                flag(1, ast::Flag::CaseInsensitive),
                flag(2, ast::Flag::DotMatchesNewLine),
                flag(3, ast::Flag::SwapGreed),
            ]
        ))
    );
    assert_eq!(
        parser("i-sU:").parse_flags(),
        Ok(flag_set(
            0..4,
            vec![
                flag(0, ast::Flag::CaseInsensitive),
                negation(1),
                flag(2, ast::Flag::DotMatchesNewLine),
                flag(3, ast::Flag::SwapGreed),
            ]
        ))
    );
    assert_eq!(
        parser("i-sR:").parse_flags(),
        Ok(flag_set(
            0..4,
            vec![
                flag(0, ast::Flag::CaseInsensitive),
                negation(1),
                flag(2, ast::Flag::DotMatchesNewLine),
                flag(3, ast::Flag::CRLF),
            ]
        ))
    );

    // Malformed input: (pattern, expected error span, expected error kind).
    let failures = [
        ("isU", 3..3, Kind::FlagUnexpectedEof),
        ("isUa:", 3..4, Kind::FlagUnrecognized),
        ("isUi:", 3..4, Kind::FlagDuplicate { original: span(0..1) }),
        (
            "i-sU-i:",
            4..5,
            Kind::FlagRepeatedNegation { original: span(1..2) },
        ),
        ("-)", 0..1, Kind::FlagDanglingNegation),
        ("i-)", 1..2, Kind::FlagDanglingNegation),
        ("iU-)", 2..3, Kind::FlagDanglingNegation),
    ];
    for (pat, range, kind) in failures {
        assert_eq!(
            parser(pat).parse_flags().unwrap_err(),
            TestError { span: span(range), kind },
            "pattern: {:?}",
            pat
        );
    }
}
4261
// Exercises `parse_flag` on every recognised flag letter and on two
// characters that are not flags (one ASCII, one multi-byte).
#[test]
fn parse_flag() {
    // (flag letter, flag it must parse to).
    let known = [
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("R", ast::Flag::CRLF),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (pat, flag) in known {
        assert_eq!(parser(pat).parse_flag(), Ok(flag), "flag: {:?}", pat);
    }

    assert_eq!(
        parser("a").parse_flag().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
    // The snowman is three bytes long, so the span is derived from the
    // pattern text via `span_range`.
    assert_eq!(
        parser("☃").parse_flag().unwrap_err(),
        TestError {
            span: span_range("☃", 0..3),
            kind: ast::ErrorKind::FlagUnrecognized,
        }
    );
}
4287
// Exercises `parse_primitive` on single unescaped characters: the dot, the
// line anchors, and literals that are taken verbatim.
#[test]
fn parse_primitive_non_escape() {
    let anchor = |kind: ast::AssertionKind| {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    };
    let verbatim = |at: Span, c: char| {
        Primitive::Literal(ast::Literal {
            span: at,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    };

    assert_eq!(
        parser(r".").parse_primitive(),
        Ok(Primitive::Dot(span(0..1)))
    );
    assert_eq!(
        parser(r"^").parse_primitive(),
        Ok(anchor(ast::AssertionKind::StartLine))
    );
    assert_eq!(
        parser(r"$").parse_primitive(),
        Ok(anchor(ast::AssertionKind::EndLine))
    );

    assert_eq!(
        parser(r"a").parse_primitive(),
        Ok(verbatim(span(0..1), 'a'))
    );
    // `parse_primitive` returns `|` as an ordinary verbatim literal.
    assert_eq!(
        parser(r"|").parse_primitive(),
        Ok(verbatim(span(0..1), '|'))
    );
    // Multi-byte literal: the span covers all three bytes.
    assert_eq!(
        parser(r"☃").parse_primitive(),
        Ok(verbatim(span_range("☃", 0..3), '☃'))
    );
}
4334
/// Verifies escape-sequence parsing: escaped meta characters, the special
/// single-letter escapes, assertion escapes, superfluous escapes, and the
/// escapes that must be rejected.
#[test]
fn parse_escape() {
    // An escaped meta character is a two-byte `Meta` literal whose value is
    // the character itself.
    assert_eq!(
        parser(r"\|").parse_primitive(),
        Ok(Primitive::Literal(ast::Literal {
            span: span(0..2),
            kind: ast::LiteralKind::Meta,
            c: '|',
        }))
    );
    // Each special escape maps to a fixed character and its own literal kind.
    let specials = &[
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for &(pat, c, ref kind) in specials {
        assert_eq!(
            parser(pat).parse_primitive(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Special(kind.clone()),
                c,
            }))
        );
    }
    assert_eq!(
        parser(r"\A").parse_primitive(),
        Ok(Primitive::Assertion(ast::Assertion {
            span: span(0..2),
            kind: ast::AssertionKind::StartText,
        }))
    );
    assert_eq!(
        parser(r"\z").parse_primitive(),
        Ok(Primitive::Assertion(ast::Assertion {
            span: span(0..2),
            kind: ast::AssertionKind::EndText,
        }))
    );
    assert_eq!(
        parser(r"\b").parse_primitive(),
        Ok(Primitive::Assertion(ast::Assertion {
            span: span(0..2),
            kind: ast::AssertionKind::WordBoundary,
        }))
    );
    assert_eq!(
        parser(r"\B").parse_primitive(),
        Ok(Primitive::Assertion(ast::Assertion {
            span: span(0..2),
            kind: ast::AssertionKind::NotWordBoundary,
        }))
    );

    // We also support superfluous escapes in most cases now too.
    for c in ['!', '@', '%', '"', '\'', '/', ' '] {
        let pat = format!(r"\{}", c);
        assert_eq!(
            parser(&pat).parse_primitive(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..2),
                kind: ast::LiteralKind::Superfluous,
                c,
            }))
        );
    }

    // Some superfluous escapes, namely [0-9A-Za-z], are still banned. This
    // gives flexibility for future evolution.
    assert_eq!(
        parser(r"\e").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
    assert_eq!(
        parser(r"\y").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
    // But also, < and > are banned, so that we may evolve them into
    // start/end word boundary assertions. (Not sure if we will...)
    assert_eq!(
        parser(r"\<").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
    assert_eq!(
        parser(r"\>").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );

    // An unfinished escape is illegal.
    assert_eq!(
        parser(r"\").parse_escape().unwrap_err(),
        TestError {
            span: span(0..1),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );
}
4447
/// Digit escapes are reported as unsupported backreferences; the error span
/// covers the whole two-byte escape.
#[test]
fn parse_unsupported_backreference() {
    for pat in [r"\0", r"\9"] {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError {
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            }
        );
    }
}
4465
/// Verifies octal escapes (with octal support enabled via `parser_octal`):
/// every value below 511 round-trips, at most three octal digits are
/// consumed, and a non-octal digit is rejected.
#[test]
fn parse_octal() {
    // Builds the literal expected for the byte range `start..end`.
    let literal =
        |start: usize, end: usize, kind: ast::LiteralKind, c: char| {
            ast::Literal { span: span(start..end), kind, c }
        };

    for value in 0..511 {
        let pat = format!(r"\{:o}", value);
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(literal(
                0,
                pat.len(),
                ast::LiteralKind::Octal,
                char::from_u32(value).unwrap(),
            )))
        );
    }

    // The escape stops at the first non-octal digit, or after three digits.
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(literal(
            0,
            3,
            ast::LiteralKind::Octal,
            '?'
        )))
    );
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(literal(
            0,
            4,
            ast::LiteralKind::Octal,
            '\u{01FF}'
        )))
    );

    // When parsing the whole pattern, the leftover digit becomes a verbatim
    // literal concatenated after the octal escape.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::Literal(literal(0, 3, ast::LiteralKind::Octal, '?')),
                Ast::Literal(literal(
                    3,
                    4,
                    ast::LiteralKind::Verbatim,
                    '8'
                )),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::Literal(literal(
                    0,
                    4,
                    ast::LiteralKind::Octal,
                    '\u{01FF}'
                )),
                Ast::Literal(literal(
                    4,
                    5,
                    ast::LiteralKind::Verbatim,
                    '7'
                )),
            ],
        }))
    );

    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4540
/// Verifies fixed two-digit `\x` escapes: all 256 values parse, and
/// truncated or non-hex input produces the expected error and span.
#[test]
fn parse_hex_two() {
    for value in 0..256 {
        let pat = format!(r"\x{:02x}", value);
        let expected = ast::Literal {
            span: span(0..pat.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: char::from_u32(value).unwrap(),
        };
        assert_eq!(
            parser(&pat).parse_escape(),
            Ok(Primitive::Literal(expected))
        );
    }

    // (pattern, error span start, error span end, error kind)
    let rejected = [
        (r"\xF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\xG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\xFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(start..end), kind }
        );
    }
}
4577
/// Verifies fixed four-digit `\u` escapes: every valid scalar value below
/// 65536 parses, and truncated, non-hex or non-scalar input is rejected.
#[test]
fn parse_hex_four() {
    for value in 0..65536 {
        // Skip values for which `char::from_u32` yields no `char`.
        if let Some(c) = char::from_u32(value) {
            let pat = format!(r"\u{:04x}", value);
            let expected = ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeShort,
                ),
                c,
            };
            assert_eq!(
                parser(&pat).parse_escape(),
                Ok(Primitive::Literal(expected))
            );
        }
    }

    // (pattern, error span start, error span end, error kind)
    let rejected = [
        (r"\uF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\uG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFG", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFFG", 5, 6, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uD800", 2, 6, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(start..end), kind }
        );
    }
}
4641
/// Verifies fixed eight-digit `\U` escapes: every valid scalar value below
/// 65536 parses, and truncated or non-hex input is rejected at the
/// offending position.
#[test]
fn parse_hex_eight() {
    for value in 0..65536 {
        // Skip values for which `char::from_u32` yields no `char`.
        if let Some(c) = char::from_u32(value) {
            let pat = format!(r"\U{:08x}", value);
            let expected = ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeLong,
                ),
                c,
            };
            assert_eq!(
                parser(&pat).parse_escape(),
                Ok(Primitive::Literal(expected))
            );
        }
    }

    // (pattern, error span start, error span end, error kind)
    let rejected = [
        (r"\UF", 3, 3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\UG", 2, 3, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFG", 3, 4, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFG", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFG", 5, 6, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFG", 6, 7, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFG", 7, 8, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFFG", 8, 9, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\UFFFFFFFG", 9, 10, ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(start..end), kind }
        );
    }
}
4726
/// Verifies brace-delimited hex escapes (`\x{...}`, `\u{...}`, `\U{...}`):
/// accepted forms yield a `HexBrace` literal, and unterminated, empty,
/// non-hex or non-scalar contents are rejected.
#[test]
fn parse_hex_brace() {
    // (pattern, span end, hex kind, expected character)
    let accepted = [
        (r"\u{26c4}", 8, ast::HexLiteralKind::UnicodeShort, '⛄'),
        (r"\U{26c4}", 8, ast::HexLiteralKind::UnicodeLong, '⛄'),
        (r"\x{26c4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{26C4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{10fFfF}", 10, ast::HexLiteralKind::X, '\u{10FFFF}'),
    ];
    for (pat, end, hex_kind, c) in accepted {
        assert_eq!(
            parser(pat).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..end),
                kind: ast::LiteralKind::HexBrace(hex_kind),
                c,
            }))
        );
    }

    // (pattern, error span start, error span end, error kind)
    let rejected = [
        (r"\x", 2, 2, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{", 2, 3, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{FF", 2, 5, ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{}", 2, 4, ast::ErrorKind::EscapeHexEmpty),
        (r"\x{FGF}", 4, 5, ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\x{FFFFFF}", 3, 9, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{D800}", 3, 7, ast::ErrorKind::EscapeHexInvalid),
        (r"\x{FFFFFFFFF}", 3, 12, ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse_escape().unwrap_err(),
            TestError { span: span(start..end), kind }
        );
    }
}
4831
/// Verifies `parse_decimal`: plain digit runs (leading zero included) yield
/// their value, input without a leading digit is reported as empty, and a
/// value too large to represent is reported as invalid.
#[test]
fn parse_decimal() {
    for (pat, value) in [("123", 123), ("0", 0), ("01", 1)] {
        assert_eq!(parser(pat).parse_decimal(), Ok(value));
    }

    for pat in ["-1", ""] {
        assert_eq!(
            parser(pat).parse_decimal().unwrap_err(),
            TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
        );
    }

    assert_eq!(
        parser("9999999999").parse_decimal().unwrap_err(),
        TestError {
            span: span(0..10),
            kind: ast::ErrorKind::DecimalInvalid,
        }
    );
}
4854
/// Verifies bracketed character class parsing through the full `parse`
/// entry point: nested and ASCII classes, set operators, literals, ranges,
/// escapes, the special handling of a leading `]`, and the error cases.
#[test]
fn parse_set_class() {
    /// Wraps a class set in the `Ast` for a non-negated bracketed class.
    fn bracketed(span: Span, kind: ast::ClassSet) -> Ast {
        Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
            span,
            negated: false,
            kind,
        }))
    }

    /// Builds the class set for a union of items.
    fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
        ast::ClassSet::union(ast::ClassSetUnion { span, items })
    }

    /// Builds a binary set operation of the given kind.
    fn binary(
        kind: ast::ClassSetBinaryOpKind,
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
            span,
            kind,
            lhs: Box::new(lhs),
            rhs: Box::new(rhs),
        })
    }

    /// `lhs && rhs`
    fn intersection(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::Intersection, span, lhs, rhs)
    }

    /// `lhs -- rhs`
    fn difference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::Difference, span, lhs, rhs)
    }

    /// `lhs ~~ rhs`
    fn symdifference(
        span: Span,
        lhs: ast::ClassSet,
        rhs: ast::ClassSet,
    ) -> ast::ClassSet {
        binary(ast::ClassSetBinaryOpKind::SymmetricDifference, span, lhs, rhs)
    }

    /// Lifts a single item into a class set.
    fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
        ast::ClassSet::Item(item)
    }

    /// A non-negated ASCII class (e.g. `[:alnum:]`) as a class set.
    fn ascii(span: Span, kind: ast::ClassAsciiKind) -> ast::ClassSet {
        itemset(ast::ClassSetItem::Ascii(ast::ClassAscii {
            span,
            kind,
            negated: false,
        }))
    }

    /// A non-negated `\w` item.
    fn word(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Perl(ast::ClassPerl {
            span,
            kind: ast::ClassPerlKind::Word,
            negated: false,
        })
    }

    /// A verbatim literal item.
    fn lit(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }

    /// An escaped meta-character literal item.
    fn meta(span: Span, c: char) -> ast::ClassSetItem {
        ast::ClassSetItem::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Meta,
            c,
        })
    }

    /// An empty item at the given (zero-width) span.
    fn empty(span: Span) -> ast::ClassSetItem {
        ast::ClassSetItem::Empty(span)
    }

    /// A range item between two verbatim literals. The first literal ends
    /// one character after the range starts and the second begins one
    /// character before the range ends.
    fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
        let first = ast::Literal {
            span: Span {
                end: Position {
                    offset: span.start.offset + start.len_utf8(),
                    column: span.start.column + 1,
                    ..span.start
                },
                ..span
            },
            kind: ast::LiteralKind::Verbatim,
            c: start,
        };
        let last = ast::Literal {
            span: Span {
                start: Position {
                    offset: span.end.offset - end.len_utf8(),
                    column: span.end.column - 1,
                    ..span.end
                },
                ..span
            },
            kind: ast::LiteralKind::Verbatim,
            c: end,
        };
        ast::ClassSetItem::Range(ast::ClassSetRange {
            span,
            start: first,
            end: last,
        })
    }

    // ASCII classes, nesting and set operators.
    assert_eq!(
        parser("[[:alnum:]]").parse(),
        Ok(bracketed(
            span(0..11),
            ascii(span(1..10), ast::ClassAsciiKind::Alnum),
        ))
    );
    assert_eq!(
        parser("[[[:alnum:]]]").parse(),
        Ok(bracketed(
            span(0..13),
            itemset(ast::ClassSetItem::Bracketed(Box::new(
                ast::ClassBracketed {
                    span: span(1..12),
                    negated: false,
                    kind: ascii(span(2..11), ast::ClassAsciiKind::Alnum),
                }
            ))),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]&&[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            intersection(
                span(1..21),
                ascii(span(1..10), ast::ClassAsciiKind::Alnum),
                ascii(span(12..21), ast::ClassAsciiKind::Lower),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]--[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            difference(
                span(1..21),
                ascii(span(1..10), ast::ClassAsciiKind::Alnum),
                ascii(span(12..21), ast::ClassAsciiKind::Lower),
            ),
        ))
    );
    assert_eq!(
        parser("[[:alnum:]~~[:lower:]]").parse(),
        Ok(bracketed(
            span(0..22),
            symdifference(
                span(1..21),
                ascii(span(1..10), ast::ClassAsciiKind::Alnum),
                ascii(span(12..21), ast::ClassAsciiKind::Lower),
            ),
        ))
    );

    // Literals, escapes and Perl/Unicode classes.
    assert_eq!(
        parser("[a]").parse(),
        Ok(bracketed(span(0..3), itemset(lit(span(1..2), 'a'))))
    );
    assert_eq!(
        parser(r"[a\]]").parse(),
        Ok(bracketed(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), 'a'), meta(span(2..4), ']')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[a\-z]").parse(),
        Ok(bracketed(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    meta(span(2..4), '-'),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser("[ab]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), 'b')],
            ),
        ))
    );
    assert_eq!(
        parser("[a-]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), 'a'), lit(span(2..3), '-')],
            ),
        ))
    );
    assert_eq!(
        parser("[-a]").parse(),
        Ok(bracketed(
            span(0..4),
            union(
                span(1..3),
                vec![lit(span(1..2), '-'), lit(span(2..3), 'a')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\pL]").parse(),
        Ok(bracketed(
            span(0..5),
            itemset(ast::ClassSetItem::Unicode(ast::ClassUnicode {
                span: span(1..4),
                negated: false,
                kind: ast::ClassUnicodeKind::OneLetter('L'),
            })),
        ))
    );
    assert_eq!(
        parser(r"[\w]").parse(),
        Ok(bracketed(span(0..4), itemset(word(span(1..3)))))
    );
    assert_eq!(
        parser(r"[a\wz]").parse(),
        Ok(bracketed(
            span(0..6),
            union(
                span(1..5),
                vec![
                    lit(span(1..2), 'a'),
                    word(span(2..4)),
                    lit(span(4..5), 'z'),
                ],
            ),
        ))
    );

    // Ranges, alone and combined with set operators.
    assert_eq!(
        parser("[a-z]").parse(),
        Ok(bracketed(span(0..5), itemset(range(span(1..4), 'a', 'z'))))
    );
    assert_eq!(
        parser("[a-cx-z]").parse(),
        Ok(bracketed(
            span(0..8),
            union(
                span(1..7),
                vec![
                    range(span(1..4), 'a', 'c'),
                    range(span(4..7), 'x', 'z'),
                ],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\w&&a-cx-z]").parse(),
        Ok(bracketed(
            span(0..12),
            intersection(
                span(1..11),
                itemset(word(span(1..3))),
                union(
                    span(5..11),
                    vec![
                        range(span(5..8), 'a', 'c'),
                        range(span(8..11), 'x', 'z'),
                    ],
                ),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a-cx-z&&\w]").parse(),
        Ok(bracketed(
            span(0..12),
            intersection(
                span(1..11),
                union(
                    span(1..7),
                    vec![
                        range(span(1..4), 'a', 'c'),
                        range(span(4..7), 'x', 'z'),
                    ],
                ),
                itemset(word(span(9..11))),
            ),
        ))
    );

    // Chained operators nest to the left.
    assert_eq!(
        parser(r"[a--b--c]").parse(),
        Ok(bracketed(
            span(0..9),
            difference(
                span(1..8),
                difference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[a~~b~~c]").parse(),
        Ok(bracketed(
            span(0..9),
            symdifference(
                span(1..8),
                symdifference(
                    span(1..5),
                    itemset(lit(span(1..2), 'a')),
                    itemset(lit(span(4..5), 'b')),
                ),
                itemset(lit(span(7..8), 'c')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\^&&^]").parse(),
        Ok(bracketed(
            span(0..7),
            intersection(
                span(1..6),
                itemset(meta(span(1..3), '^')),
                itemset(lit(span(5..6), '^')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[\&&&&]").parse(),
        Ok(bracketed(
            span(0..7),
            intersection(
                span(1..6),
                itemset(meta(span(1..3), '&')),
                itemset(lit(span(5..6), '&')),
            ),
        ))
    );
    assert_eq!(
        parser(r"[&&&&]").parse(),
        Ok(bracketed(
            span(0..6),
            intersection(
                span(1..5),
                intersection(
                    span(1..3),
                    itemset(empty(span(1..1))),
                    itemset(empty(span(3..3))),
                ),
                itemset(empty(span(5..5))),
            ),
        ))
    );

    // Multi-byte range endpoints: spans are derived from the pattern itself.
    let pat = "[☃-⛄]";
    assert_eq!(
        parser(pat).parse(),
        Ok(bracketed(
            span_range(pat, 0..9),
            itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
                span: span_range(pat, 1..8),
                start: ast::Literal {
                    span: span_range(pat, 1..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '☃',
                },
                end: ast::Literal {
                    span: span_range(pat, 5..8),
                    kind: ast::LiteralKind::Verbatim,
                    c: '⛄',
                },
            })),
        ))
    );

    // A `]` directly after the opening bracket is a literal.
    assert_eq!(
        parser(r"[]]").parse(),
        Ok(bracketed(span(0..3), itemset(lit(span(1..2), ']'))))
    );
    assert_eq!(
        parser(r"[]\[]").parse(),
        Ok(bracketed(
            span(0..5),
            union(
                span(1..4),
                vec![lit(span(1..2), ']'), meta(span(2..4), '[')],
            ),
        ))
    );
    assert_eq!(
        parser(r"[\[]]").parse(),
        Ok(concat(
            0..5,
            vec![
                bracketed(span(0..4), itemset(meta(span(1..3), '['))),
                Ast::Literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: ']',
                }),
            ]
        ))
    );

    // (pattern, error span start, error span end, error kind)
    let rejected = [
        ("[", 0, 1, ast::ErrorKind::ClassUnclosed),
        ("[[", 1, 2, ast::ErrorKind::ClassUnclosed),
        ("[[-]", 0, 1, ast::ErrorKind::ClassUnclosed),
        ("[[[:alnum:]", 1, 2, ast::ErrorKind::ClassUnclosed),
        (r"[\b]", 1, 3, ast::ErrorKind::ClassEscapeInvalid),
        (r"[\w-a]", 1, 3, ast::ErrorKind::ClassRangeLiteral),
        (r"[a-\w]", 3, 5, ast::ErrorKind::ClassRangeLiteral),
        (r"[z-a]", 1, 4, ast::ErrorKind::ClassRangeInvalid),
    ];
    for (pat, start, end, kind) in rejected {
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError { span: span(start..end), kind }
        );
    }

    // With whitespace-insensitive mode on, trailing whitespace must not hide
    // the unclosed class.
    for pat in ["[a ", "[a- "] {
        assert_eq!(
            parser_ignore_whitespace(pat).parse().unwrap_err(),
            TestError {
                span: span(0..1),
                kind: ast::ErrorKind::ClassUnclosed,
            }
        );
    }
}
5437
5438	#[test]
5439	fn parse_set_class_open() {
5440	assert_eq!(parser("[a]").parse_set_class_open(), {
5441	let set = ast::ClassBracketed {
5442	span: span(`0`..`1`),
5443	negated: `false`,
5444	kind: ast::ClassSet::union(ast::ClassSetUnion {
5445	span: span(`1`..`1`),
5446	items: vec![],
5447	}),
5448	};
5449	let union = ast::ClassSetUnion { span: span(`1`..`1`), items: vec![] };
5450	Ok((set, union))
5451	});
5452	assert_eq!(
5453	parser_ignore_whitespace("[ a]").parse_set_class_open(),
5454	{
5455	let set = ast::ClassBracketed {
5456	span: span(`0`..`4`),
5457	negated: `false`,
5458	kind: ast::ClassSet::union(ast::ClassSetUnion {
5459	span: span(`4`..`4`),
5460	items: vec![],
5461	}),
5462	};
5463	let union =
5464	ast::ClassSetUnion { span: span(`4`..`4`), items: vec![] };
5465	Ok((set, union))
5466	}
5467	);
5468	assert_eq!(parser("[^a]").parse_set_class_open(), {
5469	let set = ast::ClassBracketed {
5470	span: span(`0`..`2`),
5471	negated: `true`,
5472	kind: ast::ClassSet::union(ast::ClassSetUnion {
5473	span: span(`2`..`2`),
5474	items: vec![],
5475	}),
5476	};
5477	let union = ast::ClassSetUnion { span: span(`2`..`2`), items: vec![] };
5478	Ok((set, union))
5479	});
5480	assert_eq!(
5481	parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5482	{
5483	let set = ast::ClassBracketed {
5484	span: span(`0`..`4`),
5485	negated: `true`,
5486	kind: ast::ClassSet::union(ast::ClassSetUnion {
5487	span: span(`4`..`4`),
5488	items: vec![],
5489	}),
5490	};
5491	let union =
5492	ast::ClassSetUnion { span: span(`4`..`4`), items: vec![] };
5493	Ok((set, union))
5494	}
5495	);
5496	assert_eq!(parser("[-a]").parse_set_class_open(), {
5497	let set = ast::ClassBracketed {
5498	span: span(`0`..`2`),
5499	negated: `false`,
5500	kind: ast::ClassSet::union(ast::ClassSetUnion {
5501	span: span(`1`..`1`),
5502	items: vec![],
5503	}),
5504	};
5505	let union = ast::ClassSetUnion {
5506	span: span(`1`..`2`),
5507	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5508	span: span(`1`..`2`),
5509	kind: ast::LiteralKind::Verbatim,
5510	c: '-',
5511	})],
5512	};
5513	Ok((set, union))
5514	});
5515	assert_eq!(
5516	parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5517	{
5518	let set = ast::ClassBracketed {
5519	span: span(`0`..`4`),
5520	negated: `false`,
5521	kind: ast::ClassSet::union(ast::ClassSetUnion {
5522	span: span(`2`..`2`),
5523	items: vec![],
5524	}),
5525	};
5526	let union = ast::ClassSetUnion {
5527	span: span(`2`..`3`),
5528	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5529	span: span(`2`..`3`),
5530	kind: ast::LiteralKind::Verbatim,
5531	c: '-',
5532	})],
5533	};
5534	Ok((set, union))
5535	}
5536	);
5537	assert_eq!(parser("[^-a]").parse_set_class_open(), {
5538	let set = ast::ClassBracketed {
5539	span: span(`0`..`3`),
5540	negated: `true`,
5541	kind: ast::ClassSet::union(ast::ClassSetUnion {
5542	span: span(`2`..`2`),
5543	items: vec![],
5544	}),
5545	};
5546	let union = ast::ClassSetUnion {
5547	span: span(`2`..`3`),
5548	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5549	span: span(`2`..`3`),
5550	kind: ast::LiteralKind::Verbatim,
5551	c: '-',
5552	})],
5553	};
5554	Ok((set, union))
5555	});
5556	assert_eq!(parser("[--a]").parse_set_class_open(), {
5557	let set = ast::ClassBracketed {
5558	span: span(`0`..`3`),
5559	negated: `false`,
5560	kind: ast::ClassSet::union(ast::ClassSetUnion {
5561	span: span(`1`..`1`),
5562	items: vec![],
5563	}),
5564	};
5565	let union = ast::ClassSetUnion {
5566	span: span(`1`..`3`),
5567	items: vec![
5568	ast::ClassSetItem::Literal(ast::Literal {
5569	span: span(`1`..`2`),
5570	kind: ast::LiteralKind::Verbatim,
5571	c: '-',
5572	}),
5573	ast::ClassSetItem::Literal(ast::Literal {
5574	span: span(`2`..`3`),
5575	kind: ast::LiteralKind::Verbatim,
5576	c: '-',
5577	}),
5578	],
5579	};
5580	Ok((set, union))
5581	});
5582	assert_eq!(parser("[]a]").parse_set_class_open(), {
5583	let set = ast::ClassBracketed {
5584	span: span(`0`..`2`),
5585	negated: `false`,
5586	kind: ast::ClassSet::union(ast::ClassSetUnion {
5587	span: span(`1`..`1`),
5588	items: vec![],
5589	}),
5590	};
5591	let union = ast::ClassSetUnion {
5592	span: span(`1`..`2`),
5593	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5594	span: span(`1`..`2`),
5595	kind: ast::LiteralKind::Verbatim,
5596	c: ']',
5597	})],
5598	};
5599	Ok((set, union))
5600	});
5601	assert_eq!(
5602	parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5603	{
5604	let set = ast::ClassBracketed {
5605	span: span(`0`..`4`),
5606	negated: `false`,
5607	kind: ast::ClassSet::union(ast::ClassSetUnion {
5608	span: span(`2`..`2`),
5609	items: vec![],
5610	}),
5611	};
5612	let union = ast::ClassSetUnion {
5613	span: span(`2`..`3`),
5614	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5615	span: span(`2`..`3`),
5616	kind: ast::LiteralKind::Verbatim,
5617	c: ']',
5618	})],
5619	};
5620	Ok((set, union))
5621	}
5622	);
5623	assert_eq!(parser("[^]a]").parse_set_class_open(), {
5624	let set = ast::ClassBracketed {
5625	span: span(`0`..`3`),
5626	negated: `true`,
5627	kind: ast::ClassSet::union(ast::ClassSetUnion {
5628	span: span(`2`..`2`),
5629	items: vec![],
5630	}),
5631	};
5632	let union = ast::ClassSetUnion {
5633	span: span(`2`..`3`),
5634	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5635	span: span(`2`..`3`),
5636	kind: ast::LiteralKind::Verbatim,
5637	c: ']',
5638	})],
5639	};
5640	Ok((set, union))
5641	});
5642	assert_eq!(parser("[-]a]").parse_set_class_open(), {
5643	let set = ast::ClassBracketed {
5644	span: span(`0`..`2`),
5645	negated: `false`,
5646	kind: ast::ClassSet::union(ast::ClassSetUnion {
5647	span: span(`1`..`1`),
5648	items: vec![],
5649	}),
5650	};
5651	let union = ast::ClassSetUnion {
5652	span: span(`1`..`2`),
5653	items: vec![ast::ClassSetItem::Literal(ast::Literal {
5654	span: span(`1`..`2`),
5655	kind: ast::LiteralKind::Verbatim,
5656	c: '-',
5657	})],
5658	};
5659	Ok((set, union))
5660	});
5661
5662	assert_eq!(
5663	parser("[").parse_set_class_open().unwrap_err(),
5664	TestError {
5665	span: span(`0`..`1`),
5666	kind: ast::ErrorKind::ClassUnclosed,
5667	}
5668	);
5669	assert_eq!(
5670	parser_ignore_whitespace("[ ")
5671	.parse_set_class_open()
5672	.unwrap_err(),
5673	TestError {
5674	span: span(`0`..`5`),
5675	kind: ast::ErrorKind::ClassUnclosed,
5676	}
5677	);
5678	assert_eq!(
5679	parser("[^").parse_set_class_open().unwrap_err(),
5680	TestError {
5681	span: span(`0`..`2`),
5682	kind: ast::ErrorKind::ClassUnclosed,
5683	}
5684	);
5685	assert_eq!(
5686	parser("[]").parse_set_class_open().unwrap_err(),
5687	TestError {
5688	span: span(`0`..`2`),
5689	kind: ast::ErrorKind::ClassUnclosed,
5690	}
5691	);
5692	assert_eq!(
5693	parser("[-").parse_set_class_open().unwrap_err(),
5694	TestError {
5695	span: span(`0`..`0`),
5696	kind: ast::ErrorKind::ClassUnclosed,
5697	}
5698	);
5699	assert_eq!(
5700	parser("[--").parse_set_class_open().unwrap_err(),
5701	TestError {
5702	span: span(`0`..`0`),
5703	kind: ast::ErrorKind::ClassUnclosed,
5704	}
5705	);
5706
5707	// See: https://github.com/rust-lang/regex/issues/792
5708	assert_eq!(
5709	parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5710	TestError {
5711	span: span(`4`..`4`),
5712	kind: ast::ErrorKind::ClassUnclosed,
5713	}
5714	);
5715	}
5716
// Exercises `maybe_parse_ascii_class` on well-formed and malformed ASCII
// class syntax. When the input is not an ASCII class, the parser must
// return `None` and its offset must still be 0.
#[test]
fn maybe_parse_ascii_class() {
    // (pattern, end of the expected span, expected negation)
    let recognized: &[(&str, usize, bool)] = &[
        (r"[:alnum:]", 9, false),
        // Input following the class is not included in the span.
        (r"[:alnum:]A", 9, false),
        (r"[:^alnum:]", 10, true),
    ];
    for &(pattern, end, negated) in recognized {
        assert_eq!(
            parser(pattern).maybe_parse_ascii_class(),
            Some(ast::ClassAscii {
                span: span(0..end),
                kind: ast::ClassAsciiKind::Alnum,
                negated,
            })
        );
    }

    // Truncated, misspelled or otherwise malformed classes.
    let rejected: &[&str] = &[
        r"[:",
        r"[:^",
        r"[^:alnum:]",
        r"[:alnnum:]",
        r"[:alnum]",
        r"[:alnum:",
    ];
    for &pattern in rejected {
        let p = parser(pattern);
        assert_eq!(p.maybe_parse_ascii_class(), None);
        assert_eq!(p.offset(), 0);
    }
}
5768
// Exercises parsing of Unicode class escapes (`\pN`, `\p{Name}`,
// `\p{name:value}`, `\p{name=value}`, `\p{name!=value}` and their `\P`
// forms), including the errors reported for truncated or invalid input.
#[test]
fn parse_unicode_class() {
    // Expected class whose span always starts at offset 0.
    let unicode = |end: usize, negated: bool, kind: ast::ClassUnicodeKind| {
        ast::ClassUnicode { span: span(0..end), negated, kind }
    };
    // Expected `name <op> value` kind.
    let named_value =
        |op: ast::ClassUnicodeOpKind, name: &str, value: &str| {
            ast::ClassUnicodeKind::NamedValue {
                op,
                name: s(name),
                value: s(value),
            }
        };

    // One-letter and braced names, plain and negated.
    assert_eq!(
        parser(r"\pN").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            3,
            false,
            ast::ClassUnicodeKind::OneLetter('N'),
        )))
    );
    assert_eq!(
        parser(r"\PN").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            3,
            true,
            ast::ClassUnicodeKind::OneLetter('N'),
        )))
    );
    assert_eq!(
        parser(r"\p{N}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            5,
            false,
            ast::ClassUnicodeKind::Named(s("N")),
        )))
    );
    assert_eq!(
        parser(r"\P{N}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            5,
            true,
            ast::ClassUnicodeKind::Named(s("N")),
        )))
    );
    assert_eq!(
        parser(r"\p{Greek}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            9,
            false,
            ast::ClassUnicodeKind::Named(s("Greek")),
        )))
    );

    // `name <op> value` forms for each of the three operators.
    assert_eq!(
        parser(r"\p{scx:Katakana}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            16,
            false,
            named_value(ast::ClassUnicodeOpKind::Colon, "scx", "Katakana"),
        )))
    );
    assert_eq!(
        parser(r"\p{scx=Katakana}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            16,
            false,
            named_value(ast::ClassUnicodeOpKind::Equal, "scx", "Katakana"),
        )))
    );
    assert_eq!(
        parser(r"\p{scx!=Katakana}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            17,
            false,
            named_value(
                ast::ClassUnicodeOpKind::NotEqual,
                "scx",
                "Katakana",
            ),
        )))
    );

    // An operator with an empty name and value still parses.
    assert_eq!(
        parser(r"\p{:}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            5,
            false,
            named_value(ast::ClassUnicodeOpKind::Colon, "", ""),
        )))
    );
    assert_eq!(
        parser(r"\p{=}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            5,
            false,
            named_value(ast::ClassUnicodeOpKind::Equal, "", ""),
        )))
    );
    assert_eq!(
        parser(r"\p{!=}").parse_escape(),
        Ok(Primitive::Unicode(unicode(
            6,
            false,
            named_value(ast::ClassUnicodeOpKind::NotEqual, "", ""),
        )))
    );

    // Truncated escapes report an unexpected EOF as an empty span at the
    // end of the pattern: (pattern, offset of the error).
    let truncated: &[(&str, usize)] =
        &[(r"\p", 2), (r"\p{", 3), (r"\p{N", 4), (r"\p{Greek", 8)];
    for &(pattern, at) in truncated {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                span: span(at..at),
                kind: ast::ErrorKind::EscapeUnexpectedEof,
            }
        );
    }

    // A class followed by a literal parses as a two-element concatenation.
    assert_eq!(
        parser(r"\pNz").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::Class(ast::Class::Unicode(unicode(
                    3,
                    false,
                    ast::ClassUnicodeKind::OneLetter('N'),
                ))),
                Ast::Literal(ast::Literal {
                    span: span(3..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );
    assert_eq!(
        parser(r"\p{Greek}z").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..10),
            asts: vec![
                Ast::Class(ast::Class::Unicode(unicode(
                    9,
                    false,
                    ast::ClassUnicodeKind::Named(s("Greek")),
                ))),
                Ast::Literal(ast::Literal {
                    span: span(9..10),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );

    // A backslash right after `\p` / `\P` is rejected as an invalid class.
    let invalid: &[&str] = &[r"\p\{", r"\P\{"];
    for &pattern in invalid {
        assert_eq!(
            parser(pattern).parse().unwrap_err(),
            TestError {
                span: span(2..3),
                kind: ast::ErrorKind::UnicodeClassInvalid,
            }
        );
    }
}
5966
// Exercises parsing of the Perl classes `\d`, `\s`, `\w` and their negated
// upper-case forms, both as bare escapes and through a full `parse()`.
#[test]
fn parse_perl_class() {
    // Every Perl class escape in this test occupies offsets 0..2.
    let perl = |kind: ast::ClassPerlKind, negated: bool| {
        ast::ClassPerl { span: span(0..2), kind, negated }
    };

    // (pattern, expected kind, expected negation)
    let escapes = vec![
        (r"\d", ast::ClassPerlKind::Digit, false),
        (r"\D", ast::ClassPerlKind::Digit, true),
        (r"\s", ast::ClassPerlKind::Space, false),
        (r"\S", ast::ClassPerlKind::Space, true),
        (r"\w", ast::ClassPerlKind::Word, false),
        (r"\W", ast::ClassPerlKind::Word, true),
    ];
    for (pattern, kind, negated) in escapes {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Perl(perl(kind, negated)))
        );
    }

    // A lone class parses to a class AST node.
    assert_eq!(
        parser(r"\d").parse(),
        Ok(Ast::Class(ast::Class::Perl(perl(
            ast::ClassPerlKind::Digit,
            false,
        ))))
    );
    // A class followed by a literal parses as a two-element concatenation.
    assert_eq!(
        parser(r"\dz").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..3),
            asts: vec![
                Ast::Class(ast::Class::Perl(perl(
                    ast::ClassPerlKind::Digit,
                    false,
                ))),
                Ast::Literal(ast::Literal {
                    span: span(2..3),
                    kind: ast::LiteralKind::Verbatim,
                    c: 'z',
                }),
            ],
        }))
    );
}
6045
// This tests a bug fix where the nest limit checker wasn't decrementing
// its depth during post-traversal, which caused long regexes to trip the
// default limit too aggressively. The pattern below must parse
// successfully with a nest limit of 50.
#[test]
fn regression_454_nest_too_big() {
    let pattern = r#"
2(?:
[45]\d{3}\|
7(?:
1[0-267]\|
2[0-289]\|
3[0-29]\|
4[01]\|
5[1-3]\|
6[013]\|
7[0178]\|
91
)\|
8(?:
0[125]\|
[139][1-6]\|
2[0157-9]\|
41\|
6[1-35]\|
7[1-5]\|
8[1-8]\|
90
)\|
9(?:
0[0-2]\|
1[0-4]\|
2[568]\|
3[3-6]\|
5[5-7]\|
6[0167]\|
7[15]\|
8[0146-9]
)
)\d{4}
"#;
    let parsed = parser_nest_limit(pattern, 50).parse();
    assert!(parsed.is_ok());
}
6088
// A trailing `-` inside a character class must be treated as a literal `-`
// even when whitespace-insensitive mode is on and whitespace (or a comment)
// follows the dash. Classes that are never closed must still be rejected.
#[test]
fn regression_455_trailing_dash_ignore_whitespace() {
    // Closed classes ending in `-`: all must parse.
    let accepted = [
        "(?x)[ / - ]",
        "(?x)[ a - ]",
        "(?x)[
a
- ]
",
        "(?x)[
a # wat
- ]
",
    ];
    for &pattern in accepted.iter() {
        assert!(parser(pattern).parse().is_ok());
    }

    // The same shapes without a closing bracket: all must fail.
    let rejected = [
        "(?x)[ / -",
        "(?x)[ / - ",
        "(?x)[
/ -
",
        "(?x)[
/ - # wat
",
    ];
    for &pattern in rejected.iter() {
        assert!(parser(pattern).parse().is_err());
    }
}
6130	}
6131