translate.rs source code [crates/regex_syntax/src/hir/translate.rs]

/*!
Defines a translator that converts an `Ast` to an `Hir`.
*/
4
5	use core::cell::{Cell, RefCell};
6
7	use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
8
9	use crate::{
10	ast::{self, Ast, Span, Visitor},
11	either::Either,
12	hir::{self, Error, ErrorKind, Hir, HirKind},
13	unicode::{self, ClassQuery},
14	};
15
/// Shorthand for a `Result` whose error type is the HIR `Error` imported
/// above.
type Result<T> = core::result::Result<T, Error>;
17
/// A builder for constructing an AST->HIR translator.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether UTF-8 mode is enabled. Set via `TranslatorBuilder::utf8`.
    utf8: bool,
    /// The line terminator byte. Set via `TranslatorBuilder::line_terminator`.
    line_terminator: u8,
    /// The flag settings that a built translator starts out with.
    flags: Flags,
}
25
26	impl Default for TranslatorBuilder {
27	fn default() -> TranslatorBuilder {
28	TranslatorBuilder::new()
29	}
30	}
31
32	impl TranslatorBuilder {
33	/// Create a new translator builder with a default c onfiguration.
34	pub fn new() -> TranslatorBuilder {
35	TranslatorBuilder {
36	utf8: `true`,
37	line_terminator: b'`\n`',
38	flags: Flags::default(),
39	}
40	}
41
42	/// Build a translator using the current configuration.
43	pub fn build(&self) -> Translator {
44	Translator {
45	stack: RefCell::new(vec![]),
46	flags: Cell::new(self.flags),
47	utf8: self.utf8,
48	line_terminator: self.line_terminator,
49	}
50	}
51
52	/// When disabled, translation will permit the construction of a regular
53	/// expression that may match invalid UTF-8.
54	///
55	/// When enabled (the default), the translator is guaranteed to produce an
56	/// expression that, for non-empty matches, will only ever produce spans
57	/// that are entirely valid UTF-8 (otherwise, the translator will return an
58	/// error).
59	///
60	/// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
61	/// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
62	/// syntax) will be allowed even though they can produce matches that split
63	/// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
64	/// matches, and it is expected that the regex engine itself must handle
65	/// these cases if necessary (perhaps by suppressing any zero-width matches
66	/// that split a codepoint).
67	pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
68	self.utf8 = yes;
69	self
70	}
71
72	/// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
73	///
74	/// Namely, instead of `.` (by default) matching everything except for `\n`,
75	/// this will cause `.` to match everything except for the byte given.
76	///
77	/// If `.` is used in a context where Unicode mode is enabled and this byte
78	/// isn't ASCII, then an error will be returned. When Unicode mode is
79	/// disabled, then any byte is permitted, but will return an error if UTF-8
80	/// mode is enabled and it is a non-ASCII byte.
81	///
82	/// In short, any ASCII value for a line terminator is always okay. But a
83	/// non-ASCII byte might result in an error depending on whether Unicode
84	/// mode or UTF-8 mode are enabled.
85	///
86	/// Note that if `R` mode is enabled then it always takes precedence and
87	/// the line terminator will be treated as `\r` and `\n` simultaneously.
88	///
89	/// Note also that this doesn't* impact the look-around assertions*
90	/// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
91	/// configuration in the regex engine itself.
92	pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
93	self.line_terminator = byte;
94	self
95	}
96
97	/// Enable or disable the case insensitive flag (`i`) by default.
98	pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
99	self.flags.case_insensitive = if yes { Some(`true`) } else { None };
100	self
101	}
102
103	/// Enable or disable the multi-line matching flag (`m`) by default.
104	pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
105	self.flags.multi_line = if yes { Some(`true`) } else { None };
106	self
107	}
108
109	/// Enable or disable the "dot matches any character" flag (`s`) by
110	/// default.
111	pub fn dot_matches_new_line(
112	&mut self,
113	yes: bool,
114	) -> &mut TranslatorBuilder {
115	self.flags.dot_matches_new_line = if yes { Some(`true`) } else { None };
116	self
117	}
118
119	/// Enable or disable the CRLF mode flag (`R`) by default.
120	pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
121	self.flags.crlf = if yes { Some(`true`) } else { None };
122	self
123	}
124
125	/// Enable or disable the "swap greed" flag (`U`) by default.
126	pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
127	self.flags.swap_greed = if yes { Some(`true`) } else { None };
128	self
129	}
130
131	/// Enable or disable the Unicode flag (`u`) by default.
132	pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
133	self.flags.unicode = if yes { None } else { Some(`false`) };
134	self
135	}
136	}
137
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`].
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    utf8: bool,
    /// The line terminator to use for `.`.
    line_terminator: u8,
}
157
158	impl Translator {
159	/// Create a new translator using the default configuration.
160	pub fn new() -> Translator {
161	TranslatorBuilder::new().build()
162	}
163
164	/// Translate the given abstract syntax tree (AST) into a high level
165	/// intermediate representation (HIR).
166	///
167	/// If there was a problem doing the translation, then an HIR-specific
168	/// error is returned.
169	///
170	/// The original pattern string used to produce the `Ast` must* also be*
171	/// provided. The translator does not use the pattern string during any
172	/// correct translation, but is used for error reporting.
173	pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
174	ast::visit(ast, visitor:TranslatorI::new(self, pattern))
175	}
176	}
177
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A literal that is being constructed, character by character, from the
    /// AST. We need this because the AST gives each individual character its
    /// own node. So as we see characters, we peek at the top-most HirFrame.
    /// If it's a literal, then we add to it. Otherwise, we push a new literal.
    /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
    ///
    /// The literal is stored as raw bytes: chars are appended in their UTF-8
    /// encoding and individual bytes are appended as-is.
    Literal(Vec<u8>),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `utf8` is enabled (the default), then a byte character is only
    /// permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed whenever a repetition is observed. After visiting every
    /// sub-expression in the repetition, the translator's stack is expected to
    /// have this sentinel at the top.
    ///
    /// This sentinel only exists to stop other things (like flattening
    /// literals) from reaching across repetition operators.
    Repetition,
    /// This is pushed on to the stack upon first seeing any kind of capture,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a concatenation is observed. After visiting
    /// every sub-expression in the concatenation, the translator's stack is
    /// popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever an alternation is observed. After visiting
    /// every sub-expression in the alternation, the translator's stack is
    /// popped until it sees an Alternation frame.
    Alternation,
    /// This is pushed immediately before each sub-expression in an
    /// alternation. This separates the branches of an alternation on the
    /// stack and prevents literal flattening from reaching across alternation
    /// branches.
    ///
    /// It is popped after each expression in a branch until an 'Alternation'
    /// frame is observed when doing a post visit on an alternation.
    AlternationBranch,
}
250
251	impl HirFrame {
252	/// Assert that the current stack frame is an Hir expression and return it.
253	fn unwrap_expr(self) -> Hir {
254	match self {
255	HirFrame::Expr(expr) => expr,
256	HirFrame::Literal(lit) => Hir::literal(lit),
257	_ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
258	}
259	}
260
261	/// Assert that the current stack frame is a Unicode class expression and
262	/// return it.
263	fn unwrap_class_unicode(self) -> hir::ClassUnicode {
264	match self {
265	HirFrame::ClassUnicode(cls) => cls,
266	_ => panic!(
267	"tried to unwrap Unicode class \
268	from HirFrame, got: {:?}",
269	self
270	),
271	}
272	}
273
274	/// Assert that the current stack frame is a byte class expression and
275	/// return it.
276	fn unwrap_class_bytes(self) -> hir::ClassBytes {
277	match self {
278	HirFrame::ClassBytes(cls) => cls,
279	_ => panic!(
280	"tried to unwrap byte class \
281	from HirFrame, got: {:?}",
282	self
283	),
284	}
285	}
286
287	/// Assert that the current stack frame is a repetition sentinel. If it
288	/// isn't, then panic.
289	fn unwrap_repetition(self) {
290	match self {
291	HirFrame::Repetition => {}
292	_ => {
293	panic!(
294	"tried to unwrap repetition from HirFrame, got: {:?}",
295	self
296	)
297	}
298	}
299	}
300
301	/// Assert that the current stack frame is a group indicator and return
302	/// its corresponding flags (the flags that were active at the time the
303	/// group was entered).
304	fn unwrap_group(self) -> Flags {
305	match self {
306	HirFrame::Group { old_flags } => old_flags,
307	_ => {
308	panic!("tried to unwrap group from HirFrame, got: {:?}", self)
309	}
310	}
311	}
312
313	/// Assert that the current stack frame is an alternation pipe sentinel. If
314	/// it isn't, then panic.
315	fn unwrap_alternation_pipe(self) {
316	match self {
317	HirFrame::AlternationBranch => {}
318	_ => {
319	panic!(
320	"tried to unwrap alt pipe from HirFrame, got: {:?}",
321	self
322	)
323	}
324	}
325	}
326	}
327
328	impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
329	type Output = Hir;
330	type Err = Error;
331
332	fn finish(self) -> Result<Hir> {
333	// ... otherwise, we should have exactly one HIR on the stack.
334	assert_eq!(self.trans().stack.borrow().len(), `1`);
335	Ok(self.pop().unwrap().unwrap_expr())
336	}
337
338	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
339	match *ast {
340	Ast::ClassBracketed(_) => {
341	if self.flags().unicode() {
342	let cls = hir::ClassUnicode::empty();
343	self.push(HirFrame::ClassUnicode(cls));
344	} else {
345	let cls = hir::ClassBytes::empty();
346	self.push(HirFrame::ClassBytes(cls));
347	}
348	}
349	Ast::Repetition(_) => self.push(HirFrame::Repetition),
350	Ast::Group(ref x) => {
351	let old_flags = x
352	.flags()
353	.map(\|ast\| self.set_flags(ast))
354	.unwrap_or_else(\|\| self.flags());
355	self.push(HirFrame::Group { old_flags });
356	}
357	Ast::Concat(_) => {
358	self.push(HirFrame::Concat);
359	}
360	Ast::Alternation(ref x) => {
361	self.push(HirFrame::Alternation);
362	if !x.asts.is_empty() {
363	self.push(HirFrame::AlternationBranch);
364	}
365	}
366	_ => {}
367	}
368	Ok(())
369	}
370
371	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
372	match *ast {
373	Ast::Empty(_) => {
374	self.push(HirFrame::Expr(Hir::empty()));
375	}
376	Ast::Flags(ref x) => {
377	self.set_flags(&x.flags);
378	// Flags in the AST are generally considered directives and
379	// not actual sub-expressions. However, they can be used in
380	// the concrete syntax like `((?i))`, and we need some kind of
381	// indication of an expression there, and Empty is the correct
382	// choice.
383	//
384	// There can also be things like `(?i)+`, but we rule those out
385	// in the parser. In the future, we might allow them for
386	// consistency sake.
387	self.push(HirFrame::Expr(Hir::empty()));
388	}
389	Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
390	Either::Right(byte) => self.push_byte(byte),
391	Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
392	None => self.push_char(ch),
393	Some(expr) => self.push(HirFrame::Expr(expr)),
394	},
395	},
396	Ast::Dot(ref span) => {
397	self.push(HirFrame::Expr(self.hir_dot(**span)?));
398	}
399	Ast::Assertion(ref x) => {
400	self.push(HirFrame::Expr(self.hir_assertion(x)?));
401	}
402	Ast::ClassPerl(ref x) => {
403	if self.flags().unicode() {
404	let cls = self.hir_perl_unicode_class(x)?;
405	let hcls = hir::Class::Unicode(cls);
406	self.push(HirFrame::Expr(Hir::class(hcls)));
407	} else {
408	let cls = self.hir_perl_byte_class(x)?;
409	let hcls = hir::Class::Bytes(cls);
410	self.push(HirFrame::Expr(Hir::class(hcls)));
411	}
412	}
413	Ast::ClassUnicode(ref x) => {
414	let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
415	self.push(HirFrame::Expr(Hir::class(cls)));
416	}
417	Ast::ClassBracketed(ref ast) => {
418	if self.flags().unicode() {
419	let mut cls = self.pop().unwrap().unwrap_class_unicode();
420	self.unicode_fold_and_negate(
421	&ast.span,
422	ast.negated,
423	&mut cls,
424	)?;
425	let expr = Hir::class(hir::Class::Unicode(cls));
426	self.push(HirFrame::Expr(expr));
427	} else {
428	let mut cls = self.pop().unwrap().unwrap_class_bytes();
429	self.bytes_fold_and_negate(
430	&ast.span,
431	ast.negated,
432	&mut cls,
433	)?;
434	let expr = Hir::class(hir::Class::Bytes(cls));
435	self.push(HirFrame::Expr(expr));
436	}
437	}
438	Ast::Repetition(ref x) => {
439	let expr = self.pop().unwrap().unwrap_expr();
440	self.pop().unwrap().unwrap_repetition();
441	self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
442	}
443	Ast::Group(ref x) => {
444	let expr = self.pop().unwrap().unwrap_expr();
445	let old_flags = self.pop().unwrap().unwrap_group();
446	self.trans().flags.set(old_flags);
447	self.push(HirFrame::Expr(self.hir_capture(x, expr)));
448	}
449	Ast::Concat(_) => {
450	let mut exprs = vec![];
451	while let Some(expr) = self.pop_concat_expr() {
452	if !matches!(*expr.kind(), HirKind::Empty) {
453	exprs.push(expr);
454	}
455	}
456	exprs.reverse();
457	self.push(HirFrame::Expr(Hir::concat(exprs)));
458	}
459	Ast::Alternation(_) => {
460	let mut exprs = vec![];
461	while let Some(expr) = self.pop_alt_expr() {
462	self.pop().unwrap().unwrap_alternation_pipe();
463	exprs.push(expr);
464	}
465	exprs.reverse();
466	self.push(HirFrame::Expr(Hir::alternation(exprs)));
467	}
468	}
469	Ok(())
470	}
471
472	fn visit_alternation_in(&mut self) -> Result<()> {
473	self.push(HirFrame::AlternationBranch);
474	Ok(())
475	}
476
477	fn visit_class_set_item_pre(
478	&mut self,
479	ast: &ast::ClassSetItem,
480	) -> Result<()> {
481	match *ast {
482	ast::ClassSetItem::Bracketed(_) => {
483	if self.flags().unicode() {
484	let cls = hir::ClassUnicode::empty();
485	self.push(HirFrame::ClassUnicode(cls));
486	} else {
487	let cls = hir::ClassBytes::empty();
488	self.push(HirFrame::ClassBytes(cls));
489	}
490	}
491	// We needn't handle the Union case here since the visitor will
492	// do it for us.
493	_ => {}
494	}
495	Ok(())
496	}
497
498	fn visit_class_set_item_post(
499	&mut self,
500	ast: &ast::ClassSetItem,
501	) -> Result<()> {
502	match *ast {
503	ast::ClassSetItem::Empty(_) => {}
504	ast::ClassSetItem::Literal(ref x) => {
505	if self.flags().unicode() {
506	let mut cls = self.pop().unwrap().unwrap_class_unicode();
507	cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
508	self.push(HirFrame::ClassUnicode(cls));
509	} else {
510	let mut cls = self.pop().unwrap().unwrap_class_bytes();
511	let byte = self.class_literal_byte(x)?;
512	cls.push(hir::ClassBytesRange::new(byte, byte));
513	self.push(HirFrame::ClassBytes(cls));
514	}
515	}
516	ast::ClassSetItem::Range(ref x) => {
517	if self.flags().unicode() {
518	let mut cls = self.pop().unwrap().unwrap_class_unicode();
519	cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
520	self.push(HirFrame::ClassUnicode(cls));
521	} else {
522	let mut cls = self.pop().unwrap().unwrap_class_bytes();
523	let start = self.class_literal_byte(&x.start)?;
524	let end = self.class_literal_byte(&x.end)?;
525	cls.push(hir::ClassBytesRange::new(start, end));
526	self.push(HirFrame::ClassBytes(cls));
527	}
528	}
529	ast::ClassSetItem::Ascii(ref x) => {
530	if self.flags().unicode() {
531	let xcls = self.hir_ascii_unicode_class(x)?;
532	let mut cls = self.pop().unwrap().unwrap_class_unicode();
533	cls.union(&xcls);
534	self.push(HirFrame::ClassUnicode(cls));
535	} else {
536	let xcls = self.hir_ascii_byte_class(x)?;
537	let mut cls = self.pop().unwrap().unwrap_class_bytes();
538	cls.union(&xcls);
539	self.push(HirFrame::ClassBytes(cls));
540	}
541	}
542	ast::ClassSetItem::Unicode(ref x) => {
543	let xcls = self.hir_unicode_class(x)?;
544	let mut cls = self.pop().unwrap().unwrap_class_unicode();
545	cls.union(&xcls);
546	self.push(HirFrame::ClassUnicode(cls));
547	}
548	ast::ClassSetItem::Perl(ref x) => {
549	if self.flags().unicode() {
550	let xcls = self.hir_perl_unicode_class(x)?;
551	let mut cls = self.pop().unwrap().unwrap_class_unicode();
552	cls.union(&xcls);
553	self.push(HirFrame::ClassUnicode(cls));
554	} else {
555	let xcls = self.hir_perl_byte_class(x)?;
556	let mut cls = self.pop().unwrap().unwrap_class_bytes();
557	cls.union(&xcls);
558	self.push(HirFrame::ClassBytes(cls));
559	}
560	}
561	ast::ClassSetItem::Bracketed(ref ast) => {
562	if self.flags().unicode() {
563	let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
564	self.unicode_fold_and_negate(
565	&ast.span,
566	ast.negated,
567	&mut cls1,
568	)?;
569
570	let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
571	cls2.union(&cls1);
572	self.push(HirFrame::ClassUnicode(cls2));
573	} else {
574	let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
575	self.bytes_fold_and_negate(
576	&ast.span,
577	ast.negated,
578	&mut cls1,
579	)?;
580
581	let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
582	cls2.union(&cls1);
583	self.push(HirFrame::ClassBytes(cls2));
584	}
585	}
586	// This is handled automatically by the visitor.
587	ast::ClassSetItem::Union(_) => {}
588	}
589	Ok(())
590	}
591
592	fn visit_class_set_binary_op_pre(
593	&mut self,
594	_op: &ast::ClassSetBinaryOp,
595	) -> Result<()> {
596	if self.flags().unicode() {
597	let cls = hir::ClassUnicode::empty();
598	self.push(HirFrame::ClassUnicode(cls));
599	} else {
600	let cls = hir::ClassBytes::empty();
601	self.push(HirFrame::ClassBytes(cls));
602	}
603	Ok(())
604	}
605
606	fn visit_class_set_binary_op_in(
607	&mut self,
608	_op: &ast::ClassSetBinaryOp,
609	) -> Result<()> {
610	if self.flags().unicode() {
611	let cls = hir::ClassUnicode::empty();
612	self.push(HirFrame::ClassUnicode(cls));
613	} else {
614	let cls = hir::ClassBytes::empty();
615	self.push(HirFrame::ClassBytes(cls));
616	}
617	Ok(())
618	}
619
620	fn visit_class_set_binary_op_post(
621	&mut self,
622	op: &ast::ClassSetBinaryOp,
623	) -> Result<()> {
624	use crate::ast::ClassSetBinaryOpKind::*;
625
626	if self.flags().unicode() {
627	let mut rhs = self.pop().unwrap().unwrap_class_unicode();
628	let mut lhs = self.pop().unwrap().unwrap_class_unicode();
629	let mut cls = self.pop().unwrap().unwrap_class_unicode();
630	if self.flags().case_insensitive() {
631	rhs.try_case_fold_simple().map_err(\|_\| {
632	self.error(
633	op.rhs.span().clone(),
634	ErrorKind::UnicodeCaseUnavailable,
635	)
636	})?;
637	lhs.try_case_fold_simple().map_err(\|_\| {
638	self.error(
639	op.lhs.span().clone(),
640	ErrorKind::UnicodeCaseUnavailable,
641	)
642	})?;
643	}
644	match op.kind {
645	Intersection => lhs.intersect(&rhs),
646	Difference => lhs.difference(&rhs),
647	SymmetricDifference => lhs.symmetric_difference(&rhs),
648	}
649	cls.union(&lhs);
650	self.push(HirFrame::ClassUnicode(cls));
651	} else {
652	let mut rhs = self.pop().unwrap().unwrap_class_bytes();
653	let mut lhs = self.pop().unwrap().unwrap_class_bytes();
654	let mut cls = self.pop().unwrap().unwrap_class_bytes();
655	if self.flags().case_insensitive() {
656	rhs.case_fold_simple();
657	lhs.case_fold_simple();
658	}
659	match op.kind {
660	Intersection => lhs.intersect(&rhs),
661	Difference => lhs.difference(&rhs),
662	SymmetricDifference => lhs.symmetric_difference(&rhs),
663	}
664	cls.union(&lhs);
665	self.push(HirFrame::ClassBytes(cls));
666	}
667	Ok(())
668	}
669	}
670
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack, flags and configuration are used.
    trans: &'t Translator,
    /// The original pattern string, copied into errors for reporting.
    pattern: &'p str,
}
682
683	impl<'t, 'p> TranslatorI<'t, 'p> {
684	/// Build a new internal translator.
685	fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
686	TranslatorI { trans, pattern }
687	}
688
689	/// Return a reference to the underlying translator.
690	fn trans(&self) -> &Translator {
691	&self.trans
692	}
693
694	/// Push the given frame on to the call stack.
695	fn push(&self, frame: HirFrame) {
696	self.trans().stack.borrow_mut().push(frame);
697	}
698
699	/// Push the given literal char on to the call stack.
700	///
701	/// If the top-most element of the stack is a literal, then the char
702	/// is appended to the end of that literal. Otherwise, a new literal
703	/// containing just the given char is pushed to the top of the stack.
704	fn push_char(&self, ch: char) {
705	let mut buf = [`0`; `4`];
706	let bytes = ch.encode_utf8(&mut buf).as_bytes();
707	let mut stack = self.trans().stack.borrow_mut();
708	if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
709	literal.extend_from_slice(bytes);
710	} else {
711	stack.push(HirFrame::Literal(bytes.to_vec()));
712	}
713	}
714
715	/// Push the given literal byte on to the call stack.
716	///
717	/// If the top-most element of the stack is a literal, then the byte
718	/// is appended to the end of that literal. Otherwise, a new literal
719	/// containing just the given byte is pushed to the top of the stack.
720	fn push_byte(&self, byte: u8) {
721	let mut stack = self.trans().stack.borrow_mut();
722	if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
723	literal.push(byte);
724	} else {
725	stack.push(HirFrame::Literal(vec![byte]));
726	}
727	}
728
729	/// Pop the top of the call stack. If the call stack is empty, return None.
730	fn pop(&self) -> Option<HirFrame> {
731	self.trans().stack.borrow_mut().pop()
732	}
733
734	/// Pop an HIR expression from the top of the stack for a concatenation.
735	///
736	/// This returns None if the stack is empty or when a concat frame is seen.
737	/// Otherwise, it panics if it could not find an HIR expression.
738	fn pop_concat_expr(&self) -> Option<Hir> {
739	let frame = self.pop()?;
740	match frame {
741	HirFrame::Concat => None,
742	HirFrame::Expr(expr) => Some(expr),
743	HirFrame::Literal(lit) => Some(Hir::literal(lit)),
744	HirFrame::ClassUnicode(_) => {
745	unreachable!("expected expr or concat, got Unicode class")
746	}
747	HirFrame::ClassBytes(_) => {
748	unreachable!("expected expr or concat, got byte class")
749	}
750	HirFrame::Repetition => {
751	unreachable!("expected expr or concat, got repetition")
752	}
753	HirFrame::Group { .. } => {
754	unreachable!("expected expr or concat, got group")
755	}
756	HirFrame::Alternation => {
757	unreachable!("expected expr or concat, got alt marker")
758	}
759	HirFrame::AlternationBranch => {
760	unreachable!("expected expr or concat, got alt branch marker")
761	}
762	}
763	}
764
765	/// Pop an HIR expression from the top of the stack for an alternation.
766	///
767	/// This returns None if the stack is empty or when an alternation frame is
768	/// seen. Otherwise, it panics if it could not find an HIR expression.
769	fn pop_alt_expr(&self) -> Option<Hir> {
770	let frame = self.pop()?;
771	match frame {
772	HirFrame::Alternation => None,
773	HirFrame::Expr(expr) => Some(expr),
774	HirFrame::Literal(lit) => Some(Hir::literal(lit)),
775	HirFrame::ClassUnicode(_) => {
776	unreachable!("expected expr or alt, got Unicode class")
777	}
778	HirFrame::ClassBytes(_) => {
779	unreachable!("expected expr or alt, got byte class")
780	}
781	HirFrame::Repetition => {
782	unreachable!("expected expr or alt, got repetition")
783	}
784	HirFrame::Group { .. } => {
785	unreachable!("expected expr or alt, got group")
786	}
787	HirFrame::Concat => {
788	unreachable!("expected expr or alt, got concat marker")
789	}
790	HirFrame::AlternationBranch => {
791	unreachable!("expected expr or alt, got alt branch marker")
792	}
793	}
794	}
795
796	/// Create a new error with the given span and error type.
797	fn error(&self, span: Span, kind: ErrorKind) -> Error {
798	Error { kind, pattern: self.pattern.to_string(), span }
799	}
800
801	/// Return a copy of the active flags.
802	fn flags(&self) -> Flags {
803	self.trans().flags.get()
804	}
805
806	/// Set the flags of this translator from the flags set in the given AST.
807	/// Then, return the old flags.
808	fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
809	let old_flags = self.flags();
810	let mut new_flags = Flags::from_ast(ast_flags);
811	new_flags.merge(&old_flags);
812	self.trans().flags.set(new_flags);
813	old_flags
814	}
815
816	/// Convert an Ast literal to its scalar representation.
817	///
818	/// When Unicode mode is enabled, then this always succeeds and returns a
819	/// `char` (Unicode scalar value).
820	///
821	/// When Unicode mode is disabled, then a `char` will still be returned
822	/// whenever possible. A byte is returned only when invalid UTF-8 is
823	/// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte
824	/// will result in an error when invalid UTF-8 is not allowed.
825	fn ast_literal_to_scalar(
826	&self,
827	lit: &ast::Literal,
828	) -> Result<Either<char, u8>> {
829	if self.flags().unicode() {
830	return Ok(Either::Left(lit.c));
831	}
832	let byte = match lit.byte() {
833	None => return Ok(Either::Left(lit.c)),
834	Some(byte) => byte,
835	};
836	if byte <= `0x7F` {
837	return Ok(Either::Left(char::try_from(byte).unwrap()));
838	}
839	if self.trans().utf8 {
840	return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
841	}
842	Ok(Either::Right(byte))
843	}
844
845	fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
846	if !self.flags().case_insensitive() {
847	return Ok(None);
848	}
849	if self.flags().unicode() {
850	// If case folding won't do anything, then don't bother trying.
851	let map = unicode::SimpleCaseFolder::new()
852	.map(\|f\| f.overlaps(c, c))
853	.map_err(\|_\| {
854	self.error(span, ErrorKind::UnicodeCaseUnavailable)
855	})?;
856	if !map {
857	return Ok(None);
858	}
859	let mut cls =
860	hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
861	c, c,
862	)]);
863	cls.try_case_fold_simple().map_err(\|_\| {
864	self.error(span, ErrorKind::UnicodeCaseUnavailable)
865	})?;
866	Ok(Some(Hir::class(hir::Class::Unicode(cls))))
867	} else {
868	if !c.is_ascii() {
869	return Ok(None);
870	}
871	// If case folding won't do anything, then don't bother trying.
872	match c {
873	'A'..='Z' \| 'a'..='z' => {}
874	_ => return Ok(None),
875	}
876	let mut cls =
877	hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
878	// OK because 'c.len_utf8() == 1' which in turn implies
879	// that 'c' is ASCII.
880	u8::try_from(c).unwrap(),
881	u8::try_from(c).unwrap(),
882	)]);
883	cls.case_fold_simple();
884	Ok(Some(Hir::class(hir::Class::Bytes(cls))))
885	}
886	}
887
888	fn hir_dot(&self, span: Span) -> Result<Hir> {
889	let (utf8, lineterm, flags) =
890	(self.trans().utf8, self.trans().line_terminator, self.flags());
891	if utf8 && (!flags.unicode() \|\| !lineterm.is_ascii()) {
892	return Err(self.error(span, ErrorKind::InvalidUtf8));
893	}
894	let dot = if flags.dot_matches_new_line() {
895	if flags.unicode() {
896	hir::Dot::AnyChar
897	} else {
898	hir::Dot::AnyByte
899	}
900	} else {
901	if flags.unicode() {
902	if flags.crlf() {
903	hir::Dot::AnyCharExceptCRLF
904	} else {
905	if !lineterm.is_ascii() {
906	return Err(
907	self.error(span, ErrorKind::InvalidLineTerminator)
908	);
909	}
910	hir::Dot::AnyCharExcept(char::from(lineterm))
911	}
912	} else {
913	if flags.crlf() {
914	hir::Dot::AnyByteExceptCRLF
915	} else {
916	hir::Dot::AnyByteExcept(lineterm)
917	}
918	}
919	};
920	Ok(Hir::dot(dot))
921	}
922
923	fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
924	let unicode = self.flags().unicode();
925	let multi_line = self.flags().multi_line();
926	let crlf = self.flags().crlf();
927	Ok(match asst.kind {
928	ast::AssertionKind::StartLine => Hir::look(if multi_line {
929	if crlf {
930	hir::Look::StartCRLF
931	} else {
932	hir::Look::StartLF
933	}
934	} else {
935	hir::Look::Start
936	}),
937	ast::AssertionKind::EndLine => Hir::look(if multi_line {
938	if crlf {
939	hir::Look::EndCRLF
940	} else {
941	hir::Look::EndLF
942	}
943	} else {
944	hir::Look::End
945	}),
946	ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
947	ast::AssertionKind::EndText => Hir::look(hir::Look::End),
948	ast::AssertionKind::WordBoundary => Hir::look(if unicode {
949	hir::Look::WordUnicode
950	} else {
951	hir::Look::WordAscii
952	}),
953	ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
954	hir::Look::WordUnicodeNegate
955	} else {
956	hir::Look::WordAsciiNegate
957	}),
958	ast::AssertionKind::WordBoundaryStart
959	\| ast::AssertionKind::WordBoundaryStartAngle => {
960	Hir::look(if unicode {
961	hir::Look::WordStartUnicode
962	} else {
963	hir::Look::WordStartAscii
964	})
965	}
966	ast::AssertionKind::WordBoundaryEnd
967	\| ast::AssertionKind::WordBoundaryEndAngle => {
968	Hir::look(if unicode {
969	hir::Look::WordEndUnicode
970	} else {
971	hir::Look::WordEndAscii
972	})
973	}
974	ast::AssertionKind::WordBoundaryStartHalf => {
975	Hir::look(if unicode {
976	hir::Look::WordStartHalfUnicode
977	} else {
978	hir::Look::WordStartHalfAscii
979	})
980	}
981	ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
982	hir::Look::WordEndHalfUnicode
983	} else {
984	hir::Look::WordEndHalfAscii
985	}),
986	})
987	}
988
989	fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
990	let (index, name) = match group.kind {
991	ast::GroupKind::CaptureIndex(index) => (index, None),
992	ast::GroupKind::CaptureName { ref name, .. } => {
993	(name.index, Some(name.name.clone().into_boxed_str()))
994	}
995	// The HIR doesn't need to use non-capturing groups, since the way
996	// in which the data type is defined handles this automatically.
997	ast::GroupKind::NonCapturing(_) => return expr,
998	};
999	Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
1000	}
1001
1002	fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
1003	let (min, max) = match rep.op.kind {
1004	ast::RepetitionKind::ZeroOrOne => (`0`, Some(`1`)),
1005	ast::RepetitionKind::ZeroOrMore => (`0`, None),
1006	ast::RepetitionKind::OneOrMore => (`1`, None),
1007	ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
1008	(m, Some(m))
1009	}
1010	ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
1011	(m, None)
1012	}
1013	ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
1014	m,
1015	n,
1016	)) => (m, Some(n)),
1017	};
1018	let greedy =
1019	if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
1020	Hir::repetition(hir::Repetition {
1021	min,
1022	max,
1023	greedy,
1024	sub: Box::new(expr),
1025	})
1026	}
1027
1028	fn hir_unicode_class(
1029	&self,
1030	ast_class: &ast::ClassUnicode,
1031	) -> Result<hir::ClassUnicode> {
1032	use crate::ast::ClassUnicodeKind::*;
1033
1034	if !self.flags().unicode() {
1035	return Err(
1036	self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
1037	);
1038	}
1039	let query = match ast_class.kind {
1040	OneLetter(name) => ClassQuery::OneLetter(name),
1041	Named(ref name) => ClassQuery::Binary(name),
1042	NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
1043	property_name: name,
1044	property_value: value,
1045	},
1046	};
1047	let mut result = self.convert_unicode_class_error(
1048	&ast_class.span,
1049	unicode::class(query),
1050	);
1051	if let Ok(ref mut class) = result {
1052	self.unicode_fold_and_negate(
1053	&ast_class.span,
1054	ast_class.negated,
1055	class,
1056	)?;
1057	}
1058	result
1059	}
1060
1061	fn hir_ascii_unicode_class(
1062	&self,
1063	ast: &ast::ClassAscii,
1064	) -> Result<hir::ClassUnicode> {
1065	let mut cls = hir::ClassUnicode::new(
1066	ascii_class_as_chars(&ast.kind)
1067	.map(\|(s, e)\| hir::ClassUnicodeRange::new(s, e)),
1068	);
1069	self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1070	Ok(cls)
1071	}
1072
1073	fn hir_ascii_byte_class(
1074	&self,
1075	ast: &ast::ClassAscii,
1076	) -> Result<hir::ClassBytes> {
1077	let mut cls = hir::ClassBytes::new(
1078	ascii_class(&ast.kind)
1079	.map(\|(s, e)\| hir::ClassBytesRange::new(s, e)),
1080	);
1081	self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1082	Ok(cls)
1083	}
1084
1085	fn hir_perl_unicode_class(
1086	&self,
1087	ast_class: &ast::ClassPerl,
1088	) -> Result<hir::ClassUnicode> {
1089	use crate::ast::ClassPerlKind::*;
1090
1091	assert!(self.flags().unicode());
1092	let result = match ast_class.kind {
1093	Digit => unicode::perl_digit(),
1094	Space => unicode::perl_space(),
1095	Word => unicode::perl_word(),
1096	};
1097	let mut class =
1098	self.convert_unicode_class_error(&ast_class.span, result)?;
1099	// We needn't apply case folding here because the Perl Unicode classes
1100	// are already closed under Unicode simple case folding.
1101	if ast_class.negated {
1102	class.negate();
1103	}
1104	Ok(class)
1105	}
1106
1107	fn hir_perl_byte_class(
1108	&self,
1109	ast_class: &ast::ClassPerl,
1110	) -> Result<hir::ClassBytes> {
1111	use crate::ast::ClassPerlKind::*;
1112
1113	assert!(!self.flags().unicode());
1114	let mut class = match ast_class.kind {
1115	Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
1116	Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
1117	Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
1118	};
1119	// We needn't apply case folding here because the Perl ASCII classes
1120	// are already closed (under ASCII case folding).
1121	if ast_class.negated {
1122	class.negate();
1123	}
1124	// Negating a Perl byte class is likely to cause it to match invalid
1125	// UTF-8. That's only OK if the translator is configured to allow such
1126	// things.
1127	if self.trans().utf8 && !class.is_ascii() {
1128	return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
1129	}
1130	Ok(class)
1131	}
1132
1133	/// Converts the given Unicode specific error to an HIR translation error.
1134	///
1135	/// The span given should approximate the position at which an error would
1136	/// occur.
1137	fn convert_unicode_class_error(
1138	&self,
1139	span: &Span,
1140	result: core::result::Result<hir::ClassUnicode, unicode::Error>,
1141	) -> Result<hir::ClassUnicode> {
1142	result.map_err(\|err\| {
1143	let sp = span.clone();
1144	match err {
1145	unicode::Error::PropertyNotFound => {
1146	self.error(sp, ErrorKind::UnicodePropertyNotFound)
1147	}
1148	unicode::Error::PropertyValueNotFound => {
1149	self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
1150	}
1151	unicode::Error::PerlClassNotFound => {
1152	self.error(sp, ErrorKind::UnicodePerlClassNotFound)
1153	}
1154	}
1155	})
1156	}
1157
1158	fn unicode_fold_and_negate(
1159	&self,
1160	span: &Span,
1161	negated: bool,
1162	class: &mut hir::ClassUnicode,
1163	) -> Result<()> {
1164	// Note that we must apply case folding before negation!
1165	// Consider `(?i)[^x]`. If we applied negation first, then
1166	// the result would be the character class that matched any
1167	// Unicode scalar value.
1168	if self.flags().case_insensitive() {
1169	class.try_case_fold_simple().map_err(\|_\| {
1170	self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
1171	})?;
1172	}
1173	if negated {
1174	class.negate();
1175	}
1176	Ok(())
1177	}
1178
1179	fn bytes_fold_and_negate(
1180	&self,
1181	span: &Span,
1182	negated: bool,
1183	class: &mut hir::ClassBytes,
1184	) -> Result<()> {
1185	// Note that we must apply case folding before negation!
1186	// Consider `(?i)[^x]`. If we applied negation first, then
1187	// the result would be the character class that matched any
1188	// Unicode scalar value.
1189	if self.flags().case_insensitive() {
1190	class.case_fold_simple();
1191	}
1192	if negated {
1193	class.negate();
1194	}
1195	if self.trans().utf8 && !class.is_ascii() {
1196	return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
1197	}
1198	Ok(())
1199	}
1200
1201	/// Return a scalar byte value suitable for use as a literal in a byte
1202	/// character class.
1203	fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
1204	match self.ast_literal_to_scalar(ast)? {
1205	Either::Right(byte) => Ok(byte),
1206	Either::Left(ch) => {
1207	if ch.is_ascii() {
1208	Ok(u8::try_from(ch).unwrap())
1209	} else {
1210	// We can't feasibly support Unicode in
1211	// byte oriented classes. Byte classes don't
1212	// do Unicode case folding.
1213	Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
1214	}
1215	}
1216	}
1217	}
1218	}
1219
1220	/// A translator's representation of a regular expression's flags at any given
1221	/// moment in time.
1222	///
1223	/// Each flag can be in one of three states: absent, present but disabled or
1224	/// present but enabled.
1225	#[derive(Clone, Copy, Debug, Default)]
1226	struct Flags {
1227	case_insensitive: Option<bool>,
1228	multi_line: Option<bool>,
1229	dot_matches_new_line: Option<bool>,
1230	swap_greed: Option<bool>,
1231	unicode: Option<bool>,
1232	crlf: Option<bool>,
1233	// Note that `ignore_whitespace` is omitted here because it is handled
1234	// entirely in the parser.
1235	}
1236
1237	impl Flags {
1238	fn from_ast(ast: &ast::Flags) -> Flags {
1239	let mut flags = Flags::default();
1240	let mut enable = `true`;
1241	for item in &ast.items {
1242	match item.kind {
1243	ast::FlagsItemKind::Negation => {
1244	enable = `false`;
1245	}
1246	ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1247	flags.case_insensitive = Some(enable);
1248	}
1249	ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1250	flags.multi_line = Some(enable);
1251	}
1252	ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1253	flags.dot_matches_new_line = Some(enable);
1254	}
1255	ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1256	flags.swap_greed = Some(enable);
1257	}
1258	ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1259	flags.unicode = Some(enable);
1260	}
1261	ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
1262	flags.crlf = Some(enable);
1263	}
1264	ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1265	}
1266	}
1267	flags
1268	}
1269
1270	fn merge(&mut self, previous: &Flags) {
1271	if self.case_insensitive.is_none() {
1272	self.case_insensitive = previous.case_insensitive;
1273	}
1274	if self.multi_line.is_none() {
1275	self.multi_line = previous.multi_line;
1276	}
1277	if self.dot_matches_new_line.is_none() {
1278	self.dot_matches_new_line = previous.dot_matches_new_line;
1279	}
1280	if self.swap_greed.is_none() {
1281	self.swap_greed = previous.swap_greed;
1282	}
1283	if self.unicode.is_none() {
1284	self.unicode = previous.unicode;
1285	}
1286	if self.crlf.is_none() {
1287	self.crlf = previous.crlf;
1288	}
1289	}
1290
1291	fn case_insensitive(&self) -> bool {
1292	self.case_insensitive.unwrap_or(`false`)
1293	}
1294
1295	fn multi_line(&self) -> bool {
1296	self.multi_line.unwrap_or(`false`)
1297	}
1298
1299	fn dot_matches_new_line(&self) -> bool {
1300	self.dot_matches_new_line.unwrap_or(`false`)
1301	}
1302
1303	fn swap_greed(&self) -> bool {
1304	self.swap_greed.unwrap_or(`false`)
1305	}
1306
1307	fn unicode(&self) -> bool {
1308	self.unicode.unwrap_or(`true`)
1309	}
1310
1311	fn crlf(&self) -> bool {
1312	self.crlf.unwrap_or(`false`)
1313	}
1314	}
1315
1316	fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1317	let ranges: Vec<_> = ascii_classimpl Iterator(kind)
1318	.map(\|(s: u8, e: u8)\| hir::ClassBytesRange::new(start:s, end:e))
1319	.collect();
1320	hir::ClassBytes::new(ranges)
1321	}
1322
1323	fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
1324	use crate::ast::ClassAsciiKind::*;
1325
1326	let slice: &'static [(u8, u8)] = match *kind {
1327	Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
1328	Alpha => &[(b'A', b'Z'), (b'a', b'z')],
1329	Ascii => &[(b'`\x00`', b'`\x7F`')],
1330	Blank => &[(b'`\t`', b'`\t`'), (b' ', b' ')],
1331	Cntrl => &[(b'`\x00`', b'`\x1F`'), (b'`\x7F`', b'`\x7F`')],
1332	Digit => &[(b'0', b'9')],
1333	Graph => &[(b'!', b'~')],
1334	Lower => &[(b'a', b'z')],
1335	Print => &[(b' ', b'~')],
1336	Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
1337	Space => &[
1338	(b'`\t`', b'`\t`'),
1339	(b'`\n`', b'`\n`'),
1340	(b'`\x0B`', b'`\x0B`'),
1341	(b'`\x0C`', b'`\x0C`'),
1342	(b'`\r`', b'`\r`'),
1343	(b' ', b' '),
1344	],
1345	Upper => &[(b'A', b'Z')],
1346	Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
1347	Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
1348	};
1349	slice.iter().copied()
1350	}
1351
1352	fn ascii_class_as_chars(
1353	kind: &ast::ClassAsciiKind,
1354	) -> impl Iterator<Item = (char, char)> {
1355	ascii_class(kind).map(\|(s: u8, e: u8)\| (char::from(s), char::from(e)))
1356	}
1357
1358	#[cfg(test)]
1359	mod tests {
1360	use crate::{
1361	ast::{parse::ParserBuilder, Position},
1362	hir::{Look, Properties},
1363	};
1364
1365	use super::*;
1366
1367	// We create these errors to compare with real hir::Errors in the tests.
1368	// We define equality between TestError and hir::Error to disregard the
1369	// pattern string in hir::Error, which is annoying to provide in tests.
1370	#[derive(Clone, Debug)]
1371	struct TestError {
1372	span: Span,
1373	kind: hir::ErrorKind,
1374	}
1375
1376	impl PartialEq<hir::Error> for TestError {
1377	fn eq(&self, other: &hir::Error) -> bool {
1378	self.span == other.span && self.kind == other.kind
1379	}
1380	}
1381
1382	impl PartialEq<TestError> for hir::Error {
1383	fn eq(&self, other: &TestError) -> bool {
1384	self.span == other.span && self.kind == other.kind
1385	}
1386	}
1387
1388	fn parse(pattern: &str) -> Ast {
1389	ParserBuilder::new().octal(`true`).build().parse(pattern).unwrap()
1390	}
1391
1392	fn t(pattern: &str) -> Hir {
1393	TranslatorBuilder::new()
1394	.utf8(`true`)
1395	.build()
1396	.translate(pattern, &parse(pattern))
1397	.unwrap()
1398	}
1399
1400	fn t_err(pattern: &str) -> hir::Error {
1401	TranslatorBuilder::new()
1402	.utf8(`true`)
1403	.build()
1404	.translate(pattern, &parse(pattern))
1405	.unwrap_err()
1406	}
1407
1408	fn t_bytes(pattern: &str) -> Hir {
1409	TranslatorBuilder::new()
1410	.utf8(`false`)
1411	.build()
1412	.translate(pattern, &parse(pattern))
1413	.unwrap()
1414	}
1415
1416	fn props(pattern: &str) -> Properties {
1417	t(pattern).properties().clone()
1418	}
1419
1420	fn props_bytes(pattern: &str) -> Properties {
1421	t_bytes(pattern).properties().clone()
1422	}
1423
1424	fn hir_lit(s: &str) -> Hir {
1425	hir_blit(s.as_bytes())
1426	}
1427
1428	fn hir_blit(s: &[u8]) -> Hir {
1429	Hir::literal(s)
1430	}
1431
1432	fn hir_capture(index: u32, expr: Hir) -> Hir {
1433	Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
1434	}
1435
1436	fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
1437	Hir::capture(hir::Capture {
1438	index,
1439	name: Some(name.into()),
1440	sub: Box::new(expr),
1441	})
1442	}
1443
1444	fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1445	Hir::repetition(hir::Repetition {
1446	min: `0`,
1447	max: Some(`1`),
1448	greedy,
1449	sub: Box::new(expr),
1450	})
1451	}
1452
1453	fn hir_star(greedy: bool, expr: Hir) -> Hir {
1454	Hir::repetition(hir::Repetition {
1455	min: `0`,
1456	max: None,
1457	greedy,
1458	sub: Box::new(expr),
1459	})
1460	}
1461
1462	fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1463	Hir::repetition(hir::Repetition {
1464	min: `1`,
1465	max: None,
1466	greedy,
1467	sub: Box::new(expr),
1468	})
1469	}
1470
1471	fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
1472	Hir::repetition(hir::Repetition {
1473	min,
1474	max,
1475	greedy,
1476	sub: Box::new(expr),
1477	})
1478	}
1479
1480	fn hir_alt(alts: Vec<Hir>) -> Hir {
1481	Hir::alternation(alts)
1482	}
1483
1484	fn hir_cat(exprs: Vec<Hir>) -> Hir {
1485	Hir::concat(exprs)
1486	}
1487
1488	#[allow(dead_code)]
1489	fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1490	Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1491	}
1492
1493	#[allow(dead_code)]
1494	fn hir_uclass_perl_word() -> Hir {
1495	Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1496	}
1497
1498	fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
1499	Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
1500	ascii_class_as_chars(kind)
1501	.map(\|(s, e)\| hir::ClassUnicodeRange::new(s, e)),
1502	)))
1503	}
1504
1505	fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
1506	Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
1507	ascii_class(kind).map(\|(s, e)\| hir::ClassBytesRange::new(s, e)),
1508	)))
1509	}
1510
1511	fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1512	Hir::class(uclass(ranges))
1513	}
1514
1515	fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1516	Hir::class(bclass(ranges))
1517	}
1518
1519	fn hir_case_fold(expr: Hir) -> Hir {
1520	match expr.into_kind() {
1521	HirKind::Class(mut cls) => {
1522	cls.case_fold_simple();
1523	Hir::class(cls)
1524	}
1525	_ => panic!("cannot case fold non-class Hir expr"),
1526	}
1527	}
1528
1529	fn hir_negate(expr: Hir) -> Hir {
1530	match expr.into_kind() {
1531	HirKind::Class(mut cls) => {
1532	cls.negate();
1533	Hir::class(cls)
1534	}
1535	_ => panic!("cannot negate non-class Hir expr"),
1536	}
1537	}
1538
1539	fn uclass(ranges: &[(char, char)]) -> hir::Class {
1540	let ranges: Vec<hir::ClassUnicodeRange> = ranges
1541	.iter()
1542	.map(\|&(s, e)\| hir::ClassUnicodeRange::new(s, e))
1543	.collect();
1544	hir::Class::Unicode(hir::ClassUnicode::new(ranges))
1545	}
1546
1547	fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
1548	let ranges: Vec<hir::ClassBytesRange> = ranges
1549	.iter()
1550	.map(\|&(s, e)\| hir::ClassBytesRange::new(s, e))
1551	.collect();
1552	hir::Class::Bytes(hir::ClassBytes::new(ranges))
1553	}
1554
/// Applies simple case folding to `cls` and wraps it in an expression.
#[cfg(feature = "unicode-case")]
fn class_case_fold(mut cls: hir::Class) -> Hir {
    cls.case_fold_simple();
    Hir::class(cls)
}
1560
1561	fn class_negate(mut cls: hir::Class) -> Hir {
1562	cls.negate();
1563	Hir::class(cls)
1564	}
1565
1566	#[allow(dead_code)]
1567	fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1568	use crate::hir::Class::{Bytes, Unicode};
1569
1570	match (expr1.into_kind(), expr2.into_kind()) {
1571	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1572	c1.union(&c2);
1573	Hir::class(hir::Class::Unicode(c1))
1574	}
1575	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1576	c1.union(&c2);
1577	Hir::class(hir::Class::Bytes(c1))
1578	}
1579	_ => panic!("cannot union non-class Hir exprs"),
1580	}
1581	}
1582
1583	#[allow(dead_code)]
1584	fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1585	use crate::hir::Class::{Bytes, Unicode};
1586
1587	match (expr1.into_kind(), expr2.into_kind()) {
1588	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1589	c1.difference(&c2);
1590	Hir::class(hir::Class::Unicode(c1))
1591	}
1592	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1593	c1.difference(&c2);
1594	Hir::class(hir::Class::Bytes(c1))
1595	}
1596	_ => panic!("cannot difference non-class Hir exprs"),
1597	}
1598	}
1599
1600	fn hir_look(look: hir::Look) -> Hir {
1601	Hir::look(look)
1602	}
1603
/// Patterns made up of nothing but empty pieces.
#[test]
fn empty() {
    let cases: Vec<(&str, Hir)> = vec![
        ("", Hir::empty()),
        ("(?i)", Hir::empty()),
        ("()", hir_capture(1, Hir::empty())),
        ("(?:)", Hir::empty()),
        ("(?P<wat>)", hir_capture_name(1, "wat", Hir::empty())),
        ("|", hir_alt(vec![Hir::empty(), Hir::empty()])),
        (
            "()|()",
            hir_alt(vec![
                hir_capture(1, Hir::empty()),
                hir_capture(2, Hir::empty()),
            ]),
        ),
        ("(|b)", hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b")]))),
        ("(a|)", hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty()]))),
        (
            "(a||c)",
            hir_capture(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c")]),
            ),
        ),
        (
            "(||)",
            hir_capture(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty()]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1642
/// Plain literals, with and without Unicode mode, in both UTF-8 settings.
#[test]
fn literal() {
    let utf8_cases = [
        ("a", "a"),
        ("(?-u)a", "a"),
        ("☃", "☃"),
        ("abcd", "abcd"),
        ("(?-u)☃", "☃"),
    ];
    for &(pattern, lit) in &utf8_cases {
        assert_eq!(t(pattern), hir_lit(lit), "pattern: {:?}", pattern);
    }

    let byte_cases: [(&str, &[u8]); 4] = [
        ("(?-u)a", b"a"),
        ("(?-u)\x61", b"a"),
        (r"(?-u)\x61", b"a"),
        (r"(?-u)\xFF", b"\xFF"),
    ];
    for &(pattern, lit) in &byte_cases {
        assert_eq!(t_bytes(pattern), hir_blit(lit), "pattern: {:?}", pattern);
    }

    // A non-ASCII byte literal is rejected when valid UTF-8 is required.
    let span = Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10));
    assert_eq!(
        t_err(r"(?-u)\xFF"),
        TestError { kind: hir::ErrorKind::InvalidUtf8, span }
    );
}
1667
/// Literals under the case-insensitive flag expand to small classes.
#[test]
fn literal_case_insensitive() {
    // The class a single cased ASCII letter expands to, as a Unicode class
    // and as a byte class respectively.
    #[cfg(feature = "unicode-case")]
    let ufold = |upper: char, lower: char| {
        hir_uclass(&[(upper, upper), (lower, lower)])
    };
    let bfold =
        |upper: u8, lower: u8| hir_bclass(&[(upper, upper), (lower, lower)]);

    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), ufold('A', 'a'));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i:a)"), ufold('A', 'a'));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![hir_lit("a"), ufold('A', 'a'), hir_lit("a")])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            ufold('A', 'a'),
            ufold('B', 'b'),
            hir_lit("@"),
            ufold('C', 'c'),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
    );

    assert_eq!(t("(?i-u)a"), bfold(b'A', b'a'));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![hir_lit("a"), bfold(b'A', b'a'), hir_lit("a")])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            bfold(b'A', b'a'),
            bfold(b'B', b'b'),
            hir_lit("@"),
            bfold(b'C', b'c'),
        ])
    );

    for &pattern in &["(?i-u)a", "(?i-u)\x61", r"(?i-u)\x61"] {
        assert_eq!(
            t_bytes(pattern),
            bfold(b'A', b'a'),
            "pattern: {:?}",
            pattern
        );
    }
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(t("(?i-u)β"), hir_lit("β"));
}
1735
/// The classes produced for `.` under the `s`, `R` and `u` flags.
#[test]
fn dot() {
    const MAX_CHAR: char = '\u{10FFFF}';

    let unicode_cases: Vec<(&str, Hir)> = vec![
        (".", hir_uclass(&[('\0', '\t'), ('\x0B', MAX_CHAR)])),
        (
            "(?R).",
            hir_uclass(&[
                ('\0', '\t'),
                ('\x0B', '\x0C'),
                ('\x0E', MAX_CHAR),
            ]),
        ),
        ("(?s).", hir_uclass(&[('\0', MAX_CHAR)])),
        ("(?Rs).", hir_uclass(&[('\0', MAX_CHAR)])),
    ];
    for (pattern, expected) in unicode_cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }

    let byte_cases: Vec<(&str, Hir)> = vec![
        ("(?-u).", hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])),
        (
            "(?R-u).",
            hir_bclass(&[
                (b'\0', b'\t'),
                (b'\x0B', b'\x0C'),
                (b'\x0E', b'\xFF'),
            ]),
        ),
        ("(?s-u).", hir_bclass(&[(b'\0', b'\xFF')])),
        ("(?Rs-u).", hir_bclass(&[(b'\0', b'\xFF')])),
    ];
    for (pattern, expected) in byte_cases {
        assert_eq!(t_bytes(pattern), expected, "pattern: {:?}", pattern);
    }

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    // In every pattern below the `.` is the last byte, sits on line 1 and
    // starts at the listed offset.
    let rejected = [("(?-u).", 5), ("(?R-u).", 6), ("(?s-u).", 6), ("(?Rs-u).", 7)];
    for &(pattern, offset) in &rejected {
        let span = Span::new(
            Position::new(offset, 1, offset + 1),
            Position::new(offset + 1, 1, offset + 2),
        );
        assert_eq!(
            t_err(pattern),
            TestError { kind: hir::ErrorKind::InvalidUtf8, span },
            "pattern: {:?}",
            pattern
        );
    }
}
1809
/// Anchors and word boundaries map onto the expected look-arounds.
#[test]
fn assertions() {
    let cases = [
        ("^", hir::Look::Start),
        ("$", hir::Look::End),
        (r"\A", hir::Look::Start),
        (r"\z", hir::Look::End),
        ("(?m)^", hir::Look::StartLF),
        ("(?m)$", hir::Look::EndLF),
        (r"(?m)\A", hir::Look::Start),
        (r"(?m)\z", hir::Look::End),
        (r"\b", hir::Look::WordUnicode),
        (r"\B", hir::Look::WordUnicodeNegate),
        (r"(?-u)\b", hir::Look::WordAscii),
        (r"(?-u)\B", hir::Look::WordAsciiNegate),
    ];
    for &(pattern, look) in &cases {
        assert_eq!(t(pattern), hir_look(look), "pattern: {:?}", pattern);
    }
}
1826
/// Capturing, named and non-capturing groups.
#[test]
fn group() {
    let cases: Vec<(&str, Hir)> = vec![
        ("(a)", hir_capture(1, hir_lit("a"))),
        (
            "(a)(b)",
            hir_cat(vec![
                hir_capture(1, hir_lit("a")),
                hir_capture(2, hir_lit("b")),
            ]),
        ),
        (
            "(a)|(b)",
            hir_alt(vec![
                hir_capture(1, hir_lit("a")),
                hir_capture(2, hir_lit("b")),
            ]),
        ),
        ("(?P<foo>)", hir_capture_name(1, "foo", Hir::empty())),
        ("(?P<foo>a)", hir_capture_name(1, "foo", hir_lit("a"))),
        (
            "(?P<foo>a)(?P<bar>b)",
            hir_cat(vec![
                hir_capture_name(1, "foo", hir_lit("a")),
                hir_capture_name(2, "bar", hir_lit("b")),
            ]),
        ),
        ("(?:)", Hir::empty()),
        ("(?:a)", hir_lit("a")),
        (
            "(?:a)(b)",
            hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b"))]),
        ),
        (
            "(a)(?:b)(c)",
            hir_cat(vec![
                hir_capture(1, hir_lit("a")),
                hir_lit("b"),
                hir_capture(2, hir_lit("c")),
            ]),
        ),
        (
            "(a)(?P<foo>b)(c)",
            hir_cat(vec![
                hir_capture(1, hir_lit("a")),
                hir_capture_name(2, "foo", hir_lit("b")),
                hir_capture(3, hir_lit("c")),
            ]),
        ),
        ("()", hir_capture(1, Hir::empty())),
        ("((?i))", hir_capture(1, Hir::empty())),
        ("((?x))", hir_capture(1, Hir::empty())),
        ("(((?x)))", hir_capture(1, hir_capture(2, Hir::empty()))),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
1883
/// Line and text anchors under every combination of the `m` and `R` flags.
#[test]
fn line_anchors() {
    let cases = [
        // No flags.
        ("^", hir::Look::Start),
        ("$", hir::Look::End),
        (r"\A", hir::Look::Start),
        (r"\z", hir::Look::End),
        // Multi-line mode only.
        (r"(?m)\A", hir::Look::Start),
        (r"(?m)\z", hir::Look::End),
        ("(?m)^", hir::Look::StartLF),
        ("(?m)$", hir::Look::EndLF),
        // CRLF mode only: without multi-line mode nothing changes.
        (r"(?R)\A", hir::Look::Start),
        (r"(?R)\z", hir::Look::End),
        ("(?R)^", hir::Look::Start),
        ("(?R)$", hir::Look::End),
        // CRLF and multi-line mode together.
        (r"(?Rm)\A", hir::Look::Start),
        (r"(?Rm)\z", hir::Look::End),
        ("(?Rm)^", hir::Look::StartCRLF),
        ("(?Rm)$", hir::Look::EndCRLF),
    ];
    for &(pattern, look) in &cases {
        assert_eq!(t(pattern), hir_look(look), "pattern: {:?}", pattern);
    }
}
1906
/// Flags apply to the right part of the pattern: scoped flag groups only
/// affect their own group, while bare flag directives affect the rest of
/// the enclosing group.
#[test]
fn flags() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(
            vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
        )
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("β"),
        ])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("b"),
        ])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![
            hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(
            vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
        )
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::StartLF),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::StartLF),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::Start),
        ])
    );
    // Swap-greed: under `U`, `a*` becomes lazy and `a*?` becomes greedy;
    // `-U` restores the usual meaning. The pattern needs one `*` repetition
    // per expected `hir_star` node.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ]),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1994
/// Every escaped meta character translates to the character itself.
#[test]
fn escape() {
    // One escape per character of the expected literal, in order:
    // `\`, `.`, `+`, `*`, `?`, `(`, `)`, `|`, `[`, `]`, `{`, `}`, `^`,
    // `$` and `#`.
    assert_eq!(
        t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
        hir_lit(r"\.+*?()|[]{}^$#")
    );
}
2002
/// Repetition operators, their lazy forms and their binding strength.
#[test]
fn repetition() {
    let a = || hir_lit("a");
    let cases: Vec<(&str, Hir)> = vec![
        ("a?", hir_quest(true, a())),
        ("a*", hir_star(true, a())),
        ("a+", hir_plus(true, a())),
        ("a??", hir_quest(false, a())),
        ("a*?", hir_star(false, a())),
        ("a+?", hir_plus(false, a())),
        ("a{1}", hir_range(true, 1, Some(1), a())),
        ("a{1,}", hir_range(true, 1, None, a())),
        ("a{1,2}", hir_range(true, 1, Some(2), a())),
        ("a{1}?", hir_range(false, 1, Some(1), a())),
        ("a{1,}?", hir_range(false, 1, None, a())),
        ("a{1,2}?", hir_range(false, 1, Some(2), a())),
        // A repetition operator binds to the single preceding item only.
        ("ab?", hir_cat(vec![a(), hir_quest(true, hir_lit("b"))])),
        ("(ab)?", hir_quest(true, hir_capture(1, hir_lit("ab")))),
        ("a|b?", hir_alt(vec![a(), hir_quest(true, hir_lit("b"))])),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
2029
/// Concatenations and alternations of look-arounds, flat and nested.
#[test]
fn cat_alt() {
    let start = || hir_look(hir::Look::Start);
    let end = || hir_look(hir::Look::End);
    let word = || hir_look(hir::Look::WordUnicode);
    let not_word = || hir_look(hir::Look::WordUnicodeNegate);
    // The three concatenated branches shared by several patterns below.
    let start_end = || hir_cat(vec![start(), end()]);
    let end_word = || hir_cat(vec![end(), word()]);
    let word_not_word = || hir_cat(vec![word(), not_word()]);

    let cases: Vec<(&str, Hir)> = vec![
        ("(^$)", hir_capture(1, start_end())),
        ("^|$", hir_alt(vec![start(), end()])),
        (r"^|$|\b", hir_alt(vec![start(), end(), word()])),
        (
            r"^$|$\b|\b\B",
            hir_alt(vec![start_end(), end_word(), word_not_word()]),
        ),
        ("(^|$)", hir_capture(1, hir_alt(vec![start(), end()]))),
        (
            r"(^|$|\b)",
            hir_capture(1, hir_alt(vec![start(), end(), word()])),
        ),
        (
            r"(^$|$\b|\b\B)",
            hir_capture(
                1,
                hir_alt(vec![start_end(), end_word(), word_not_word()]),
            ),
        ),
        (
            r"(^$|($\b|(\b\B)))",
            hir_capture(
                1,
                hir_alt(vec![
                    start_end(),
                    hir_capture(
                        2,
                        hir_alt(vec![
                            end_word(),
                            hir_capture(3, word_not_word()),
                        ]),
                    ),
                ]),
            ),
        ),
    ];
    for (pattern, expected) in cases {
        assert_eq!(t(pattern), expected, "pattern: {:?}", pattern);
    }
}
2081
// Checks that an alternation made up only of classes, such as
// '[a-z]|[A-Z]', is flattened into the single class '[A-Za-z]' that is the
// union of its branches. The exception is a mix of branches matching
// invalid UTF-8 and branches matching non-ASCII Unicode, which cannot be
// merged.
#[test]
fn cat_class_flattened() {
    let ascii_letters = hir_uclass(&[('A', 'Z'), ('a', 'z')]);
    assert_eq!(t(r"[a-z]|[A-Z]"), ascii_letters);

    // The union of all the individual letter categories is the one big
    // letter category.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)
            \p{Lowercase_Letter}
            |\p{Uppercase_Letter}
            |\p{Titlecase_Letter}
            |\p{Modifier_Letter}
            |\p{Other_Letter}
        "),
        hir_uclass_query(ClassQuery::Binary("letter"))
    );

    // A byte class that can truly match invalid UTF-8 cannot be merged
    // with Unicode classes, so the alternation is kept.
    let unmerged = hir_alt(vec![
        hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
        hir_bclass(&[(b'\x90', b'\xFF')]),
        hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
    ]);
    assert_eq!(t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"), unmerged);

    // Byte classes on their own can be merged, even when some are ASCII
    // and others match invalid UTF-8.
    let merged =
        hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]);
    assert_eq!(t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"), merged);
}
2120
// Checks that each POSIX-style ASCII class name inside a bracketed class
// translates to the corresponding ASCII class, then covers negation, case
// insensitivity and the `(?-u)` (non-Unicode) variants.
#[test]
fn class_ascii() {
    assert_eq!(
        t("[[:alnum:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Alnum)
    );
    assert_eq!(
        t("[[:alpha:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Alpha)
    );
    assert_eq!(
        t("[[:ascii:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Ascii)
    );
    assert_eq!(
        t("[[:blank:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Blank)
    );
    assert_eq!(
        t("[[:cntrl:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Cntrl)
    );
    assert_eq!(
        t("[[:digit:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Digit)
    );
    assert_eq!(
        t("[[:graph:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Graph)
    );
    assert_eq!(
        t("[[:lower:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Lower)
    );
    assert_eq!(
        t("[[:print:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Print)
    );
    assert_eq!(
        t("[[:punct:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Punct)
    );
    assert_eq!(
        t("[[:space:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Space)
    );
    assert_eq!(
        t("[[:upper:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Upper)
    );
    assert_eq!(
        t("[[:word:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Word)
    );
    assert_eq!(
        t("[[:xdigit:]]"),
        hir_ascii_uclass(&ast::ClassAsciiKind::Xdigit)
    );

    // Negated form of an ASCII class in Unicode mode.
    assert_eq!(
        t("[[:^lower:]]"),
        hir_negate(hir_ascii_uclass(&ast::ClassAsciiKind::Lower))
    );
    // Unicode case folding of `[a-z]` is expected to add U+017F and U+212A
    // on top of the ASCII letters.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    // With Unicode mode disabled the result is a byte class.
    assert_eq!(
        t("(?-u)[[:lower:]]"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Lower)
    );
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_ascii_bclass(&ast::ClassAsciiKind::Lower))
    );

    // Negating an ASCII class with Unicode mode disabled is expected to be
    // rejected by `t_err` with `InvalidUtf8`; the span covers the bracketed
    // class that follows the flag group.
    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(16, 1, 17)
            ),
        }
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(7, 1, 8),
                Position::new(17, 1, 18)
            ),
        }
    );
}
2225
// Checks that several ASCII classes inside one bracketed class are unioned,
// including a negated one, in both Unicode and byte mode.
#[test]
fn class_ascii_multiple() {
    // See: https://github.com/rust-lang/regex/issues/680
    assert_eq!(
        t("[[:alnum:][:^ascii:]]"),
        hir_union(
            hir_ascii_uclass(&ast::ClassAsciiKind::Alnum),
            hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
        ),
    );
    // In byte mode the negated ASCII class covers the bytes 0x80..=0xFF.
    assert_eq!(
        t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
        hir_union(
            hir_ascii_bclass(&ast::ClassAsciiKind::Alnum),
            hir_bclass(&[(0x80, 0xFF)]),
        ),
    );
}
2244
// Exercises the Perl classes `\d`, `\s` and `\w` and their negations in
// Unicode mode, with and without the case-insensitive flag. The assertions
// expect the `(?i)` forms to translate to the same classes as the plain
// forms.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl_unicode() {
    // Builders for the expected classes. Each call produces a fresh value
    // because every assertion consumes its expected operand.
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let space = || hir_uclass_query(ClassQuery::Binary("space"));
    let word = hir_uclass_perl_word;

    // Unicode
    assert_eq!(t(r"\d"), digit());
    assert_eq!(t(r"\s"), space());
    assert_eq!(t(r"\w"), word());
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\d"), digit());
        assert_eq!(t(r"(?i)\s"), space());
        assert_eq!(t(r"(?i)\w"), word());
    }

    // Unicode, negated
    assert_eq!(t(r"\D"), hir_negate(digit()));
    assert_eq!(t(r"\S"), hir_negate(space()));
    assert_eq!(t(r"\W"), hir_negate(word()));
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
        assert_eq!(t(r"(?i)\S"), hir_negate(space()));
        assert_eq!(t(r"(?i)\W"), hir_negate(word()));
    }
}
2288
// Exercises the Perl classes `\d`, `\s` and `\w` with Unicode mode disabled:
// the plain forms become ASCII byte classes, the negated forms translate
// through `t_bytes`, and the same negated forms are rejected by `t_err`.
#[test]
fn class_perl_ascii() {
    // ASCII only
    assert_eq!(
        t(r"(?-u)\d"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
    );
    assert_eq!(
        t(r"(?-u)\s"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Space)
    );
    assert_eq!(
        t(r"(?-u)\w"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Word)
    );
    assert_eq!(
        t(r"(?i-u)\d"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Digit)
    );
    assert_eq!(
        t(r"(?i-u)\s"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Space)
    );
    assert_eq!(
        t(r"(?i-u)\w"),
        hir_ascii_bclass(&ast::ClassAsciiKind::Word)
    );

    // ASCII only, negated
    assert_eq!(
        t_bytes(r"(?-u)\D"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?-u)\S"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t_bytes(r"(?-u)\W"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
    );
    assert_eq!(
        t_bytes(r"(?i-u)\D"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?i-u)\S"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t_bytes(r"(?i-u)\W"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
    );

    // ASCII only, negated, with UTF-8 mode enabled.
    // In this case, negating any Perl class results in an error because
    // all such classes can match invalid UTF-8.
    // The expected spans cover only the two-character escape that follows
    // the flag group.
    assert_eq!(
        t_err(r"(?-u)\D"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(7, 1, 8),
            ),
        },
    );
    assert_eq!(
        t_err(r"(?-u)\S"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(7, 1, 8),
            ),
        },
    );
    assert_eq!(
        t_err(r"(?-u)\W"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(7, 1, 8),
            ),
        },
    );
    assert_eq!(
        t_err(r"(?i-u)\D"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 9),
            ),
        },
    );
    assert_eq!(
        t_err(r"(?i-u)\S"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 9),
            ),
        },
    );
    assert_eq!(
        t_err(r"(?i-u)\W"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 9),
            ),
        },
    );
}
2407
// Without the `unicode-perl` feature, translating `\w` in Unicode mode is
// expected to fail with `UnicodePerlClassNotFound`; the span covers the whole
// two-character escape.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    assert_eq!(
        t_err(r"\w"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2422
// With both `unicode-perl` and `unicode-bool` disabled, translating `\s` in
// Unicode mode is expected to fail with `UnicodePerlClassNotFound`; the span
// covers the whole two-character escape.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    assert_eq!(
        t_err(r"\s"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2437
// With both `unicode-perl` and `unicode-gencat` disabled, translating `\d`
// in Unicode mode is expected to fail with `UnicodePerlClassNotFound`; the
// span covers the whole two-character escape.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    assert_eq!(
        t_err(r"\d"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2455
// Exercises general-category property classes: the one-letter and braced
// spellings, loose name matching, the `gc:`/`gc=`/`gc!=` forms, the special
// `any`/`assigned`/`ascii` names, and the errors expected for unknown
// properties or for property classes used with Unicode mode disabled.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    // All of these spellings are expected to resolve to the same class.
    assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(
        t(r"\p{Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{se PaRa ToR}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc:Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc=Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{Other}"),
        hir_uclass_query(ClassQuery::Binary("Other"))
    );
    assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));

    // Negated spellings.
    assert_eq!(
        t(r"\PZ"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    assert_eq!(
        t(r"\P{separator}"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    assert_eq!(
        t(r"\P{gc!=separator}"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );

    // The special names, with and without the `gc:` prefix.
    assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
    assert_eq!(
        t(r"\p{assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );
    assert_eq!(
        t(r"\p{gc:any}"),
        hir_uclass_query(ClassQuery::Binary("Any"))
    );
    assert_eq!(
        t(r"\p{gc:assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{gc:ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );

    // Property classes are expected to be rejected when Unicode mode is
    // disabled; the span starts after the flag group.
    assert_eq!(
        t_err(r"(?-u)\pZ"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 9)
            ),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(18, 1, 19)
            ),
        }
    );
    // Unknown property names and unknown property values produce distinct
    // error kinds.
    assert_eq!(
        t_err(r"\pE"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(3, 1, 4)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{gc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
}
2569
// Without the `unicode-gencat` feature, general-category names (including
// the special `Any`) are expected to fail with `UnicodePropertyNotFound`; the
// span covers the whole `\p{...}` item.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    assert_eq!(
        t_err(r"\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(13, 1, 14)
            ),
        }
    );

    assert_eq!(
        t_err(r"\p{Any}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
}
2595
// Exercises script property classes: the plain form, the case-insensitive
// and negated forms, and the error expected for an unknown `sc:`/`scx:`
// value.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    assert_eq!(
        t(r"\p{Greek}"),
        hir_uclass_query(ClassQuery::Binary("Greek"))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\p{Greek}"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
    );
    // The expected value applies case folding before negation.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\P{Greek}"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "Greek"
        ))))
    );

    // Unknown script values are reported as a missing property value.
    assert_eq!(
        t_err(r"\p{sc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{scx:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2637
// Without the `unicode-script` feature, script names and `scx:` queries are
// expected to fail with `UnicodePropertyNotFound`; the span covers the whole
// `\p{...}` item.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    assert_eq!(
        t_err(r"\p{Greek}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(9, 1, 10)
            ),
        }
    );

    assert_eq!(
        t_err(r"\p{scx:Greek}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(13, 1, 14)
            ),
        }
    );
}
2663
// With the `unicode-age` feature enabled, an unknown `age` value is expected
// to fail with `UnicodePropertyValueNotFound`; the span covers the whole
// `\p{...}` item.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    assert_eq!(
        t_err(r"\p{age:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2678
// Negating the `any` property is expected to yield a Unicode class with no
// ranges at all.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    let translated = t(r"\P{any}");
    let empty_class = hir_uclass(&[]);
    assert_eq!(translated, empty_class);
}
2684
// Without the `unicode-age` feature, an `age` query is expected to fail with
// `UnicodePropertyNotFound`; the span covers the whole `\p{...}` item.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    assert_eq!(
        t_err(r"\p{age:3.0}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2699
// Exercises translation of bracketed classes: single items and ranges,
// escapes, Perl and Unicode property classes nested in brackets, byte mode,
// case insensitivity, negation, and a few unusual-but-legal spellings.
#[test]
fn class_bracketed() {
    // A single-character class is expected to become a literal.
    assert_eq!(t("[a]"), hir_lit("a"));
    assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
    assert_eq!(t("[^[a]]"), class_negate(uclass(&[('a', 'a')])));
    assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
    // Overlapping and adjacent ranges are merged.
    assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
    assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
    assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
    // Both the regex escape `\n` and a literal newline character are
    // accepted inside a class.
    assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
    assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\p{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    // Double negation cancels out.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\PZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\P{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // Byte mode.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes; Unicode folding may add non-ASCII
    // counterparts (e.g. U+212A for `k`), byte-mode folding does not.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));

    // Negated classes.
    assert_eq!(t("[^a]"), class_negate(uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        class_negate(bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\pZ]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\p{separator}]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\p{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[\P{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );

    // Test some weird cases.
    assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));

    assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
    assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));

    assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
    assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));

    assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
    assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));

    // A negated byte class is expected to be rejected by `t_err` with
    // `InvalidUtf8`; the span covers the bracketed class.
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
    // Negating a class that already matches everything gives an empty class.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]),);
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]),);
}
2839
// Exercises unions inside bracketed classes: plain ranges, ranges mixed with
// Unicode property classes, nested brackets, and the negated and
// case-insensitive variants of a three-way union.
#[test]
fn class_bracketed_union() {
    // Builders for the expected operands. Each is gated on exactly the
    // features of the assertions that use it, so no builder is left unused
    // in a reduced-feature build.
    #[cfg(feature = "unicode-gencat")]
    let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    let greek_or_separator = || {
        hir_union(hir_uclass_query(ClassQuery::Binary("greek")), separator())
    };
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_3_0 = || {
        hir_uclass_query(ClassQuery::ByValue {
            property_name: "age",
            property_value: "3.0",
        })
    };
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    let age_greek_separator = || hir_union(age_3_0(), greek_or_separator());

    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));

    #[cfg(feature = "unicode-gencat")]
    {
        let expected = hir_union(hir_uclass(&[('a', 'b')]), separator());
        assert_eq!(t(r"[a\pZb]"), expected);
    }

    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    assert_eq!(t(r"[\pZ\p{Greek}]"), greek_or_separator());

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    {
        assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), age_greek_separator());

        // Nested brackets flatten into one union.
        assert_eq!(
            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
            hir_union(
                age_3_0(),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
                    greek_or_separator()
                )
            )
        );
    }

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
        hir_case_fold(age_greek_separator())
    );

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(age_greek_separator())
    );

    // The expected value applies case folding before negation.
    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-case",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    assert_eq!(
        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
        hir_negate(hir_case_fold(age_greek_separator()))
    );
}
2956
// Exercises a negated class nested inside another bracketed class, with the
// outer class plain or negated, in case-sensitive and case-insensitive mode.
#[test]
fn class_bracketed_nested() {
    // The single-range class `[c]`, rebuilt for every assertion.
    let just_c = || uclass(&[('c', 'c')]);

    // `[^c]` already contains `a` and `b`, so the union is still "not c".
    for &pattern in &[r"[a[^c]]", r"[a-b[^c]]"] {
        assert_eq!(t(pattern), class_negate(just_c()));
    }
    // Adding `c` itself to `[^c]` covers everything.
    assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));

    for &pattern in &[r"[^a[^c]]", r"[^a-b[^c]]"] {
        assert_eq!(t(pattern), hir_uclass(&[('c', 'c')]));
    }

    #[cfg(feature = "unicode-case")]
    {
        for &pattern in &[r"(?i)[a[^c]]", r"(?i)[a-b[^c]]"] {
            assert_eq!(t(pattern), hir_negate(class_case_fold(just_c())));
        }
        for &pattern in &[r"(?i)[^a[^c]]", r"(?i)[^a-b[^c]]"] {
            assert_eq!(t(pattern), hir_uclass(&[('C', 'C'), ('c', 'c')]));
        }
    }

    // Negating a class that covers everything leaves nothing.
    assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]));
}
2989
// Exercises the `&&` intersection operator inside bracketed classes, in
// Unicode and byte mode, with and without case insensitivity, followed by a
// few escaping and precedence corner cases.
#[test]
fn class_bracketed_intersect() {
    // Unicode mode: each pattern is expected to reduce to a single range.
    let unicode_cases: [(&str, (char, char)); 8] = [
        ("[abc&&b-c]", ('b', 'c')),
        ("[abc&&[b-c]]", ('b', 'c')),
        ("[[abc]&&[b-c]]", ('b', 'c')),
        ("[a-z&&b-y&&c-x]", ('c', 'x')),
        ("[c-da-b&&a-d]", ('a', 'd')),
        ("[a-d&&c-da-b]", ('a', 'd')),
        (r"[a-z&&a-c]", ('a', 'c')),
        (r"[[a-z&&a-c]]", ('a', 'c')),
    ];
    for &(pattern, range) in unicode_cases.iter() {
        assert_eq!(t(pattern), hir_uclass(&[range]));
    }
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Byte mode.
    let byte_cases: [(&str, (u8, u8)); 6] = [
        ("(?-u)[abc&&b-c]", (b'b', b'c')),
        ("(?-u)[abc&&[b-c]]", (b'b', b'c')),
        ("(?-u)[[abc]&&[b-c]]", (b'b', b'c')),
        ("(?-u)[a-z&&b-y&&c-x]", (b'c', b'x')),
        ("(?-u)[c-da-b&&a-d]", (b'a', b'd')),
        ("(?-u)[a-d&&c-da-b]", (b'a', b'd')),
    ];
    for &(pattern, range) in byte_cases.iter() {
        assert_eq!(t(pattern), hir_bclass(&[range]));
    }

    // Case-insensitive Unicode mode: the expected class is the case-folded
    // intersection.
    #[cfg(feature = "unicode-case")]
    {
        let folded_unicode_cases: [(&str, (char, char)); 6] = [
            ("(?i)[abc&&b-c]", ('b', 'c')),
            ("(?i)[abc&&[b-c]]", ('b', 'c')),
            ("(?i)[[abc]&&[b-c]]", ('b', 'c')),
            ("(?i)[a-z&&b-y&&c-x]", ('c', 'x')),
            ("(?i)[c-da-b&&a-d]", ('a', 'd')),
            ("(?i)[a-d&&c-da-b]", ('a', 'd')),
        ];
        for &(pattern, range) in folded_unicode_cases.iter() {
            assert_eq!(t(pattern), hir_case_fold(hir_uclass(&[range])));
        }
    }

    // Case-insensitive byte mode.
    let folded_byte_cases: [(&str, (u8, u8)); 6] = [
        ("(?i-u)[abc&&b-c]", (b'b', b'c')),
        ("(?i-u)[abc&&[b-c]]", (b'b', b'c')),
        ("(?i-u)[[abc]&&[b-c]]", (b'b', b'c')),
        ("(?i-u)[a-z&&b-y&&c-x]", (b'c', b'x')),
        ("(?i-u)[c-da-b&&a-d]", (b'a', b'd')),
        ("(?i-u)[a-d&&c-da-b]", (b'a', b'd')),
    ];
    for &(pattern, range) in folded_byte_cases.iter() {
        assert_eq!(t(pattern), hir_case_fold(hir_bclass(&[range])));
    }

    // In `[a^]`, `^` does not need to be escaped, so it makes sense that
    // `^` is also allowed to be unescaped after `&&`.
    assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
    // `]` needs to be escaped after `&&` since it's not at start of class.
    assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
    assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
    // Test precedence.
    assert_eq!(
        t(r"[a-w&&[^c-g]z]"),
        hir_uclass(&[('a', 'b'), ('h', 'w')])
    );
}
3079
// Exercises how negation combines with `&&` intersection, both for a negated
// outer class and for negated operands, in Unicode and byte mode.
#[test]
fn class_bracketed_intersect_negate() {
    // Builders for the expected digit classes, rebuilt per assertion.
    #[cfg(feature = "unicode-perl")]
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let ascii_digit = || hir_ascii_bclass(&ast::ClassAsciiKind::Digit);

    // Unicode mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[^\w&&\d]"), hir_negate(digit()));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    {
        assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(digit()));
        assert_eq!(t(r"[^[^\w&&\d]]"), digit());
        assert_eq!(
            t(r"[[[^\w]&&[^\d]]]"),
            hir_negate(hir_uclass_perl_word())
        );
    }

    // Byte mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
    );
}
3123
// Exercises the `--` difference operator inside bracketed classes, in
// Unicode and byte mode.
#[test]
fn class_bracketed_difference() {
    // Letters minus the ASCII range U+0000..=U+007F.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pL--[:ascii:]]"),
        hir_difference(
            hir_uclass_query(ClassQuery::Binary("letter")),
            hir_uclass(&[('\0', '\x7F')])
        )
    );

    // ASCII letters minus the lowercase ones leaves `A-Z`.
    assert_eq!(
        t(r"(?-u)[[:alpha:]--[:lower:]]"),
        hir_bclass(&[(b'A', b'Z')])
    );
}
3140
// Exercises the `~~` symmetric difference operator inside bracketed classes,
// in Unicode and byte mode.
#[test]
fn class_bracketed_symmetric_difference() {
    // Codepoints that are in exactly one of `sc:Greek` and `scx:Greek`.
    #[cfg(feature = "unicode-script")]
    assert_eq!(
        t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
        // Class({
        //     '·'..='·',
        //     '\u{300}'..='\u{301}',
        //     '\u{304}'..='\u{304}',
        //     '\u{306}'..='\u{306}',
        //     '\u{308}'..='\u{308}',
        //     '\u{313}'..='\u{313}',
        //     '\u{342}'..='\u{342}',
        //     '\u{345}'..='\u{345}',
        //     'ʹ'..='ʹ',
        //     '\u{1dc0}'..='\u{1dc1}',
        //     '⁝'..='⁝',
        // })
        hir_uclass(&[
            ('·', '·'),
            ('\u{0300}', '\u{0301}'),
            ('\u{0304}', '\u{0304}'),
            ('\u{0306}', '\u{0306}'),
            ('\u{0308}', '\u{0308}'),
            ('\u{0313}', '\u{0313}'),
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('ʹ', 'ʹ'),
            ('\u{1DC0}', '\u{1DC1}'),
            ('⁝', '⁝'),
        ])
    );
    // `a-g` and `c-j` overlap on `c-g`, leaving `a-b` and `h-j`.
    assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

    assert_eq!(
        t(r"(?-u)[a-g~~c-j]"),
        hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
    );
}
3180
// Exercises the `x` (ignore whitespace) flag: whitespace and `#` comments
// are tolerated inside octal/hex escapes, property classes and counted
// repetitions, while an escaped space still matches a space.
#[test]
fn ignore_whitespace() {
    // `\12` is expected to translate to a newline, followed by a literal `3`.
    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
    assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
{ # comment
53 # comment
} #comment"),
        hir_lit("S")
    );

    assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
53 # comment"),
        hir_lit("S")
    );
    assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );

    // Counted repetition `a{5,10}` spread over several commented lines.
    assert_eq!(
        t(r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment"),
        hir_range(true, 5, Some(10), hir_lit("a"))
    );

    // An escaped space is kept as a literal; the trailing comment is dropped.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
3222
// Checks the `is_utf8` property reported for a set of patterns translated
// via `props_bytes`.
#[test]
fn analysis_is_utf8() {
    // Positive examples.
    let utf8_patterns = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
        r"(?-u)\B",
    ];
    for &pattern in utf8_patterns.iter() {
        assert!(
            props_bytes(pattern).is_utf8(),
            "expected is_utf8() to hold for {:?}",
            pattern
        );
    }

    // Negative examples.
    let non_utf8_patterns = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
    ];
    for &pattern in non_utf8_patterns.iter() {
        assert!(
            !props_bytes(pattern).is_utf8(),
            "expected is_utf8() not to hold for {:?}",
            pattern
        );
    }
}
3245
// Checks `explicit_captures_len` for a set of patterns: non-capturing and
// flag groups count as zero, and every capturing group (named, empty,
// repeated, nested or inside an alternation) counts once.
#[test]
fn analysis_captures_len() {
    assert_eq!(0, props(r"a").explicit_captures_len());
    assert_eq!(0, props(r"(?:a)").explicit_captures_len());
    assert_eq!(0, props(r"(?i-u:a)").explicit_captures_len());
    assert_eq!(0, props(r"(?i-u)a").explicit_captures_len());
    assert_eq!(1, props(r"(a)").explicit_captures_len());
    assert_eq!(1, props(r"(?P<foo>a)").explicit_captures_len());
    assert_eq!(1, props(r"()").explicit_captures_len());
    assert_eq!(1, props(r"()a").explicit_captures_len());
    assert_eq!(1, props(r"(a)+").explicit_captures_len());
    assert_eq!(2, props(r"(a)(b)").explicit_captures_len());
    assert_eq!(2, props(r"(a)|(b)").explicit_captures_len());
    assert_eq!(2, props(r"((a))").explicit_captures_len());
    assert_eq!(1, props(r"([a&&b])").explicit_captures_len());
}
3262
// Checks `static_explicit_captures_len()` against a table of patterns. The
// expectation is `None` for patterns listed with no fixed count.
#[test]
fn analysis_static_captures_len() {
    // Each entry is (expected static capture count, pattern).
    let cases: &[(Option<usize>, &str)] = &[
        (Some(0), r""),
        (Some(0), r"foo|bar"),
        (None, r"(foo)|bar"),
        (None, r"foo|(bar)"),
        (Some(1), r"(foo|bar)"),
        (Some(1), r"(a|b|c|d|e|f)"),
        (Some(1), r"(a)|(b)|(c)|(d)|(e)|(f)"),
        (Some(2), r"(a)(b)|(c)(d)|(e)(f)"),
        (Some(6), r"(a)(b)(c)(d)(e)(f)"),
        (Some(3), r"(a)(b)(extra)|(a)(b)()"),
        (Some(3), r"(a)(b)((?:extra)?)"),
        (None, r"(a)(b)(extra)?"),
        (Some(1), r"(foo)|(bar)"),
        (Some(2), r"(foo)(bar)"),
        (Some(2), r"(foo)+(bar)"),
        (None, r"(foo)*(bar)"),
        (Some(0), r"(foo)?{0}"),
        (None, r"(foo)?{1}"),
        (Some(1), r"(foo){1}"),
        (Some(1), r"(foo){1,}"),
        (Some(1), r"(foo){1,}?"),
        (None, r"(foo){1,}??"),
        (None, r"(foo){0,}"),
        (Some(1), r"(foo)(?:bar)"),
        (Some(2), r"(foo(?:bar)+)(?:baz(boo))"),
        (Some(2), r"(?P<bar>foo)(?:bar)(bal|loon)"),
        (
            Some(2),
            r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#,
        ),
    ];
    for &(expected, pattern) in cases {
        assert_eq!(
            expected,
            props(pattern).static_explicit_captures_len()
        );
    }
}
3297
// Checks that patterns built only from look-around assertions report a
// non-empty look set together with a minimum length of zero.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples: nothing here consumes input.
    let assertion_only = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in &assertion_only {
        let properties = props(pattern);
        assert!(!properties.look_set().is_empty());
        assert_eq!(properties.minimum_len(), Some(0));
    }

    // Negative example: still contains an assertion, but the literal pushes
    // the minimum length to one.
    let properties = props(r"^a");
    assert!(!properties.look_set().is_empty());
    assert_eq!(properties.minimum_len(), Some(1));
}
3346
// Checks that the ASCII word boundary appearing in one branch of the leading
// alternation shows up in `look_set_prefix_any()`.
#[test]
fn analysis_look_set_prefix_any() {
    let pattern = r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))";
    let prefix_looks = props(pattern).look_set_prefix_any();
    assert!(prefix_looks.contains(Look::WordAscii));
}
3352
// Checks whether a pattern is anchored at its start (`Look::Start` in the
// prefix look set) and/or at its end (`Look::End` in the suffix look set).
//
// The assertions come in start/end pairs so that both directions are
// exercised for every shape of pattern.
#[test]
fn analysis_is_anchored() {
    let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
    let is_end = |p| props(p).look_set_suffix().contains(Look::End);

    // Positive examples.
    assert!(is_start(r"^"));
    assert!(is_end(r"$"));

    assert!(is_start(r"^^"));
    assert!(is_end(r"$$"));

    assert!(is_start(r"^$"));
    assert!(is_end(r"^$"));

    assert!(is_start(r"^foo"));
    assert!(is_end(r"foo$"));

    assert!(is_start(r"^foo|^bar"));
    assert!(is_end(r"foo$|bar$"));

    assert!(is_start(r"^(foo|bar)"));
    assert!(is_end(r"(foo|bar)$"));

    assert!(is_start(r"^+"));
    assert!(is_end(r"$+"));
    assert!(is_start(r"^++"));
    assert!(is_end(r"$++"));
    assert!(is_start(r"(^)+"));
    assert!(is_end(r"($)+"));

    // `$^` is anchored on both sides. The end-anchored half used to be a
    // second copy of the start-anchored assertion, leaving it untested.
    assert!(is_start(r"$^"));
    assert!(is_end(r"$^"));
    assert!(is_start(r"$^|^$"));
    assert!(is_end(r"$^|^$"));

    assert!(is_start(r"\b^"));
    assert!(is_end(r"$\b"));
    assert!(is_start(r"^(?m:^)"));
    assert!(is_end(r"(?m:$)$"));
    assert!(is_start(r"(?m:^)^"));
    assert!(is_end(r"$(?m:$)"));

    // Negative examples.
    assert!(!is_start(r"(?m)^"));
    assert!(!is_end(r"(?m)$"));
    assert!(!is_start(r"(?m:^$)|$^"));
    assert!(!is_end(r"(?m:^$)|$^"));
    assert!(!is_start(r"$^|(?m:^$)"));
    assert!(!is_end(r"$^|(?m:^$)"));

    assert!(!is_start(r"a^"));
    assert!(!is_start(r"$a"));

    assert!(!is_end(r"a^"));
    assert!(!is_end(r"$a"));

    assert!(!is_start(r"^foo|bar"));
    assert!(!is_end(r"foo|bar$"));

    // A repetition that may match zero times does not guarantee the anchor.
    assert!(!is_start(r"^*"));
    assert!(!is_end(r"$*"));
    assert!(!is_start(r"^*+"));
    assert!(!is_end(r"$*+"));
    assert!(!is_start(r"^+*"));
    assert!(!is_end(r"$+*"));
    assert!(!is_start(r"(^)*"));
    assert!(!is_end(r"($)*"));
}
3422
// Checks whether `Look::Start` / `Look::End` appear anywhere in a pattern's
// overall look set.
#[test]
fn analysis_is_any_anchored() {
    let has_look = |pattern: &str, look: Look| {
        let looks = props(pattern).look_set();
        looks.contains(look)
    };

    // Positive examples.
    assert!(has_look(r"^", Look::Start));
    assert!(has_look(r"$", Look::End));
    assert!(has_look(r"\A", Look::Start));
    assert!(has_look(r"\z", Look::End));

    // Negative examples: multi-line anchors, and the opposite anchor.
    assert!(!has_look(r"(?m)^", Look::Start));
    assert!(!has_look(r"(?m)$", Look::End));
    assert!(!has_look(r"$", Look::Start));
    assert!(!has_look(r"^", Look::End));
}
3440
// Checks which patterns can match the empty string, as reported by a
// minimum length of `Some(0)`.
#[test]
fn analysis_can_empty() {
    // Positive examples.
    let assert_empty =
        |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
    assert_empty(r"");
    assert_empty(r"()");
    assert_empty(r"()*");
    assert_empty(r"()+");
    assert_empty(r"()?");
    assert_empty(r"a*");
    assert_empty(r"a?");
    assert_empty(r"a{0}");
    assert_empty(r"a{0,}");
    assert_empty(r"a{0,1}");
    assert_empty(r"a{0,10}");
    #[cfg(feature = "unicode-gencat")]
    assert_empty(r"\pL*");
    assert_empty(r"a*|b");
    assert_empty(r"b|a*");
    assert_empty(r"a|");
    assert_empty(r"|a");
    assert_empty(r"a||b");
    // Every piece of this concatenation is optional. Without the `*`
    // operators the pattern would need at least five characters and could
    // never be empty.
    assert_empty(r"a*a?(abcd)*");
    assert_empty(r"^");
    assert_empty(r"$");
    assert_empty(r"(?m)^");
    assert_empty(r"(?m)$");
    assert_empty(r"\A");
    assert_empty(r"\z");
    assert_empty(r"\B");
    assert_empty(r"(?-u)\B");
    assert_empty(r"\b");
    assert_empty(r"(?-u)\b");

    // Negative examples.
    let assert_non_empty =
        |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
    assert_non_empty(r"a+");
    assert_non_empty(r"a{1}");
    assert_non_empty(r"a{1,}");
    assert_non_empty(r"a{1,2}");
    assert_non_empty(r"a{1,10}");
    assert_non_empty(r"b|a");
    // Mirror of the positive concatenation case above: only the mandatory
    // `a+` in the middle keeps this from matching the empty string.
    assert_non_empty(r"a*a+(abcd)*");
    #[cfg(feature = "unicode-gencat")]
    assert_non_empty(r"\P{any}");
    assert_non_empty(r"[a--a]");
    assert_non_empty(r"[a&&b]");
}
3491
// Checks which patterns `props(..).is_literal()` accepts and rejects.
#[test]
fn analysis_is_literal() {
    let literal = |pattern: &str| props(pattern).is_literal();

    // Positive examples.
    let accepted = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"(?:a)",
        r"foo(?:a)",
        r"(?:a)foo",
        r"[a]",
    ];
    for &pattern in &accepted {
        assert!(literal(pattern));
    }

    // Negative examples.
    let rejected = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
    ];
    for &pattern in &rejected {
        assert!(!literal(pattern));
    }
}
3514
// Checks which patterns `props(..).is_alternation_literal()` accepts and
// rejects.
#[test]
fn analysis_is_alternation_literal() {
    let alt_literal = |pattern: &str| props(pattern).is_alternation_literal();

    // Positive examples.
    let accepted = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"foo|bar",
        r"foo|bar|baz",
        r"[a]",
        r"(?:ab)|cd",
        r"ab|(?:cd)",
    ];
    for &pattern in &accepted {
        assert!(alt_literal(pattern));
    }

    // Negative examples.
    let rejected = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
        r"[ab]|b",
        r"a|[ab]",
        r"(a)|b",
        r"a|(b)",
        r"a|b",
        r"a|b|c",
        r"[a]|b",
        r"a|[b]",
        r"(?:a)|b",
        r"a|(?:b)",
        r"(?:z|xx)@|xx",
    ];
    for &pattern in &rejected {
        assert!(!alt_literal(pattern));
    }
}
3548
// Verifies that translating a counted repetition yields the simplified forms
// listed below rather than a repetition node.
#[test]
fn smart_repetition() {
    // Each entry is (pattern, expected translation).
    let cases = [
        (r"a{0}", Hir::empty()),
        (r"a{1}", hir_lit("a")),
        (r"\B{32111}", hir_look(hir::Look::WordUnicodeNegate)),
    ];
    for (pattern, expected) in cases.iter() {
        assert_eq!(&t(pattern), expected);
    }
}
3557
// Verifies the simplifications expected from concatenation: empty inputs
// vanish and neighbouring literals are fused, even across non-capturing
// groups.
#[test]
fn smart_concat() {
    // Inputs that translate to the empty expression.
    for &pattern in &["", "(?:)"] {
        assert_eq!(t(pattern), Hir::empty());
    }

    // Inputs that translate to one fused literal: (pattern, fused text).
    let fused_cases = [
        ("abc", "abc"),
        ("(?:foo)(?:bar)", "foobar"),
        ("quux(?:foo)(?:bar)baz", "quuxfoobarbaz"),
    ];
    for &(pattern, fused) in &fused_cases {
        assert_eq!(t(pattern), hir_lit(fused));
    }

    // An anchor in the middle splits the literal in two, regardless of how
    // the non-capturing groups are nested around it.
    let split_at_anchor = hir_cat(vec![
        hir_lit("foobar"),
        hir_look(hir::Look::Start),
        hir_lit("bazquux"),
    ]);
    for &pattern in &["foo(?:bar^baz)quux", "foo(?:ba(?:r^b)az)quux"] {
        assert_eq!(t(pattern), split_at_anchor);
    }
}
3584
// Verifies the simplifications expected from alternation: nested
// alternations are flattened, single-character branches become a class, and
// a shared prefix is lifted out in front of the alternation.
#[test]
fn smart_alternation() {
    // Shorthands for the pieces that recur in the expectations below.
    let upper = || hir_uclass(&[('A', 'Z')]);
    let lit_alt = |words: &[&str]| {
        hir_alt(words.iter().map(|word| hir_lit(word)).collect())
    };

    // Nested alternations are flattened into one.
    assert_eq!(t("(?:foo)|(?:bar)"), lit_alt(&["foo", "bar"]));
    assert_eq!(
        t("quux|(?:abc|def|xyz)|baz"),
        lit_alt(&["quux", "abc", "def", "xyz", "baz"])
    );
    assert_eq!(
        t("quux|(?:abc|(?:def|mno)|xyz)|baz"),
        lit_alt(&["quux", "abc", "def", "mno", "xyz", "baz"])
    );

    // Single-character branches turn into a character class.
    assert_eq!(
        t("a|b|c|d|e|f|x|y|z"),
        hir_uclass(&[('a', 'f'), ('x', 'z')]),
    );

    // Tests that we lift common prefixes out of an alternation.
    assert_eq!(
        t("[A-Z]foo|[A-Z]quux"),
        hir_cat(vec![upper(), lit_alt(&["foo", "quux"])]),
    );
    assert_eq!(
        t("[A-Z][A-Z]|[A-Z]quux"),
        hir_cat(vec![upper(), hir_alt(vec![upper(), hir_lit("quux")])]),
    );
    assert_eq!(
        t("[A-Z][A-Z]|[A-Z][A-Z]quux"),
        hir_cat(vec![
            upper(),
            upper(),
            hir_alt(vec![Hir::empty(), hir_lit("quux")]),
        ]),
    );
    assert_eq!(
        t("[A-Z]foo|[A-Z]foobar"),
        hir_cat(vec![upper(), lit_alt(&["foo", "foobar"])]),
    );
}
3649
// Translates a hand-built AST consisting of an alternation whose single
// branch is a concatenation with no children, and expects the empty
// expression.
#[test]
fn regression_alt_empty_concat() {
    use crate::ast::{self, Ast};

    let span = Span::splat(Position::new(0, 0, 0));
    let empty_concat = Ast::concat(ast::Concat { span, asts: vec![] });
    let ast = Ast::alternation(ast::Alternation {
        span,
        asts: vec![empty_concat],
    });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::empty()), translated);
}
3663
// Translates a hand-built AST consisting of a concatenation whose single
// child is an alternation with no branches, and expects the expression that
// never matches.
#[test]
fn regression_empty_alt() {
    use crate::ast::{self, Ast};

    let span = Span::splat(Position::new(0, 0, 0));
    let empty_alternation =
        Ast::alternation(ast::Alternation { span, asts: vec![] });
    let ast = Ast::concat(ast::Concat {
        span,
        asts: vec![empty_alternation],
    });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::fail()), translated);
}
3680
// Translates a hand-built AST consisting of a concatenation whose single
// child is an alternation with exactly one branch (a dot), and expects just
// the dot.
#[test]
fn regression_singleton_alt() {
    use crate::ast::{self, Ast};
    use crate::hir::Dot;

    let span = Span::splat(Position::new(0, 0, 0));
    let lone_dot = Ast::alternation(ast::Alternation {
        span,
        asts: vec![Ast::dot(span)],
    });
    let ast = Ast::concat(ast::Concat { span, asts: vec![lone_dot] });

    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), translated);
}
3700
// Regression test for a fuzzer-found input.
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
#[test]
fn regression_fuzz_match() {
    let pat = "[(\u{6} \0-\u{afdf5}] \0 ";

    // Parse with whitespace-insensitive mode on and octal escapes off.
    let mut parser_builder = ParserBuilder::new();
    parser_builder.octal(false).ignore_whitespace(true);
    let ast = parser_builder.build().parse(pat).unwrap();

    // Translate with every flag spelled out explicitly.
    let mut translator_builder = TranslatorBuilder::new();
    translator_builder
        .utf8(true)
        .case_insensitive(false)
        .multi_line(false)
        .dot_matches_new_line(false)
        .swap_greed(true)
        .unicode(true);
    let hir = translator_builder.build().translate(pat, &ast).unwrap();

    let expected = Hir::concat(vec![
        hir_uclass(&[('\0', '\u{afdf5}')]),
        hir_lit("\0"),
    ]);
    assert_eq!(hir, expected);
}
3729
// Regression test for a fuzzer-found input.
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
#[cfg(feature = "unicode")]
#[test]
fn regression_fuzz_difference1() {
    // Only the absence of a panic matters here; the translation itself is
    // discarded.
    let _ = t(
        r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*",
    );
}
3737
3738	// Regression test for a fuzzer-found input. See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
3739	#[test]
3740	fn regression_fuzz_char_decrement1() {
// NOTE(review): the literal below is kept exactly as found. It appears to
// carry markup residue (stray backticks, an escaped `|`); confirm against the
// upstream file before relying on its exact contents.
3741	let pat = "w[w[^w?`\r`w`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0`w?`\r`w[^w?`\r`w[^w?`\r`w[^w`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0\0\0\0\0\0\0\0``\0\0\u{1}\0`]`\0\0`-`\0`][^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0\0\0\0\0\0\0\0`x`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\0\0\0`??`\0\u{7f}`{2}`\u{10}`??`\0\0\0\0\0\0\0\0\0\u{3}\0\0\0`}`\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\0\u{1}\0`]`\0\u{1}\u{1}`H-i]-]`\0\0\0\0\u{1}\0`]`\0\0\0\u{1}\0`]`\0\0`-`\0\0\0\0\u{1}`9-`\u{7f}`]`\0`'\|-`\u{7f}`]`\0`'\|(?i-ux)[-`\u{7f}`]`\0`'`\u{3}\0\0\0`}`\0`-`\0`]<D`\0\0\0\0\0\0\u{1}`]`\0\0\0\0`]`\0\0`-*`\0`]`\0\0` ";
3742	let _ = t(pat); // shouldn't panic; the translated result is deliberately discarded
3743	}
3744	}
3745