translate.rs source code [crates/regex-syntax/src/hir/translate.rs]

/*!
Defines a translator that converts an `Ast` to an `Hir`.
*/
4
5	use core::cell::{Cell, RefCell};
6
7	use alloc::{boxed::Box, string::ToString, vec, vec::Vec};
8
9	use crate::{
10	ast::{self, Ast, Span, Visitor},
11	either::Either,
12	hir::{self, Error, ErrorKind, Hir, HirKind},
13	unicode::{self, ClassQuery},
14	};
15
/// Shorthand for a `core::result::Result` whose error type is the HIR
/// translation [`Error`].
type Result<T> = core::result::Result<T, Error>;
17
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records configuration; `build` copies that configuration
/// into a fresh `Translator`.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// The UTF-8 mode setting, changed via `utf8()` and copied into the
    /// translator by `build()`.
    utf8: bool,
    /// The line terminator byte, changed via `line_terminator()`. `new()`
    /// initializes it to `\n`.
    line_terminator: u8,
    /// The flag settings that a built translator starts out with.
    flags: Flags,
}
25
26	impl Default for TranslatorBuilder {
27	fn default() -> TranslatorBuilder {
28	TranslatorBuilder::new()
29	}
30	}
31
32	impl TranslatorBuilder {
33	/// Create a new translator builder with a default c onfiguration.
34	pub fn new() -> TranslatorBuilder {
35	TranslatorBuilder {
36	utf8: `true`,
37	line_terminator: b'`\n`',
38	flags: Flags::default(),
39	}
40	}
41
42	/// Build a translator using the current configuration.
43	pub fn build(&self) -> Translator {
44	Translator {
45	stack: RefCell::new(vec![]),
46	flags: Cell::new(self.flags),
47	utf8: self.utf8,
48	line_terminator: self.line_terminator,
49	}
50	}
51
52	/// When disabled, translation will permit the construction of a regular
53	/// expression that may match invalid UTF-8.
54	///
55	/// When enabled (the default), the translator is guaranteed to produce an
56	/// expression that, for non-empty matches, will only ever produce spans
57	/// that are entirely valid UTF-8 (otherwise, the translator will return an
58	/// error).
59	///
60	/// Perhaps surprisingly, when UTF-8 is enabled, an empty regex or even
61	/// a negated ASCII word boundary (uttered as `(?-u:\B)` in the concrete
62	/// syntax) will be allowed even though they can produce matches that split
63	/// a UTF-8 encoded codepoint. This only applies to zero-width or "empty"
64	/// matches, and it is expected that the regex engine itself must handle
65	/// these cases if necessary (perhaps by suppressing any zero-width matches
66	/// that split a codepoint).
67	pub fn utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
68	self.utf8 = yes;
69	self
70	}
71
72	/// Sets the line terminator for use with `(?u-s:.)` and `(?-us:.)`.
73	///
74	/// Namely, instead of `.` (by default) matching everything except for `\n`,
75	/// this will cause `.` to match everything except for the byte given.
76	///
77	/// If `.` is used in a context where Unicode mode is enabled and this byte
78	/// isn't ASCII, then an error will be returned. When Unicode mode is
79	/// disabled, then any byte is permitted, but will return an error if UTF-8
80	/// mode is enabled and it is a non-ASCII byte.
81	///
82	/// In short, any ASCII value for a line terminator is always okay. But a
83	/// non-ASCII byte might result in an error depending on whether Unicode
84	/// mode or UTF-8 mode are enabled.
85	///
86	/// Note that if `R` mode is enabled then it always takes precedence and
87	/// the line terminator will be treated as `\r` and `\n` simultaneously.
88	///
89	/// Note also that this doesn't* impact the look-around assertions*
90	/// `(?m:^)` and `(?m:$)`. That's usually controlled by additional
91	/// configuration in the regex engine itself.
92	pub fn line_terminator(&mut self, byte: u8) -> &mut TranslatorBuilder {
93	self.line_terminator = byte;
94	self
95	}
96
97	/// Enable or disable the case insensitive flag (`i`) by default.
98	pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
99	self.flags.case_insensitive = if yes { Some(`true`) } else { None };
100	self
101	}
102
103	/// Enable or disable the multi-line matching flag (`m`) by default.
104	pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
105	self.flags.multi_line = if yes { Some(`true`) } else { None };
106	self
107	}
108
109	/// Enable or disable the "dot matches any character" flag (`s`) by
110	/// default.
111	pub fn dot_matches_new_line(
112	&mut self,
113	yes: bool,
114	) -> &mut TranslatorBuilder {
115	self.flags.dot_matches_new_line = if yes { Some(`true`) } else { None };
116	self
117	}
118
119	/// Enable or disable the CRLF mode flag (`R`) by default.
120	pub fn crlf(&mut self, yes: bool) -> &mut TranslatorBuilder {
121	self.flags.crlf = if yes { Some(`true`) } else { None };
122	self
123	}
124
125	/// Enable or disable the "swap greed" flag (`U`) by default.
126	pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
127	self.flags.swap_greed = if yes { Some(`true`) } else { None };
128	self
129	}
130
131	/// Enable or disable the Unicode flag (`u`) by default.
132	pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
133	self.flags.unicode = if yes { None } else { Some(`false`) };
134	self
135	}
136	}
137
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`].
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    flags: Cell<Flags>,
    /// Whether UTF-8 mode is enabled. When `true`, translation returns an
    /// error instead of producing HIR that could match invalid UTF-8 (see
    /// `TranslatorBuilder::utf8`).
    utf8: bool,
    /// The line terminator to use for `.`.
    line_terminator: u8,
}
157
158	impl Translator {
159	/// Create a new translator using the default configuration.
160	pub fn new() -> Translator {
161	TranslatorBuilder::new().build()
162	}
163
164	/// Translate the given abstract syntax tree (AST) into a high level
165	/// intermediate representation (HIR).
166	///
167	/// If there was a problem doing the translation, then an HIR-specific
168	/// error is returned.
169	///
170	/// The original pattern string used to produce the `Ast` must* also be*
171	/// provided. The translator does not use the pattern string during any
172	/// correct translation, but is used for error reporting.
173	pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
174	ast::visit(ast, visitor:TranslatorI::new(self, pattern))
175	}
176	}
177
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A literal that is being constructed, character by character, from the
    /// AST. We need this because the AST gives each individual character its
    /// own node. So as we see characters, we peek at the top-most HirFrame.
    /// If it's a literal, then we add to it. Otherwise, we push a new literal.
    /// When it comes time to pop it, we convert it to an Hir via Hir::literal.
    Literal(Vec<u8>),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `utf8` is enabled (the default), then a byte character is only
    /// permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed whenever a repetition is observed. After visiting every
    /// sub-expression in the repetition, the translator's stack is expected to
    /// have this sentinel at the top.
    ///
    /// This sentinel only exists to stop other things (like flattening
    /// literals) from reaching across repetition operators.
    Repetition,
    /// This is pushed on to the stack upon first seeing any kind of capture,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a concatenation is observed. After visiting
    /// every sub-expression in the concatenation, the translator's stack is
    /// popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever an alternation is observed. After visiting
    /// every sub-expression in the alternation, the translator's stack is
    /// popped until it sees an Alternation frame.
    Alternation,
    /// This is pushed immediately before each sub-expression in an
    /// alternation. This separates the branches of an alternation on the
    /// stack and prevents literal flattening from reaching across alternation
    /// branches.
    ///
    /// It is popped after each expression in a branch until an 'Alternation'
    /// frame is observed when doing a post visit on an alternation.
    AlternationBranch,
}
250
251	impl HirFrame {
252	/// Assert that the current stack frame is an Hir expression and return it.
253	fn unwrap_expr(self) -> Hir {
254	match self {
255	HirFrame::Expr(expr) => expr,
256	HirFrame::Literal(lit) => Hir::literal(lit),
257	_ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
258	}
259	}
260
261	/// Assert that the current stack frame is a Unicode class expression and
262	/// return it.
263	fn unwrap_class_unicode(self) -> hir::ClassUnicode {
264	match self {
265	HirFrame::ClassUnicode(cls) => cls,
266	_ => panic!(
267	"tried to unwrap Unicode class \
268	from HirFrame, got: {:?}",
269	self
270	),
271	}
272	}
273
274	/// Assert that the current stack frame is a byte class expression and
275	/// return it.
276	fn unwrap_class_bytes(self) -> hir::ClassBytes {
277	match self {
278	HirFrame::ClassBytes(cls) => cls,
279	_ => panic!(
280	"tried to unwrap byte class \
281	from HirFrame, got: {:?}",
282	self
283	),
284	}
285	}
286
287	/// Assert that the current stack frame is a repetition sentinel. If it
288	/// isn't, then panic.
289	fn unwrap_repetition(self) {
290	match self {
291	HirFrame::Repetition => {}
292	_ => {
293	panic!(
294	"tried to unwrap repetition from HirFrame, got: {:?}",
295	self
296	)
297	}
298	}
299	}
300
301	/// Assert that the current stack frame is a group indicator and return
302	/// its corresponding flags (the flags that were active at the time the
303	/// group was entered).
304	fn unwrap_group(self) -> Flags {
305	match self {
306	HirFrame::Group { old_flags } => old_flags,
307	_ => {
308	panic!("tried to unwrap group from HirFrame, got: {:?}", self)
309	}
310	}
311	}
312
313	/// Assert that the current stack frame is an alternation pipe sentinel. If
314	/// it isn't, then panic.
315	fn unwrap_alternation_pipe(self) {
316	match self {
317	HirFrame::AlternationBranch => {}
318	_ => {
319	panic!(
320	"tried to unwrap alt pipe from HirFrame, got: {:?}",
321	self
322	)
323	}
324	}
325	}
326	}
327
328	impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
329	type Output = Hir;
330	type Err = Error;
331
332	fn finish(self) -> Result<Hir> {
333	// ... otherwise, we should have exactly one HIR on the stack.
334	assert_eq!(self.trans().stack.borrow().len(), `1`);
335	Ok(self.pop().unwrap().unwrap_expr())
336	}
337
338	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
339	match *ast {
340	Ast::ClassBracketed(_) => {
341	if self.flags().unicode() {
342	let cls = hir::ClassUnicode::empty();
343	self.push(HirFrame::ClassUnicode(cls));
344	} else {
345	let cls = hir::ClassBytes::empty();
346	self.push(HirFrame::ClassBytes(cls));
347	}
348	}
349	Ast::Repetition(_) => self.push(HirFrame::Repetition),
350	Ast::Group(ref x) => {
351	let old_flags = x
352	.flags()
353	.map(\|ast\| self.set_flags(ast))
354	.unwrap_or_else(\|\| self.flags());
355	self.push(HirFrame::Group { old_flags });
356	}
357	Ast::Concat(_) => {
358	self.push(HirFrame::Concat);
359	}
360	Ast::Alternation(ref x) => {
361	self.push(HirFrame::Alternation);
362	if !x.asts.is_empty() {
363	self.push(HirFrame::AlternationBranch);
364	}
365	}
366	_ => {}
367	}
368	Ok(())
369	}
370
371	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
372	match *ast {
373	Ast::Empty(_) => {
374	self.push(HirFrame::Expr(Hir::empty()));
375	}
376	Ast::Flags(ref x) => {
377	self.set_flags(&x.flags);
378	// Flags in the AST are generally considered directives and
379	// not actual sub-expressions. However, they can be used in
380	// the concrete syntax like `((?i))`, and we need some kind of
381	// indication of an expression there, and Empty is the correct
382	// choice.
383	//
384	// There can also be things like `(?i)+`, but we rule those out
385	// in the parser. In the future, we might allow them for
386	// consistency sake.
387	self.push(HirFrame::Expr(Hir::empty()));
388	}
389	Ast::Literal(ref x) => match self.ast_literal_to_scalar(x)? {
390	Either::Right(byte) => self.push_byte(byte),
391	Either::Left(ch) => match self.case_fold_char(x.span, ch)? {
392	None => self.push_char(ch),
393	Some(expr) => self.push(HirFrame::Expr(expr)),
394	},
395	},
396	Ast::Dot(ref span) => {
397	self.push(HirFrame::Expr(self.hir_dot(**span)?));
398	}
399	Ast::Assertion(ref x) => {
400	self.push(HirFrame::Expr(self.hir_assertion(x)?));
401	}
402	Ast::ClassPerl(ref x) => {
403	if self.flags().unicode() {
404	let cls = self.hir_perl_unicode_class(x)?;
405	let hcls = hir::Class::Unicode(cls);
406	self.push(HirFrame::Expr(Hir::class(hcls)));
407	} else {
408	let cls = self.hir_perl_byte_class(x)?;
409	let hcls = hir::Class::Bytes(cls);
410	self.push(HirFrame::Expr(Hir::class(hcls)));
411	}
412	}
413	Ast::ClassUnicode(ref x) => {
414	let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
415	self.push(HirFrame::Expr(Hir::class(cls)));
416	}
417	Ast::ClassBracketed(ref ast) => {
418	if self.flags().unicode() {
419	let mut cls = self.pop().unwrap().unwrap_class_unicode();
420	self.unicode_fold_and_negate(
421	&ast.span,
422	ast.negated,
423	&mut cls,
424	)?;
425	let expr = Hir::class(hir::Class::Unicode(cls));
426	self.push(HirFrame::Expr(expr));
427	} else {
428	let mut cls = self.pop().unwrap().unwrap_class_bytes();
429	self.bytes_fold_and_negate(
430	&ast.span,
431	ast.negated,
432	&mut cls,
433	)?;
434	let expr = Hir::class(hir::Class::Bytes(cls));
435	self.push(HirFrame::Expr(expr));
436	}
437	}
438	Ast::Repetition(ref x) => {
439	let expr = self.pop().unwrap().unwrap_expr();
440	self.pop().unwrap().unwrap_repetition();
441	self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
442	}
443	Ast::Group(ref x) => {
444	let expr = self.pop().unwrap().unwrap_expr();
445	let old_flags = self.pop().unwrap().unwrap_group();
446	self.trans().flags.set(old_flags);
447	self.push(HirFrame::Expr(self.hir_capture(x, expr)));
448	}
449	Ast::Concat(_) => {
450	let mut exprs = vec![];
451	while let Some(expr) = self.pop_concat_expr() {
452	if !matches!(*expr.kind(), HirKind::Empty) {
453	exprs.push(expr);
454	}
455	}
456	exprs.reverse();
457	self.push(HirFrame::Expr(Hir::concat(exprs)));
458	}
459	Ast::Alternation(_) => {
460	let mut exprs = vec![];
461	while let Some(expr) = self.pop_alt_expr() {
462	self.pop().unwrap().unwrap_alternation_pipe();
463	exprs.push(expr);
464	}
465	exprs.reverse();
466	self.push(HirFrame::Expr(Hir::alternation(exprs)));
467	}
468	}
469	Ok(())
470	}
471
472	fn visit_alternation_in(&mut self) -> Result<()> {
473	self.push(HirFrame::AlternationBranch);
474	Ok(())
475	}
476
477	fn visit_class_set_item_pre(
478	&mut self,
479	ast: &ast::ClassSetItem,
480	) -> Result<()> {
481	match *ast {
482	ast::ClassSetItem::Bracketed(_) => {
483	if self.flags().unicode() {
484	let cls = hir::ClassUnicode::empty();
485	self.push(HirFrame::ClassUnicode(cls));
486	} else {
487	let cls = hir::ClassBytes::empty();
488	self.push(HirFrame::ClassBytes(cls));
489	}
490	}
491	// We needn't handle the Union case here since the visitor will
492	// do it for us.
493	_ => {}
494	}
495	Ok(())
496	}
497
498	fn visit_class_set_item_post(
499	&mut self,
500	ast: &ast::ClassSetItem,
501	) -> Result<()> {
502	match *ast {
503	ast::ClassSetItem::Empty(_) => {}
504	ast::ClassSetItem::Literal(ref x) => {
505	if self.flags().unicode() {
506	let mut cls = self.pop().unwrap().unwrap_class_unicode();
507	cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
508	self.push(HirFrame::ClassUnicode(cls));
509	} else {
510	let mut cls = self.pop().unwrap().unwrap_class_bytes();
511	let byte = self.class_literal_byte(x)?;
512	cls.push(hir::ClassBytesRange::new(byte, byte));
513	self.push(HirFrame::ClassBytes(cls));
514	}
515	}
516	ast::ClassSetItem::Range(ref x) => {
517	if self.flags().unicode() {
518	let mut cls = self.pop().unwrap().unwrap_class_unicode();
519	cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
520	self.push(HirFrame::ClassUnicode(cls));
521	} else {
522	let mut cls = self.pop().unwrap().unwrap_class_bytes();
523	let start = self.class_literal_byte(&x.start)?;
524	let end = self.class_literal_byte(&x.end)?;
525	cls.push(hir::ClassBytesRange::new(start, end));
526	self.push(HirFrame::ClassBytes(cls));
527	}
528	}
529	ast::ClassSetItem::Ascii(ref x) => {
530	if self.flags().unicode() {
531	let xcls = self.hir_ascii_unicode_class(x)?;
532	let mut cls = self.pop().unwrap().unwrap_class_unicode();
533	cls.union(&xcls);
534	self.push(HirFrame::ClassUnicode(cls));
535	} else {
536	let xcls = self.hir_ascii_byte_class(x)?;
537	let mut cls = self.pop().unwrap().unwrap_class_bytes();
538	cls.union(&xcls);
539	self.push(HirFrame::ClassBytes(cls));
540	}
541	}
542	ast::ClassSetItem::Unicode(ref x) => {
543	let xcls = self.hir_unicode_class(x)?;
544	let mut cls = self.pop().unwrap().unwrap_class_unicode();
545	cls.union(&xcls);
546	self.push(HirFrame::ClassUnicode(cls));
547	}
548	ast::ClassSetItem::Perl(ref x) => {
549	if self.flags().unicode() {
550	let xcls = self.hir_perl_unicode_class(x)?;
551	let mut cls = self.pop().unwrap().unwrap_class_unicode();
552	cls.union(&xcls);
553	self.push(HirFrame::ClassUnicode(cls));
554	} else {
555	let xcls = self.hir_perl_byte_class(x)?;
556	let mut cls = self.pop().unwrap().unwrap_class_bytes();
557	cls.union(&xcls);
558	self.push(HirFrame::ClassBytes(cls));
559	}
560	}
561	ast::ClassSetItem::Bracketed(ref ast) => {
562	if self.flags().unicode() {
563	let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
564	self.unicode_fold_and_negate(
565	&ast.span,
566	ast.negated,
567	&mut cls1,
568	)?;
569
570	let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
571	cls2.union(&cls1);
572	self.push(HirFrame::ClassUnicode(cls2));
573	} else {
574	let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
575	self.bytes_fold_and_negate(
576	&ast.span,
577	ast.negated,
578	&mut cls1,
579	)?;
580
581	let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
582	cls2.union(&cls1);
583	self.push(HirFrame::ClassBytes(cls2));
584	}
585	}
586	// This is handled automatically by the visitor.
587	ast::ClassSetItem::Union(_) => {}
588	}
589	Ok(())
590	}
591
592	fn visit_class_set_binary_op_pre(
593	&mut self,
594	_op: &ast::ClassSetBinaryOp,
595	) -> Result<()> {
596	if self.flags().unicode() {
597	let cls = hir::ClassUnicode::empty();
598	self.push(HirFrame::ClassUnicode(cls));
599	} else {
600	let cls = hir::ClassBytes::empty();
601	self.push(HirFrame::ClassBytes(cls));
602	}
603	Ok(())
604	}
605
606	fn visit_class_set_binary_op_in(
607	&mut self,
608	_op: &ast::ClassSetBinaryOp,
609	) -> Result<()> {
610	if self.flags().unicode() {
611	let cls = hir::ClassUnicode::empty();
612	self.push(HirFrame::ClassUnicode(cls));
613	} else {
614	let cls = hir::ClassBytes::empty();
615	self.push(HirFrame::ClassBytes(cls));
616	}
617	Ok(())
618	}
619
620	fn visit_class_set_binary_op_post(
621	&mut self,
622	op: &ast::ClassSetBinaryOp,
623	) -> Result<()> {
624	use crate::ast::ClassSetBinaryOpKind::*;
625
626	if self.flags().unicode() {
627	let mut rhs = self.pop().unwrap().unwrap_class_unicode();
628	let mut lhs = self.pop().unwrap().unwrap_class_unicode();
629	let mut cls = self.pop().unwrap().unwrap_class_unicode();
630	if self.flags().case_insensitive() {
631	rhs.try_case_fold_simple().map_err(\|_\| {
632	self.error(
633	op.rhs.span().clone(),
634	ErrorKind::UnicodeCaseUnavailable,
635	)
636	})?;
637	lhs.try_case_fold_simple().map_err(\|_\| {
638	self.error(
639	op.lhs.span().clone(),
640	ErrorKind::UnicodeCaseUnavailable,
641	)
642	})?;
643	}
644	match op.kind {
645	Intersection => lhs.intersect(&rhs),
646	Difference => lhs.difference(&rhs),
647	SymmetricDifference => lhs.symmetric_difference(&rhs),
648	}
649	cls.union(&lhs);
650	self.push(HirFrame::ClassUnicode(cls));
651	} else {
652	let mut rhs = self.pop().unwrap().unwrap_class_bytes();
653	let mut lhs = self.pop().unwrap().unwrap_class_bytes();
654	let mut cls = self.pop().unwrap().unwrap_class_bytes();
655	if self.flags().case_insensitive() {
656	rhs.case_fold_simple();
657	lhs.case_fold_simple();
658	}
659	match op.kind {
660	Intersection => lhs.intersect(&rhs),
661	Difference => lhs.difference(&rhs),
662	SymmetricDifference => lhs.symmetric_difference(&rhs),
663	}
664	cls.union(&lhs);
665	self.push(HirFrame::ClassBytes(cls));
666	}
667	Ok(())
668	}
669	}
670
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose frame stack and flags are used during the
    /// traversal.
    trans: &'t Translator,
    /// The original pattern string; it is copied into any error that is
    /// created.
    pattern: &'p str,
}
682
683	impl<'t, 'p> TranslatorI<'t, 'p> {
684	/// Build a new internal translator.
685	fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
686	TranslatorI { trans, pattern }
687	}
688
689	/// Return a reference to the underlying translator.
690	fn trans(&self) -> &Translator {
691	&self.trans
692	}
693
694	/// Push the given frame on to the call stack.
695	fn push(&self, frame: HirFrame) {
696	self.trans().stack.borrow_mut().push(frame);
697	}
698
699	/// Push the given literal char on to the call stack.
700	///
701	/// If the top-most element of the stack is a literal, then the char
702	/// is appended to the end of that literal. Otherwise, a new literal
703	/// containing just the given char is pushed to the top of the stack.
704	fn push_char(&self, ch: char) {
705	let mut buf = [`0`; `4`];
706	let bytes = ch.encode_utf8(&mut buf).as_bytes();
707	let mut stack = self.trans().stack.borrow_mut();
708	if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
709	literal.extend_from_slice(bytes);
710	} else {
711	stack.push(HirFrame::Literal(bytes.to_vec()));
712	}
713	}
714
715	/// Push the given literal byte on to the call stack.
716	///
717	/// If the top-most element of the stack is a literal, then the byte
718	/// is appended to the end of that literal. Otherwise, a new literal
719	/// containing just the given byte is pushed to the top of the stack.
720	fn push_byte(&self, byte: u8) {
721	let mut stack = self.trans().stack.borrow_mut();
722	if let Some(HirFrame::Literal(ref mut literal)) = stack.last_mut() {
723	literal.push(byte);
724	} else {
725	stack.push(HirFrame::Literal(vec![byte]));
726	}
727	}
728
729	/// Pop the top of the call stack. If the call stack is empty, return None.
730	fn pop(&self) -> Option<HirFrame> {
731	self.trans().stack.borrow_mut().pop()
732	}
733
734	/// Pop an HIR expression from the top of the stack for a concatenation.
735	///
736	/// This returns None if the stack is empty or when a concat frame is seen.
737	/// Otherwise, it panics if it could not find an HIR expression.
738	fn pop_concat_expr(&self) -> Option<Hir> {
739	let frame = self.pop()?;
740	match frame {
741	HirFrame::Concat => None,
742	HirFrame::Expr(expr) => Some(expr),
743	HirFrame::Literal(lit) => Some(Hir::literal(lit)),
744	HirFrame::ClassUnicode(_) => {
745	unreachable!("expected expr or concat, got Unicode class")
746	}
747	HirFrame::ClassBytes(_) => {
748	unreachable!("expected expr or concat, got byte class")
749	}
750	HirFrame::Repetition => {
751	unreachable!("expected expr or concat, got repetition")
752	}
753	HirFrame::Group { .. } => {
754	unreachable!("expected expr or concat, got group")
755	}
756	HirFrame::Alternation => {
757	unreachable!("expected expr or concat, got alt marker")
758	}
759	HirFrame::AlternationBranch => {
760	unreachable!("expected expr or concat, got alt branch marker")
761	}
762	}
763	}
764
765	/// Pop an HIR expression from the top of the stack for an alternation.
766	///
767	/// This returns None if the stack is empty or when an alternation frame is
768	/// seen. Otherwise, it panics if it could not find an HIR expression.
769	fn pop_alt_expr(&self) -> Option<Hir> {
770	let frame = self.pop()?;
771	match frame {
772	HirFrame::Alternation => None,
773	HirFrame::Expr(expr) => Some(expr),
774	HirFrame::Literal(lit) => Some(Hir::literal(lit)),
775	HirFrame::ClassUnicode(_) => {
776	unreachable!("expected expr or alt, got Unicode class")
777	}
778	HirFrame::ClassBytes(_) => {
779	unreachable!("expected expr or alt, got byte class")
780	}
781	HirFrame::Repetition => {
782	unreachable!("expected expr or alt, got repetition")
783	}
784	HirFrame::Group { .. } => {
785	unreachable!("expected expr or alt, got group")
786	}
787	HirFrame::Concat => {
788	unreachable!("expected expr or alt, got concat marker")
789	}
790	HirFrame::AlternationBranch => {
791	unreachable!("expected expr or alt, got alt branch marker")
792	}
793	}
794	}
795
796	/// Create a new error with the given span and error type.
797	fn error(&self, span: Span, kind: ErrorKind) -> Error {
798	Error { kind, pattern: self.pattern.to_string(), span }
799	}
800
801	/// Return a copy of the active flags.
802	fn flags(&self) -> Flags {
803	self.trans().flags.get()
804	}
805
806	/// Set the flags of this translator from the flags set in the given AST.
807	/// Then, return the old flags.
808	fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
809	let old_flags = self.flags();
810	let mut new_flags = Flags::from_ast(ast_flags);
811	new_flags.merge(&old_flags);
812	self.trans().flags.set(new_flags);
813	old_flags
814	}
815
816	/// Convert an Ast literal to its scalar representation.
817	///
818	/// When Unicode mode is enabled, then this always succeeds and returns a
819	/// `char` (Unicode scalar value).
820	///
821	/// When Unicode mode is disabled, then a `char` will still be returned
822	/// whenever possible. A byte is returned only when invalid UTF-8 is
823	/// allowed and when the byte is not ASCII. Otherwise, a non-ASCII byte
824	/// will result in an error when invalid UTF-8 is not allowed.
825	fn ast_literal_to_scalar(
826	&self,
827	lit: &ast::Literal,
828	) -> Result<Either<char, u8>> {
829	if self.flags().unicode() {
830	return Ok(Either::Left(lit.c));
831	}
832	let byte = match lit.byte() {
833	None => return Ok(Either::Left(lit.c)),
834	Some(byte) => byte,
835	};
836	if byte <= `0x7F` {
837	return Ok(Either::Left(char::try_from(byte).unwrap()));
838	}
839	if self.trans().utf8 {
840	return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
841	}
842	Ok(Either::Right(byte))
843	}
844
845	fn case_fold_char(&self, span: Span, c: char) -> Result<Option<Hir>> {
846	if !self.flags().case_insensitive() {
847	return Ok(None);
848	}
849	if self.flags().unicode() {
850	// If case folding won't do anything, then don't bother trying.
851	let map = unicode::SimpleCaseFolder::new()
852	.map(\|f\| f.overlaps(c, c))
853	.map_err(\|_\| {
854	self.error(span, ErrorKind::UnicodeCaseUnavailable)
855	})?;
856	if !map {
857	return Ok(None);
858	}
859	let mut cls =
860	hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
861	c, c,
862	)]);
863	cls.try_case_fold_simple().map_err(\|_\| {
864	self.error(span, ErrorKind::UnicodeCaseUnavailable)
865	})?;
866	Ok(Some(Hir::class(hir::Class::Unicode(cls))))
867	} else {
868	if !c.is_ascii() {
869	return Ok(None);
870	}
871	// If case folding won't do anything, then don't bother trying.
872	match c {
873	'A'..='Z' \| 'a'..='z' => {}
874	_ => return Ok(None),
875	}
876	let mut cls =
877	hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
878	// OK because 'c.len_utf8() == 1' which in turn implies
879	// that 'c' is ASCII.
880	u8::try_from(c).unwrap(),
881	u8::try_from(c).unwrap(),
882	)]);
883	cls.case_fold_simple();
884	Ok(Some(Hir::class(hir::Class::Bytes(cls))))
885	}
886	}
887
888	fn hir_dot(&self, span: Span) -> Result<Hir> {
889	let (utf8, lineterm, flags) =
890	(self.trans().utf8, self.trans().line_terminator, self.flags());
891	if utf8 && (!flags.unicode() \|\| !lineterm.is_ascii()) {
892	return Err(self.error(span, ErrorKind::InvalidUtf8));
893	}
894	let dot = if flags.dot_matches_new_line() {
895	if flags.unicode() {
896	hir::Dot::AnyChar
897	} else {
898	hir::Dot::AnyByte
899	}
900	} else {
901	if flags.unicode() {
902	if flags.crlf() {
903	hir::Dot::AnyCharExceptCRLF
904	} else {
905	if !lineterm.is_ascii() {
906	return Err(
907	self.error(span, ErrorKind::InvalidLineTerminator)
908	);
909	}
910	hir::Dot::AnyCharExcept(char::from(lineterm))
911	}
912	} else {
913	if flags.crlf() {
914	hir::Dot::AnyByteExceptCRLF
915	} else {
916	hir::Dot::AnyByteExcept(lineterm)
917	}
918	}
919	};
920	Ok(Hir::dot(dot))
921	}
922
923	fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
924	let unicode = self.flags().unicode();
925	let multi_line = self.flags().multi_line();
926	let crlf = self.flags().crlf();
927	Ok(match asst.kind {
928	ast::AssertionKind::StartLine => Hir::look(if multi_line {
929	if crlf {
930	hir::Look::StartCRLF
931	} else {
932	hir::Look::StartLF
933	}
934	} else {
935	hir::Look::Start
936	}),
937	ast::AssertionKind::EndLine => Hir::look(if multi_line {
938	if crlf {
939	hir::Look::EndCRLF
940	} else {
941	hir::Look::EndLF
942	}
943	} else {
944	hir::Look::End
945	}),
946	ast::AssertionKind::StartText => Hir::look(hir::Look::Start),
947	ast::AssertionKind::EndText => Hir::look(hir::Look::End),
948	ast::AssertionKind::WordBoundary => Hir::look(if unicode {
949	hir::Look::WordUnicode
950	} else {
951	hir::Look::WordAscii
952	}),
953	ast::AssertionKind::NotWordBoundary => Hir::look(if unicode {
954	hir::Look::WordUnicodeNegate
955	} else {
956	hir::Look::WordAsciiNegate
957	}),
958	ast::AssertionKind::WordBoundaryStart
959	\| ast::AssertionKind::WordBoundaryStartAngle => {
960	Hir::look(if unicode {
961	hir::Look::WordStartUnicode
962	} else {
963	hir::Look::WordStartAscii
964	})
965	}
966	ast::AssertionKind::WordBoundaryEnd
967	\| ast::AssertionKind::WordBoundaryEndAngle => {
968	Hir::look(if unicode {
969	hir::Look::WordEndUnicode
970	} else {
971	hir::Look::WordEndAscii
972	})
973	}
974	ast::AssertionKind::WordBoundaryStartHalf => {
975	Hir::look(if unicode {
976	hir::Look::WordStartHalfUnicode
977	} else {
978	hir::Look::WordStartHalfAscii
979	})
980	}
981	ast::AssertionKind::WordBoundaryEndHalf => Hir::look(if unicode {
982	hir::Look::WordEndHalfUnicode
983	} else {
984	hir::Look::WordEndHalfAscii
985	}),
986	})
987	}
988
989	fn hir_capture(&self, group: &ast::Group, expr: Hir) -> Hir {
990	let (index, name) = match group.kind {
991	ast::GroupKind::CaptureIndex(index) => (index, None),
992	ast::GroupKind::CaptureName { ref name, .. } => {
993	(name.index, Some(name.name.clone().into_boxed_str()))
994	}
995	// The HIR doesn't need to use non-capturing groups, since the way
996	// in which the data type is defined handles this automatically.
997	ast::GroupKind::NonCapturing(_) => return expr,
998	};
999	Hir::capture(hir::Capture { index, name, sub: Box::new(expr) })
1000	}
1001
1002	fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
1003	let (min, max) = match rep.op.kind {
1004	ast::RepetitionKind::ZeroOrOne => (`0`, Some(`1`)),
1005	ast::RepetitionKind::ZeroOrMore => (`0`, None),
1006	ast::RepetitionKind::OneOrMore => (`1`, None),
1007	ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
1008	(m, Some(m))
1009	}
1010	ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
1011	(m, None)
1012	}
1013	ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
1014	m,
1015	n,
1016	)) => (m, Some(n)),
1017	};
1018	let greedy =
1019	if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
1020	Hir::repetition(hir::Repetition {
1021	min,
1022	max,
1023	greedy,
1024	sub: Box::new(expr),
1025	})
1026	}
1027
1028	fn hir_unicode_class(
1029	&self,
1030	ast_class: &ast::ClassUnicode,
1031	) -> Result<hir::ClassUnicode> {
1032	use crate::ast::ClassUnicodeKind::*;
1033
1034	if !self.flags().unicode() {
1035	return Err(
1036	self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
1037	);
1038	}
1039	let query = match ast_class.kind {
1040	OneLetter(name) => ClassQuery::OneLetter(name),
1041	Named(ref name) => ClassQuery::Binary(name),
1042	NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
1043	property_name: name,
1044	property_value: value,
1045	},
1046	};
1047	let mut result = self.convert_unicode_class_error(
1048	&ast_class.span,
1049	unicode::class(query),
1050	);
1051	if let Ok(ref mut class) = result {
1052	self.unicode_fold_and_negate(
1053	&ast_class.span,
1054	ast_class.negated,
1055	class,
1056	)?;
1057	}
1058	result
1059	}
1060
1061	fn hir_ascii_unicode_class(
1062	&self,
1063	ast: &ast::ClassAscii,
1064	) -> Result<hir::ClassUnicode> {
1065	let mut cls = hir::ClassUnicode::new(
1066	ascii_class_as_chars(&ast.kind)
1067	.map(\|(s, e)\| hir::ClassUnicodeRange::new(s, e)),
1068	);
1069	self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1070	Ok(cls)
1071	}
1072
1073	fn hir_ascii_byte_class(
1074	&self,
1075	ast: &ast::ClassAscii,
1076	) -> Result<hir::ClassBytes> {
1077	let mut cls = hir::ClassBytes::new(
1078	ascii_class(&ast.kind)
1079	.map(\|(s, e)\| hir::ClassBytesRange::new(s, e)),
1080	);
1081	self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
1082	Ok(cls)
1083	}
1084
1085	fn hir_perl_unicode_class(
1086	&self,
1087	ast_class: &ast::ClassPerl,
1088	) -> Result<hir::ClassUnicode> {
1089	use crate::ast::ClassPerlKind::*;
1090
1091	assert!(self.flags().unicode());
1092	let result = match ast_class.kind {
1093	Digit => unicode::perl_digit(),
1094	Space => unicode::perl_space(),
1095	Word => unicode::perl_word(),
1096	};
1097	let mut class =
1098	self.convert_unicode_class_error(&ast_class.span, result)?;
1099	// We needn't apply case folding here because the Perl Unicode classes
1100	// are already closed under Unicode simple case folding.
1101	if ast_class.negated {
1102	class.negate();
1103	}
1104	Ok(class)
1105	}
1106
1107	fn hir_perl_byte_class(
1108	&self,
1109	ast_class: &ast::ClassPerl,
1110	) -> Result<hir::ClassBytes> {
1111	use crate::ast::ClassPerlKind::*;
1112
1113	assert!(!self.flags().unicode());
1114	let mut class = match ast_class.kind {
1115	Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
1116	Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
1117	Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
1118	};
1119	// We needn't apply case folding here because the Perl ASCII classes
1120	// are already closed (under ASCII case folding).
1121	if ast_class.negated {
1122	class.negate();
1123	}
1124	// Negating a Perl byte class is likely to cause it to match invalid
1125	// UTF-8. That's only OK if the translator is configured to allow such
1126	// things.
1127	if self.trans().utf8 && !class.is_ascii() {
1128	return Err(self.error(ast_class.span, ErrorKind::InvalidUtf8));
1129	}
1130	Ok(class)
1131	}
1132
1133	/// Converts the given Unicode specific error to an HIR translation error.
1134	///
1135	/// The span given should approximate the position at which an error would
1136	/// occur.
1137	fn convert_unicode_class_error(
1138	&self,
1139	span: &Span,
1140	result: core::result::Result<hir::ClassUnicode, unicode::Error>,
1141	) -> Result<hir::ClassUnicode> {
1142	result.map_err(\|err\| {
1143	let sp = span.clone();
1144	match err {
1145	unicode::Error::PropertyNotFound => {
1146	self.error(sp, ErrorKind::UnicodePropertyNotFound)
1147	}
1148	unicode::Error::PropertyValueNotFound => {
1149	self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
1150	}
1151	unicode::Error::PerlClassNotFound => {
1152	self.error(sp, ErrorKind::UnicodePerlClassNotFound)
1153	}
1154	}
1155	})
1156	}
1157
1158	fn unicode_fold_and_negate(
1159	&self,
1160	span: &Span,
1161	negated: bool,
1162	class: &mut hir::ClassUnicode,
1163	) -> Result<()> {
1164	// Note that we must apply case folding before negation!
1165	// Consider `(?i)[^x]`. If we applied negation first, then
1166	// the result would be the character class that matched any
1167	// Unicode scalar value.
1168	if self.flags().case_insensitive() {
1169	class.try_case_fold_simple().map_err(\|_\| {
1170	self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
1171	})?;
1172	}
1173	if negated {
1174	class.negate();
1175	}
1176	Ok(())
1177	}
1178
1179	fn bytes_fold_and_negate(
1180	&self,
1181	span: &Span,
1182	negated: bool,
1183	class: &mut hir::ClassBytes,
1184	) -> Result<()> {
1185	// Note that we must apply case folding before negation!
1186	// Consider `(?i)[^x]`. If we applied negation first, then
1187	// the result would be the character class that matched any
1188	// Unicode scalar value.
1189	if self.flags().case_insensitive() {
1190	class.case_fold_simple();
1191	}
1192	if negated {
1193	class.negate();
1194	}
1195	if self.trans().utf8 && !class.is_ascii() {
1196	return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
1197	}
1198	Ok(())
1199	}
1200
1201	/// Return a scalar byte value suitable for use as a literal in a byte
1202	/// character class.
1203	fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
1204	match self.ast_literal_to_scalar(ast)? {
1205	Either::Right(byte) => Ok(byte),
1206	Either::Left(ch) => {
1207	if ch.is_ascii() {
1208	Ok(u8::try_from(ch).unwrap())
1209	} else {
1210	// We can't feasibly support Unicode in
1211	// byte oriented classes. Byte classes don't
1212	// do Unicode case folding.
1213	Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
1214	}
1215	}
1216	}
1217	}
1218	}
1219
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled.
///
/// `None` encodes "absent". The accessor methods on this type substitute a
/// default for absent flags: `true` for `unicode` and `false` for every other
/// flag.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    // Consulted when deciding whether to case fold classes.
    case_insensitive: Option<bool>,
    // Consulted when translating line anchors.
    multi_line: Option<bool>,
    dot_matches_new_line: Option<bool>,
    // When enabled, the greediness of repetition operators is inverted.
    swap_greed: Option<bool>,
    // The only flag whose accessor reports `true` when absent.
    unicode: Option<bool>,
    // Consulted when translating line anchors and `.`.
    crlf: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1236
1237	impl Flags {
1238	fn from_ast(ast: &ast::Flags) -> Flags {
1239	let mut flags = Flags::default();
1240	let mut enable = `true`;
1241	for item in &ast.items {
1242	match item.kind {
1243	ast::FlagsItemKind::Negation => {
1244	enable = `false`;
1245	}
1246	ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1247	flags.case_insensitive = Some(enable);
1248	}
1249	ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1250	flags.multi_line = Some(enable);
1251	}
1252	ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1253	flags.dot_matches_new_line = Some(enable);
1254	}
1255	ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1256	flags.swap_greed = Some(enable);
1257	}
1258	ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1259	flags.unicode = Some(enable);
1260	}
1261	ast::FlagsItemKind::Flag(ast::Flag::CRLF) => {
1262	flags.crlf = Some(enable);
1263	}
1264	ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1265	}
1266	}
1267	flags
1268	}
1269
1270	fn merge(&mut self, previous: &Flags) {
1271	if self.case_insensitive.is_none() {
1272	self.case_insensitive = previous.case_insensitive;
1273	}
1274	if self.multi_line.is_none() {
1275	self.multi_line = previous.multi_line;
1276	}
1277	if self.dot_matches_new_line.is_none() {
1278	self.dot_matches_new_line = previous.dot_matches_new_line;
1279	}
1280	if self.swap_greed.is_none() {
1281	self.swap_greed = previous.swap_greed;
1282	}
1283	if self.unicode.is_none() {
1284	self.unicode = previous.unicode;
1285	}
1286	if self.crlf.is_none() {
1287	self.crlf = previous.crlf;
1288	}
1289	}
1290
1291	fn case_insensitive(&self) -> bool {
1292	self.case_insensitive.unwrap_or(`false`)
1293	}
1294
1295	fn multi_line(&self) -> bool {
1296	self.multi_line.unwrap_or(`false`)
1297	}
1298
1299	fn dot_matches_new_line(&self) -> bool {
1300	self.dot_matches_new_line.unwrap_or(`false`)
1301	}
1302
1303	fn swap_greed(&self) -> bool {
1304	self.swap_greed.unwrap_or(`false`)
1305	}
1306
1307	fn unicode(&self) -> bool {
1308	self.unicode.unwrap_or(`true`)
1309	}
1310
1311	fn crlf(&self) -> bool {
1312	self.crlf.unwrap_or(`false`)
1313	}
1314	}
1315
1316	fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1317	let ranges: Vec<_> = ascii_classimpl Iterator(kind)
1318	.map(\|(s: u8, e: u8)\| hir::ClassBytesRange::new(start:s, end:e))
1319	.collect();
1320	hir::ClassBytes::new(ranges)
1321	}
1322
1323	fn ascii_class(kind: &ast::ClassAsciiKind) -> impl Iterator<Item = (u8, u8)> {
1324	use crate::ast::ClassAsciiKind::*;
1325
1326	let slice: &'static [(u8, u8)] = match *kind {
1327	Alnum => &[(b'0', b'9'), (b'A', b'Z'), (b'a', b'z')],
1328	Alpha => &[(b'A', b'Z'), (b'a', b'z')],
1329	Ascii => &[(b'`\x00`', b'`\x7F`')],
1330	Blank => &[(b'`\t`', b'`\t`'), (b' ', b' ')],
1331	Cntrl => &[(b'`\x00`', b'`\x1F`'), (b'`\x7F`', b'`\x7F`')],
1332	Digit => &[(b'0', b'9')],
1333	Graph => &[(b'!', b'~')],
1334	Lower => &[(b'a', b'z')],
1335	Print => &[(b' ', b'~')],
1336	Punct => &[(b'!', b'/'), (b':', b'@'), (b'[', b'`'), (b'{', b'~')],
1337	Space => &[
1338	(b'`\t`', b'`\t`'),
1339	(b'`\n`', b'`\n`'),
1340	(b'`\x0B`', b'`\x0B`'),
1341	(b'`\x0C`', b'`\x0C`'),
1342	(b'`\r`', b'`\r`'),
1343	(b' ', b' '),
1344	],
1345	Upper => &[(b'A', b'Z')],
1346	Word => &[(b'0', b'9'), (b'A', b'Z'), (b'_', b'_'), (b'a', b'z')],
1347	Xdigit => &[(b'0', b'9'), (b'A', b'F'), (b'a', b'f')],
1348	};
1349	slice.iter().copied()
1350	}
1351
1352	fn ascii_class_as_chars(
1353	kind: &ast::ClassAsciiKind,
1354	) -> impl Iterator<Item = (char, char)> {
1355	ascii_class(kind).map(\|(s: u8, e: u8)\| (char::from(s), char::from(e)))
1356	}
1357
1358	#[cfg(test)]
1359	mod tests {
1360	use crate::{
1361	ast::{self, parse::ParserBuilder, Ast, Position, Span},
1362	hir::{self, Hir, HirKind, Look, Properties},
1363	unicode::{self, ClassQuery},
1364	};
1365
1366	use super::*;
1367
// We create these errors to compare with real hir::Errors in the tests.
// We define equality between TestError and hir::Error to disregard the
// pattern string in hir::Error, which is annoying to provide in tests.
//
// Only the two fields below take part in those comparisons.
#[derive(Clone, Debug)]
struct TestError {
    // Where in the pattern the error is expected to be reported.
    span: Span,
    // The kind of error that is expected.
    kind: hir::ErrorKind,
}
1376
1377	impl PartialEq<hir::Error> for TestError {
1378	fn eq(&self, other: &hir::Error) -> bool {
1379	self.span == other.span && self.kind == other.kind
1380	}
1381	}
1382
1383	impl PartialEq<TestError> for hir::Error {
1384	fn eq(&self, other: &TestError) -> bool {
1385	self.span == other.span && self.kind == other.kind
1386	}
1387	}
1388
1389	fn parse(pattern: &str) -> Ast {
1390	ParserBuilder::new().octal(`true`).build().parse(pattern).unwrap()
1391	}
1392
1393	fn t(pattern: &str) -> Hir {
1394	TranslatorBuilder::new()
1395	.utf8(`true`)
1396	.build()
1397	.translate(pattern, &parse(pattern))
1398	.unwrap()
1399	}
1400
1401	fn t_err(pattern: &str) -> hir::Error {
1402	TranslatorBuilder::new()
1403	.utf8(`true`)
1404	.build()
1405	.translate(pattern, &parse(pattern))
1406	.unwrap_err()
1407	}
1408
1409	fn t_bytes(pattern: &str) -> Hir {
1410	TranslatorBuilder::new()
1411	.utf8(`false`)
1412	.build()
1413	.translate(pattern, &parse(pattern))
1414	.unwrap()
1415	}
1416
1417	fn props(pattern: &str) -> Properties {
1418	t(pattern).properties().clone()
1419	}
1420
1421	fn props_bytes(pattern: &str) -> Properties {
1422	t_bytes(pattern).properties().clone()
1423	}
1424
1425	fn hir_lit(s: &str) -> Hir {
1426	hir_blit(s.as_bytes())
1427	}
1428
1429	fn hir_blit(s: &[u8]) -> Hir {
1430	Hir::literal(s)
1431	}
1432
1433	fn hir_capture(index: u32, expr: Hir) -> Hir {
1434	Hir::capture(hir::Capture { index, name: None, sub: Box::new(expr) })
1435	}
1436
1437	fn hir_capture_name(index: u32, name: &str, expr: Hir) -> Hir {
1438	Hir::capture(hir::Capture {
1439	index,
1440	name: Some(name.into()),
1441	sub: Box::new(expr),
1442	})
1443	}
1444
1445	fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1446	Hir::repetition(hir::Repetition {
1447	min: `0`,
1448	max: Some(`1`),
1449	greedy,
1450	sub: Box::new(expr),
1451	})
1452	}
1453
1454	fn hir_star(greedy: bool, expr: Hir) -> Hir {
1455	Hir::repetition(hir::Repetition {
1456	min: `0`,
1457	max: None,
1458	greedy,
1459	sub: Box::new(expr),
1460	})
1461	}
1462
1463	fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1464	Hir::repetition(hir::Repetition {
1465	min: `1`,
1466	max: None,
1467	greedy,
1468	sub: Box::new(expr),
1469	})
1470	}
1471
1472	fn hir_range(greedy: bool, min: u32, max: Option<u32>, expr: Hir) -> Hir {
1473	Hir::repetition(hir::Repetition {
1474	min,
1475	max,
1476	greedy,
1477	sub: Box::new(expr),
1478	})
1479	}
1480
// Shorthand for `Hir::alternation` over the given branches.
fn hir_alt(alts: Vec<Hir>) -> Hir {
    Hir::alternation(alts)
}
1484
// Shorthand for `Hir::concat` over the given sub-expressions.
fn hir_cat(exprs: Vec<Hir>) -> Hir {
    Hir::concat(exprs)
}
1488
1489	#[allow(dead_code)]
1490	fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1491	Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1492	}
1493
1494	#[allow(dead_code)]
1495	fn hir_uclass_perl_word() -> Hir {
1496	Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1497	}
1498
1499	fn hir_ascii_uclass(kind: &ast::ClassAsciiKind) -> Hir {
1500	Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(
1501	ascii_class_as_chars(kind)
1502	.map(\|(s, e)\| hir::ClassUnicodeRange::new(s, e)),
1503	)))
1504	}
1505
1506	fn hir_ascii_bclass(kind: &ast::ClassAsciiKind) -> Hir {
1507	Hir::class(hir::Class::Bytes(hir::ClassBytes::new(
1508	ascii_class(kind).map(\|(s, e)\| hir::ClassBytesRange::new(s, e)),
1509	)))
1510	}
1511
1512	fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1513	Hir::class(uclass(ranges))
1514	}
1515
1516	fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1517	Hir::class(bclass(ranges))
1518	}
1519
1520	fn hir_case_fold(expr: Hir) -> Hir {
1521	match expr.into_kind() {
1522	HirKind::Class(mut cls) => {
1523	cls.case_fold_simple();
1524	Hir::class(cls)
1525	}
1526	_ => panic!("cannot case fold non-class Hir expr"),
1527	}
1528	}
1529
1530	fn hir_negate(expr: Hir) -> Hir {
1531	match expr.into_kind() {
1532	HirKind::Class(mut cls) => {
1533	cls.negate();
1534	Hir::class(cls)
1535	}
1536	_ => panic!("cannot negate non-class Hir expr"),
1537	}
1538	}
1539
1540	fn uclass(ranges: &[(char, char)]) -> hir::Class {
1541	let ranges: Vec<hir::ClassUnicodeRange> = ranges
1542	.iter()
1543	.map(\|&(s, e)\| hir::ClassUnicodeRange::new(s, e))
1544	.collect();
1545	hir::Class::Unicode(hir::ClassUnicode::new(ranges))
1546	}
1547
1548	fn bclass(ranges: &[(u8, u8)]) -> hir::Class {
1549	let ranges: Vec<hir::ClassBytesRange> = ranges
1550	.iter()
1551	.map(\|&(s, e)\| hir::ClassBytesRange::new(s, e))
1552	.collect();
1553	hir::Class::Bytes(hir::ClassBytes::new(ranges))
1554	}
1555
// Applies simple case folding to `cls` and wraps the result in an HIR.
#[cfg(feature = "unicode-case")]
fn class_case_fold(cls: hir::Class) -> Hir {
    let mut folded = cls;
    folded.case_fold_simple();
    Hir::class(folded)
}
1561
1562	fn class_negate(mut cls: hir::Class) -> Hir {
1563	cls.negate();
1564	Hir::class(cls)
1565	}
1566
1567	#[allow(dead_code)]
1568	fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1569	use crate::hir::Class::{Bytes, Unicode};
1570
1571	match (expr1.into_kind(), expr2.into_kind()) {
1572	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1573	c1.union(&c2);
1574	Hir::class(hir::Class::Unicode(c1))
1575	}
1576	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1577	c1.union(&c2);
1578	Hir::class(hir::Class::Bytes(c1))
1579	}
1580	_ => panic!("cannot union non-class Hir exprs"),
1581	}
1582	}
1583
1584	#[allow(dead_code)]
1585	fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1586	use crate::hir::Class::{Bytes, Unicode};
1587
1588	match (expr1.into_kind(), expr2.into_kind()) {
1589	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1590	c1.difference(&c2);
1591	Hir::class(hir::Class::Unicode(c1))
1592	}
1593	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1594	c1.difference(&c2);
1595	Hir::class(hir::Class::Bytes(c1))
1596	}
1597	_ => panic!("cannot difference non-class Hir exprs"),
1598	}
1599	}
1600
// Shorthand for `Hir::look` with the given look-around assertion.
fn hir_look(look: hir::Look) -> Hir {
    Hir::look(look)
}
1604
// Empty patterns, empty groups and empty alternation branches all
// translate to `Hir::empty()` in the corresponding position.
#[test]
fn empty() {
    assert_eq!(t(""), Hir::empty());
    assert_eq!(t("(?i)"), Hir::empty());
    assert_eq!(t("()"), hir_capture(1, Hir::empty()));
    assert_eq!(t("(?:)"), Hir::empty());
    assert_eq!(t("(?P<wat>)"), hir_capture_name(1, "wat", Hir::empty()));
    assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
    assert_eq!(
        t("()|()"),
        hir_alt(vec![
            hir_capture(1, Hir::empty()),
            hir_capture(2, Hir::empty()),
        ])
    );
    assert_eq!(
        t("(|b)"),
        hir_capture(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
    );
    assert_eq!(
        t("(a|)"),
        hir_capture(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
    );
    assert_eq!(
        t("(a||c)"),
        hir_capture(
            1,
            hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
        )
    );
    assert_eq!(
        t("(||)"),
        hir_capture(
            1,
            hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
        )
    );
}
1643
// Literals translate to byte-string literals. A non-UTF-8 byte is only
// accepted when UTF-8 mode is disabled on the translator.
#[test]
fn literal() {
    assert_eq!(t("a"), hir_lit("a"));
    assert_eq!(t("(?-u)a"), hir_lit("a"));
    assert_eq!(t("☃"), hir_lit("☃"));
    assert_eq!(t("abcd"), hir_lit("abcd"));

    assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
    assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(t("(?-u)☃"), hir_lit("☃"));
    assert_eq!(
        t_err(r"(?-u)\xFF"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
}
1668
// Case insensitive literals become classes containing every case variant.
// Characters with no case variants (and bytes outside ASCII when Unicode
// mode is off) stay literals.
#[test]
fn literal_case_insensitive() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i:a)"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(t("(?i-u)β"), hir_lit("β"),);
}
1736
// `.` translates to a class whose exclusions depend on the `s` and `R`
// flags. With Unicode mode off, `.` is a byte class and is rejected when
// the translator requires valid UTF-8.
#[test]
fn dot() {
    assert_eq!(
        t("."),
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
    );
    assert_eq!(
        t("(?R)."),
        hir_uclass(&[
            ('\0', '\t'),
            ('\x0B', '\x0C'),
            ('\x0E', '\u{10FFFF}'),
        ])
    );
    assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
    assert_eq!(t("(?Rs)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
    assert_eq!(
        t_bytes("(?-u)."),
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
    );
    assert_eq!(
        t_bytes("(?R-u)."),
        hir_bclass(&[
            (b'\0', b'\t'),
            (b'\x0B', b'\x0C'),
            (b'\x0E', b'\xFF'),
        ])
    );
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));
    assert_eq!(t_bytes("(?Rs-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    assert_eq!(
        t_err("(?-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(6, 1, 7)
            ),
        }
    );
    assert_eq!(
        t_err("(?R-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(7, 1, 8)
            ),
        }
    );
    assert_eq!(
        t_err("(?s-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(7, 1, 8)
            ),
        }
    );
    assert_eq!(
        t_err("(?Rs-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(7, 1, 8),
                Position::new(8, 1, 9)
            ),
        }
    );
}
1810
// Each anchor or word boundary pattern translates to a single look-around
// that depends on the multi-line and Unicode flags.
#[test]
fn assertions() {
    let cases = [
        ("^", hir::Look::Start),
        ("$", hir::Look::End),
        (r"\A", hir::Look::Start),
        (r"\z", hir::Look::End),
        ("(?m)^", hir::Look::StartLF),
        ("(?m)$", hir::Look::EndLF),
        (r"(?m)\A", hir::Look::Start),
        (r"(?m)\z", hir::Look::End),
        (r"\b", hir::Look::WordUnicode),
        (r"\B", hir::Look::WordUnicodeNegate),
        (r"(?-u)\b", hir::Look::WordAscii),
        (r"(?-u)\B", hir::Look::WordAsciiNegate),
    ];
    for &(pattern, look) in cases.iter() {
        assert_eq!(t(pattern), hir_look(look));
    }
}
1827
// Capturing groups are numbered in order of appearance and may be named;
// non-capturing groups and flag-only groups leave no trace of their own in
// the HIR.
#[test]
fn group() {
    assert_eq!(t("(a)"), hir_capture(1, hir_lit("a")));
    assert_eq!(
        t("(a)(b)"),
        hir_cat(vec![
            hir_capture(1, hir_lit("a")),
            hir_capture(2, hir_lit("b")),
        ])
    );
    assert_eq!(
        t("(a)|(b)"),
        hir_alt(vec![
            hir_capture(1, hir_lit("a")),
            hir_capture(2, hir_lit("b")),
        ])
    );
    assert_eq!(t("(?P<foo>)"), hir_capture_name(1, "foo", Hir::empty()));
    assert_eq!(t("(?P<foo>a)"), hir_capture_name(1, "foo", hir_lit("a")));
    assert_eq!(
        t("(?P<foo>a)(?P<bar>b)"),
        hir_cat(vec![
            hir_capture_name(1, "foo", hir_lit("a")),
            hir_capture_name(2, "bar", hir_lit("b")),
        ])
    );
    assert_eq!(t("(?:)"), Hir::empty());
    assert_eq!(t("(?:a)"), hir_lit("a"));
    assert_eq!(
        t("(?:a)(b)"),
        hir_cat(vec![hir_lit("a"), hir_capture(1, hir_lit("b")),])
    );
    assert_eq!(
        t("(a)(?:b)(c)"),
        hir_cat(vec![
            hir_capture(1, hir_lit("a")),
            hir_lit("b"),
            hir_capture(2, hir_lit("c")),
        ])
    );
    assert_eq!(
        t("(a)(?P<foo>b)(c)"),
        hir_cat(vec![
            hir_capture(1, hir_lit("a")),
            hir_capture_name(2, "foo", hir_lit("b")),
            hir_capture(3, hir_lit("c")),
        ])
    );
    assert_eq!(t("()"), hir_capture(1, Hir::empty()));
    assert_eq!(t("((?i))"), hir_capture(1, Hir::empty()));
    assert_eq!(t("((?x))"), hir_capture(1, Hir::empty()));
    assert_eq!(
        t("(((?x)))"),
        hir_capture(1, hir_capture(2, Hir::empty()))
    );
}
1884
// `^` and `$` only become line anchors under `m`; `R` on its own changes
// nothing, while `R` together with `m` selects the CRLF-aware anchors.
// `\A` and `\z` are unaffected by either flag.
#[test]
fn line_anchors() {
    let cases = [
        // No flags.
        ("^", hir::Look::Start),
        ("$", hir::Look::End),
        (r"\A", hir::Look::Start),
        (r"\z", hir::Look::End),
        // Multi-line only.
        (r"(?m)\A", hir::Look::Start),
        (r"(?m)\z", hir::Look::End),
        ("(?m)^", hir::Look::StartLF),
        ("(?m)$", hir::Look::EndLF),
        // CRLF only.
        (r"(?R)\A", hir::Look::Start),
        (r"(?R)\z", hir::Look::End),
        ("(?R)^", hir::Look::Start),
        ("(?R)$", hir::Look::End),
        // CRLF and multi-line.
        (r"(?Rm)\A", hir::Look::Start),
        (r"(?Rm)\z", hir::Look::End),
        ("(?Rm)^", hir::Look::StartCRLF),
        ("(?Rm)$", hir::Look::EndCRLF),
    ];
    for &(pattern, look) in cases.iter() {
        assert_eq!(t(pattern), hir_look(look));
    }
}
1907
// Flags apply from the point where they are set until the end of the
// enclosing group, and can be toggled again by later flag directives.
#[test]
fn flags() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(
            vec![hir_uclass(&[('A', 'A'), ('a', 'a')]), hir_lit("a"),]
        )
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("β"),
        ])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("b"),
        ])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![
            hir_capture(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(
            vec![hir_lit("a"), hir_uclass(&[('A', 'A'), ('a', 'a')]),]
        )
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::StartLF),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::StartLF),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_look(hir::Look::Start),
        ])
    );
    // Under `U`, `a*` is lazy and `a*?` is greedy; `-U` restores the usual
    // meaning. The pattern must contain four star repetitions to line up
    // with the four expected nodes.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ]),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1995
// Every escaped meta character translates to the character itself. The
// pattern escapes, in order: `\ . + * ? ( ) | [ ] { } ^ $ #`, which must
// line up one-to-one with the expected literal.
#[test]
fn escape() {
    assert_eq!(
        t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
        hir_lit(r"\.+*?()|[]{}^$#")
    );
}
2003
// Repetition operators translate to `(min, max, greedy)` repetitions; a
// trailing `?` makes the operator lazy. Repetition binds tighter than
// concatenation and alternation.
#[test]
fn repetition() {
    assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
    assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
    assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
    assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
    assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
    assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));

    assert_eq!(t("a{1}"), hir_range(true, 1, Some(1), hir_lit("a"),));
    assert_eq!(t("a{1,}"), hir_range(true, 1, None, hir_lit("a"),));
    assert_eq!(t("a{1,2}"), hir_range(true, 1, Some(2), hir_lit("a"),));
    assert_eq!(t("a{1}?"), hir_range(false, 1, Some(1), hir_lit("a"),));
    assert_eq!(t("a{1,}?"), hir_range(false, 1, None, hir_lit("a"),));
    assert_eq!(t("a{1,2}?"), hir_range(false, 1, Some(2), hir_lit("a"),));

    assert_eq!(
        t("ab?"),
        hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
    );
    assert_eq!(t("(ab)?"), hir_quest(true, hir_capture(1, hir_lit("ab"))));
    assert_eq!(
        t("a|b?"),
        hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
    );
}
2030
// Concatenation binds tighter than alternation, and nesting via groups is
// preserved in the HIR. Look-arounds are used as operands because they are
// never merged with their neighbors the way literals are.
#[test]
fn cat_alt() {
    let a = || hir_look(hir::Look::Start);
    let b = || hir_look(hir::Look::End);
    let c = || hir_look(hir::Look::WordUnicode);
    let d = || hir_look(hir::Look::WordUnicodeNegate);

    assert_eq!(t("(^$)"), hir_capture(1, hir_cat(vec![a(), b()])));
    assert_eq!(t("^|$"), hir_alt(vec![a(), b()]));
    assert_eq!(t(r"^|$|\b"), hir_alt(vec![a(), b(), c()]));
    assert_eq!(
        t(r"^$|$\b|\b\B"),
        hir_alt(vec![
            hir_cat(vec![a(), b()]),
            hir_cat(vec![b(), c()]),
            hir_cat(vec![c(), d()]),
        ])
    );
    assert_eq!(t("(^|$)"), hir_capture(1, hir_alt(vec![a(), b()])));
    assert_eq!(
        t(r"(^|$|\b)"),
        hir_capture(1, hir_alt(vec![a(), b(), c()]))
    );
    assert_eq!(
        t(r"(^$|$\b|\b\B)"),
        hir_capture(
            1,
            hir_alt(vec![
                hir_cat(vec![a(), b()]),
                hir_cat(vec![b(), c()]),
                hir_cat(vec![c(), d()]),
            ])
        )
    );
    assert_eq!(
        t(r"(^$|($\b|(\b\B)))"),
        hir_capture(
            1,
            hir_alt(vec![
                hir_cat(vec![a(), b()]),
                hir_capture(
                    2,
                    hir_alt(vec![
                        hir_cat(vec![b(), c()]),
                        hir_capture(3, hir_cat(vec![c(), d()])),
                    ])
                ),
            ])
        )
    );
}
2082
// Tests the HIR transformation of things like '[a-z]|[A-Z]' into
// '[A-Za-z]'. In other words, an alternation of just classes is always
// equivalent to a single class corresponding to the union of the branches
// in that class. (Unless some branches match invalid UTF-8 and others
// match non-ASCII Unicode.)
#[test]
fn cat_class_flattened() {
    assert_eq!(t(r"[a-z]|[A-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
    // Combining all of the letter properties should give us the one giant
    // letter property.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)
            \p{Lowercase_Letter}
            |\p{Uppercase_Letter}
            |\p{Titlecase_Letter}
            |\p{Modifier_Letter}
            |\p{Other_Letter}
        "),
        hir_uclass_query(ClassQuery::Binary("letter"))
    );
    // Byte classes that can truly match invalid UTF-8 cannot be combined
    // with Unicode classes.
    assert_eq!(
        t_bytes(r"[Δδ]|(?-u:[\x90-\xFF])|[Λλ]"),
        hir_alt(vec![
            hir_uclass(&[('Δ', 'Δ'), ('δ', 'δ')]),
            hir_bclass(&[(b'\x90', b'\xFF')]),
            hir_uclass(&[('Λ', 'Λ'), ('λ', 'λ')]),
        ])
    );
    // Byte classes on their own can be combined, even if some are ASCII
    // and others are invalid UTF-8.
    assert_eq!(
        t_bytes(r"[a-z]|(?-u:[\x90-\xFF])|[A-Z]"),
        hir_bclass(&[(b'A', b'Z'), (b'a', b'z'), (b'\x90', b'\xFF')]),
    );
}
2121
// Checks translation of the POSIX-style `[[:name:]]` ASCII classes, in
// Unicode mode, in byte mode, with case insensitivity and with negation.
#[test]
fn class_ascii() {
    // Each class name paired with the AST kind it must translate to.
    let named: &[(&str, ast::ClassAsciiKind)] = &[
        ("[[:alnum:]]", ast::ClassAsciiKind::Alnum),
        ("[[:alpha:]]", ast::ClassAsciiKind::Alpha),
        ("[[:ascii:]]", ast::ClassAsciiKind::Ascii),
        ("[[:blank:]]", ast::ClassAsciiKind::Blank),
        ("[[:cntrl:]]", ast::ClassAsciiKind::Cntrl),
        ("[[:digit:]]", ast::ClassAsciiKind::Digit),
        ("[[:graph:]]", ast::ClassAsciiKind::Graph),
        ("[[:lower:]]", ast::ClassAsciiKind::Lower),
        ("[[:print:]]", ast::ClassAsciiKind::Print),
        ("[[:punct:]]", ast::ClassAsciiKind::Punct),
        ("[[:space:]]", ast::ClassAsciiKind::Space),
        ("[[:upper:]]", ast::ClassAsciiKind::Upper),
        ("[[:word:]]", ast::ClassAsciiKind::Word),
        ("[[:xdigit:]]", ast::ClassAsciiKind::Xdigit),
    ];
    for &(pattern, ref kind) in named {
        assert_eq!(
            t(pattern),
            hir_ascii_uclass(kind),
            "pattern: {:?}",
            pattern
        );
    }

    let lower = ast::ClassAsciiKind::Lower;

    // Negation, and Unicode-aware case folding of an ASCII class.
    assert_eq!(t("[[:^lower:]]"), hir_negate(hir_ascii_uclass(&lower)));
    #[cfg(feature = "unicode-case")]
    {
        let folded = hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ]);
        assert_eq!(t("(?i)[[:lower:]]"), folded);
    }

    // With Unicode mode off, the result is a byte class.
    assert_eq!(t("(?-u)[[:lower:]]"), hir_ascii_bclass(&lower));
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_ascii_bclass(&lower))
    );

    // A negated byte class is rejected by `t_err` with an invalid UTF-8
    // error whose span covers the `[:^lower:]` item.
    let invalid_utf8 = |start: usize, end: usize| TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        ),
    };
    assert_eq!(t_err("(?-u)[[:^lower:]]"), invalid_utf8(6, 16));
    assert_eq!(t_err("(?i-u)[[:^lower:]]"), invalid_utf8(7, 17));
}
2226
// Checks a bracketed class holding two ASCII class items, one of them
// negated, in both Unicode and byte mode.
#[test]
fn class_ascii_multiple() {
    // See: https://github.com/rust-lang/regex/issues/680
    let alnum = ast::ClassAsciiKind::Alnum;

    let unicode_expected = hir_union(
        hir_ascii_uclass(&alnum),
        hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
    );
    assert_eq!(t("[[:alnum:][:^ascii:]]"), unicode_expected);

    let bytes_expected =
        hir_union(hir_ascii_bclass(&alnum), hir_bclass(&[(0x80, 0xFF)]));
    assert_eq!(t_bytes("(?-u)[[:alnum:][:^ascii:]]"), bytes_expected);
}
2245
// Checks translation of the Perl classes `\d`, `\s`, `\w` and their negated
// forms in Unicode mode, with and without the case insensitive flag.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl_unicode() {
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let space = || hir_uclass_query(ClassQuery::Binary("space"));

    // Unicode
    assert_eq!(t(r"\d"), digit());
    assert_eq!(t(r"\s"), space());
    assert_eq!(t(r"\w"), hir_uclass_perl_word());
    #[cfg(feature = "unicode-case")]
    {
        // The expected classes are the same with `(?i)` as without it.
        assert_eq!(t(r"(?i)\d"), digit());
        assert_eq!(t(r"(?i)\s"), space());
        assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
    }

    // Unicode, negated
    assert_eq!(t(r"\D"), hir_negate(digit()));
    assert_eq!(t(r"\S"), hir_negate(space()));
    assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
        assert_eq!(t(r"(?i)\S"), hir_negate(space()));
        assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
    }
}
2289
// Checks translation of the Perl classes with Unicode mode disabled: the
// plain forms, the negated forms via `t_bytes`, and the errors reported by
// `t_err` for the negated forms.
#[test]
fn class_perl_ascii() {
    // ASCII only
    let plain: &[(&str, ast::ClassAsciiKind)] = &[
        (r"(?-u)\d", ast::ClassAsciiKind::Digit),
        (r"(?-u)\s", ast::ClassAsciiKind::Space),
        (r"(?-u)\w", ast::ClassAsciiKind::Word),
        (r"(?i-u)\d", ast::ClassAsciiKind::Digit),
        (r"(?i-u)\s", ast::ClassAsciiKind::Space),
        (r"(?i-u)\w", ast::ClassAsciiKind::Word),
    ];
    for &(pattern, ref kind) in plain {
        assert_eq!(
            t(pattern),
            hir_ascii_bclass(kind),
            "pattern: {:?}",
            pattern
        );
    }

    // ASCII only, negated
    let negated: &[(&str, ast::ClassAsciiKind)] = &[
        (r"(?-u)\D", ast::ClassAsciiKind::Digit),
        (r"(?-u)\S", ast::ClassAsciiKind::Space),
        (r"(?-u)\W", ast::ClassAsciiKind::Word),
        (r"(?i-u)\D", ast::ClassAsciiKind::Digit),
        (r"(?i-u)\S", ast::ClassAsciiKind::Space),
        (r"(?i-u)\W", ast::ClassAsciiKind::Word),
    ];
    for &(pattern, ref kind) in negated {
        assert_eq!(
            t_bytes(pattern),
            hir_negate(hir_ascii_bclass(kind)),
            "pattern: {:?}",
            pattern
        );
    }

    // ASCII only, negated, with UTF-8 mode enabled.
    // Negating any Perl class is an error here, because every such negated
    // class can match invalid UTF-8. Each entry lists the pattern followed
    // by the start and end positions of the expected error span.
    let rejected: &[(&str, usize, usize)] = &[
        (r"(?-u)\D", 5, 7),
        (r"(?-u)\S", 5, 7),
        (r"(?-u)\W", 5, 7),
        (r"(?i-u)\D", 6, 8),
        (r"(?i-u)\S", 6, 8),
        (r"(?i-u)\W", 6, 8),
    ];
    for &(pattern, start, end) in rejected {
        let expected = TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(start, 1, start + 1),
                Position::new(end, 1, end + 1),
            ),
        };
        assert_eq!(t_err(pattern), expected, "pattern: {:?}", pattern);
    }
}
2408
// Without the `unicode-perl` feature, `\w` must fail to translate with a
// "Perl class not found" error spanning the whole pattern.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\w"), expected);
}
2423
// With both `unicode-perl` and `unicode-bool` disabled, `\s` must fail to
// translate with a "Perl class not found" error spanning the whole pattern.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\s"), expected);
}
2438
// With both `unicode-perl` and `unicode-gencat` disabled, `\d` must fail to
// translate with a "Perl class not found" error spanning the whole pattern.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\d"), expected);
}
2456
// Exercises translation of Unicode general category classes written with
// `\p` and `\P`: one-letter and braced names, loosely spelled names, the
// `gc:`, `gc=` and `gc!=` forms, the `any`/`assigned`/`ascii` names, and the
// errors reported for unknown names or when Unicode mode is disabled.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    let separator = || hir_uclass_query(ClassQuery::Binary("Z"));
    let other = || hir_uclass_query(ClassQuery::Binary("Other"));

    // Every spelling below names the same "separator" category.
    assert_eq!(t(r"\pZ"), separator());
    assert_eq!(t(r"\pz"), separator());
    assert_eq!(t(r"\p{Separator}"), separator());
    assert_eq!(t(r"\p{se PaRa ToR}"), separator());
    assert_eq!(t(r"\p{gc:Separator}"), separator());
    assert_eq!(t(r"\p{gc=Separator}"), separator());
    assert_eq!(t(r"\p{Other}"), other());
    assert_eq!(t(r"\pC"), other());

    // Negated forms.
    assert_eq!(t(r"\PZ"), hir_negate(separator()));
    assert_eq!(t(r"\P{separator}"), hir_negate(separator()));
    // `\P` and `!=` each negate the class once, so combined they cancel
    // out and the result is the plain, non-negated class.
    assert_eq!(t(r"\P{gc!=separator}"), separator());

    // Names that are accepted both bare and as `gc:` values.
    assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
    assert_eq!(
        t(r"\p{assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );
    assert_eq!(
        t(r"\p{gc:any}"),
        hir_uclass_query(ClassQuery::Binary("Any"))
    );
    assert_eq!(
        t(r"\p{gc:assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{gc:ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );

    // Error cases. Every expected span lies on line 1 and covers exactly
    // the `\p...` item, given here by its start and end positions.
    let span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };
    assert_eq!(
        t_err(r"(?-u)\pZ"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: span(5, 8),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: span(5, 18),
        }
    );
    assert_eq!(
        t_err(r"\pE"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: span(0, 3),
        }
    );
    assert_eq!(
        t_err(r"\p{Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: span(0, 7),
        }
    );
    assert_eq!(
        t_err(r"\p{gc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: span(0, 10),
        }
    );
}
2570
// Without the `unicode-gencat` feature, general category names must fail
// to translate with a "property not found" error.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // The expected span starts at position 0 and ends at `end`, on line 1.
    let not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t_err(r"\p{Separator}"), not_found(13));
    assert_eq!(t_err(r"\p{Any}"), not_found(7));
}
2596
// Checks translation of a Unicode script class, its case folded and negated
// forms, and the error reported for unknown `sc:`/`scx:` values.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
        assert_eq!(
            t(r"(?i)\P{Greek}"),
            hir_negate(hir_case_fold(greek()))
        );
    }

    // The expected span starts at position 0 and ends at `end`, on line 1.
    let value_not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };
    assert_eq!(t_err(r"\p{sc:Foo}"), value_not_found(10));
    assert_eq!(t_err(r"\p{scx:Foo}"), value_not_found(11));
}
2638
// Without the `unicode-script` feature, script names must fail to
// translate with a "property not found" error.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // The expected span starts at position 0 and ends at `end`, on line 1.
    let not_found = |end: usize| TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: Span::new(
            Position::new(0, 1, 1),
            Position::new(end, 1, end + 1),
        ),
    };

    assert_eq!(t_err(r"\p{Greek}"), not_found(9));
    assert_eq!(t_err(r"\p{scx:Greek}"), not_found(13));
}
2664
// An unknown value for the `age` property must be reported as a "property
// value not found" error spanning the whole pattern.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\p{age:Foo}"), expected);
}
2679
// The negation of `\p{any}` must translate to a Unicode class with no
// ranges.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    let no_ranges = hir_uclass(&[]);
    assert_eq!(t(r"\P{any}"), no_ranges);
}
2685
// Without the `unicode-age` feature, an `age` query must fail to translate
// with a "property not found" error spanning the whole pattern.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        kind: hir::ErrorKind::UnicodePropertyNotFound,
        span: whole_pattern,
    };
    assert_eq!(t_err(r"\p{age:3.0}"), expected);
}
2700
// Checks translation of bracketed classes: literals, ranges, escapes,
// embedded Perl and Unicode classes, byte mode, case folding, negation and
// a handful of characters that are special inside brackets.
#[test]
fn class_bracketed() {
    let lone_a = || uclass(&[('a', 'a')]);

    // Literals, ranges and escapes.
    assert_eq!(t("[a]"), hir_lit("a"));
    assert_eq!(t("[ab]"), hir_uclass(&[('a', 'b')]));
    assert_eq!(t("[^[a]]"), class_negate(lone_a()));
    let single_range: &[(&str, char, char)] = &[
        ("[a-z]", 'a', 'z'),
        ("[a-fd-h]", 'a', 'h'),
        ("[a-fg-m]", 'a', 'm'),
        (r"[\x00]", '\0', '\0'),
        (r"[\n]", '\n', '\n'),
        ("[\n]", '\n', '\n'),
    ];
    for &(pattern, lo, hi) in single_range {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Perl and Unicode classes as the only item, plain and doubly negated.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    {
        let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(t(r"[\pZ]"), separator());
        assert_eq!(t(r"[\p{separator}]"), separator());
    }
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    {
        let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(t(r"[^\PZ]"), separator());
        assert_eq!(t(r"[^\P{separator}]"), separator());
    }
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // Byte mode.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(
        t_bytes(r"(?-u)[\xFF]"),
        hir_bclass(&[(b'\xFF', b'\xFF')])
    );

    // Case insensitive classes.
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
        assert_eq!(
            t("(?i)[k]"),
            hir_uclass(&[
                ('K', 'K'),
                ('k', 'k'),
                ('\u{212A}', '\u{212A}'),
            ])
        );
        assert_eq!(
            t("(?i)[β]"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
        );
    }
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k')]));

    // Negated classes.
    assert_eq!(t("[^a]"), class_negate(lone_a()));
    assert_eq!(t(r"[^\x00]"), class_negate(uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        class_negate(bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    {
        let separator = || hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(t(r"[^\pZ]"), hir_negate(separator()));
        assert_eq!(t(r"[^\p{separator}]"), hir_negate(separator()));
    }
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    {
        let greek = || hir_uclass_query(ClassQuery::Binary("greek"));
        assert_eq!(
            t(r"(?i)[^\p{greek}]"),
            hir_negate(hir_case_fold(greek()))
        );
        assert_eq!(
            t(r"(?i)[\P{greek}]"),
            hir_negate(hir_case_fold(greek()))
        );
    }

    // Test some weird cases: `[`, `&`, `~` and `-`, escaped or not, alone
    // or as a range endpoint.
    let weird: &[(&str, char, char)] = &[
        (r"[\[]", '[', '['),
        (r"[&]", '&', '&'),
        (r"[\&]", '&', '&'),
        (r"[\&\&]", '&', '&'),
        (r"[\x00-&]", '\0', '&'),
        (r"[&-\xFF]", '&', '\u{FF}'),
        (r"[~]", '~', '~'),
        (r"[\~]", '~', '~'),
        (r"[\~\~]", '~', '~'),
        (r"[\x00-~]", '\0', '~'),
        (r"[~-\xFF]", '~', '\u{FF}'),
        (r"[-]", '-', '-'),
        (r"[\-]", '-', '-'),
        (r"[\-\-]", '-', '-'),
        (r"[\x00-\-]", '\0', '-'),
        (r"[\--\xFF]", '-', '\u{FF}'),
    ];
    for &(pattern, lo, hi) in weird {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }

    // `t_err` must reject a negated byte class as invalid UTF-8.
    let rejected = TestError {
        kind: hir::ErrorKind::InvalidUtf8,
        span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
    };
    assert_eq!(t_err("(?-u)[^a]"), rejected);

    // Classes that end up with no ranges at all.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    {
        assert_eq!(t(r"[^\s\S]"), hir_uclass(&[]));
        assert_eq!(t_bytes(r"(?-u)[^\s\S]"), hir_bclass(&[]));
    }
}
2840
// Checks that several items inside one bracketed class translate to the
// union of the items, including nested classes, case folding and negation.
#[test]
fn class_bracketed_union() {
    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));

    #[cfg(feature = "unicode-gencat")]
    {
        let expected = hir_union(
            hir_uclass(&[('a', 'b')]),
            hir_uclass_query(ClassQuery::Binary("separator")),
        );
        assert_eq!(t(r"[a\pZb]"), expected);
    }

    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    {
        let expected = hir_union(
            hir_uclass_query(ClassQuery::Binary("greek")),
            hir_uclass_query(ClassQuery::Binary("separator")),
        );
        assert_eq!(t(r"[\pZ\p{Greek}]"), expected);
    }

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    {
        let age_3_0 = || {
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            })
        };
        let greek_or_separator = || {
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator")),
            )
        };
        // Union of the `age:3.0`, separator and Greek classes.
        let all_three = || hir_union(age_3_0(), greek_or_separator());

        assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), all_three());

        // Nesting the items in sub-classes still yields one union.
        assert_eq!(
            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
            hir_union(
                age_3_0(),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
                    greek_or_separator(),
                ),
            )
        );

        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
            hir_case_fold(all_three())
        );

        assert_eq!(
            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(all_three())
        );

        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(hir_case_fold(all_three()))
        );
    }
}
2957
// Checks bracketed classes that contain a negated nested class, with and
// without an outer negation and the case insensitive flag.
#[test]
fn class_bracketed_nested() {
    let c_only = || uclass(&[('c', 'c')]);

    // A union with `[^c]` either stays "everything but c" or, once `c`
    // itself is part of the union, becomes the negation of nothing.
    assert_eq!(t(r"[a[^c]]"), class_negate(c_only()));
    assert_eq!(t(r"[a-b[^c]]"), class_negate(c_only()));
    assert_eq!(t(r"[a-c[^c]]"), class_negate(uclass(&[])));

    // Negating the outer class leaves just `c`.
    assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
    assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));

    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(
            t(r"(?i)[a[^c]]"),
            hir_negate(class_case_fold(c_only()))
        );
        assert_eq!(
            t(r"(?i)[a-b[^c]]"),
            hir_negate(class_case_fold(c_only()))
        );

        let either_c = || hir_uclass(&[('C', 'C'), ('c', 'c')]);
        assert_eq!(t(r"(?i)[^a[^c]]"), either_c());
        assert_eq!(t(r"(?i)[^a-b[^c]]"), either_c());
    }

    // Negating a class that covers everything leaves no ranges.
    assert_eq!(t(r"[^a-c[^c]]"), hir_uclass(&[]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a-c[^c]]"), hir_uclass(&[]));
}
2990
// Checks the `&&` intersection operator inside bracketed classes, in
// Unicode and byte mode, with case folding, and around characters that are
// special inside brackets.
#[test]
fn class_bracketed_intersect() {
    // Unicode mode: every pattern reduces to the single range `lo-hi`.
    let unicode: &[(&str, char, char)] = &[
        ("[abc&&b-c]", 'b', 'c'),
        ("[abc&&[b-c]]", 'b', 'c'),
        ("[[abc]&&[b-c]]", 'b', 'c'),
        ("[a-z&&b-y&&c-x]", 'c', 'x'),
        ("[c-da-b&&a-d]", 'a', 'd'),
        ("[a-d&&c-da-b]", 'a', 'd'),
        (r"[a-z&&a-c]", 'a', 'c'),
        (r"[[a-z&&a-c]]", 'a', 'c'),
    ];
    for &(pattern, lo, hi) in unicode {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }
    assert_eq!(
        t(r"[^[a-z&&a-c]]"),
        hir_negate(hir_uclass(&[('a', 'c')]))
    );

    // Byte mode.
    let bytes: &[(&str, u8, u8)] = &[
        ("(?-u)[abc&&b-c]", b'b', b'c'),
        ("(?-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?-u)[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(pattern, lo, hi) in bytes {
        assert_eq!(
            t(pattern),
            hir_bclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Case insensitive, Unicode mode.
    #[cfg(feature = "unicode-case")]
    {
        let folded: &[(&str, char, char)] = &[
            ("(?i)[abc&&b-c]", 'b', 'c'),
            ("(?i)[abc&&[b-c]]", 'b', 'c'),
            ("(?i)[[abc]&&[b-c]]", 'b', 'c'),
            ("(?i)[a-z&&b-y&&c-x]", 'c', 'x'),
            ("(?i)[c-da-b&&a-d]", 'a', 'd'),
            ("(?i)[a-d&&c-da-b]", 'a', 'd'),
        ];
        for &(pattern, lo, hi) in folded {
            assert_eq!(
                t(pattern),
                hir_case_fold(hir_uclass(&[(lo, hi)])),
                "pattern: {:?}",
                pattern
            );
        }
    }

    // Case insensitive, byte mode.
    let folded_bytes: &[(&str, u8, u8)] = &[
        ("(?i-u)[abc&&b-c]", b'b', b'c'),
        ("(?i-u)[abc&&[b-c]]", b'b', b'c'),
        ("(?i-u)[[abc]&&[b-c]]", b'b', b'c'),
        ("(?i-u)[a-z&&b-y&&c-x]", b'c', b'x'),
        ("(?i-u)[c-da-b&&a-d]", b'a', b'd'),
        ("(?i-u)[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(pattern, lo, hi) in folded_bytes {
        assert_eq!(
            t(pattern),
            hir_case_fold(hir_bclass(&[(lo, hi)])),
            "pattern: {:?}",
            pattern
        );
    }

    // Special characters on either side of `&&`:
    // - `^` needs no escape in `[a^]`, so it is likewise accepted
    //   unescaped right after `&&`.
    // - `]` does need an escape after `&&`, since it is no longer at the
    //   start of the class.
    let special: &[(&str, char)] = &[
        (r"[\^&&^]", '^'),
        (r"[]&&\]]", ']'),
        (r"[-&&-]", '-'),
        (r"[\&&&&]", '&'),
        (r"[\&&&\&]", '&'),
    ];
    for &(pattern, ch) in special {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(ch, ch)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Test precedence.
    let expected = hir_uclass(&[('a', 'b'), ('h', 'w')]);
    assert_eq!(t(r"[a-w&&[^c-g]z]"), expected);
}
3080
// Checks how negation combines with the `&&` intersection operator, for
// both Unicode and byte classes.
#[test]
fn class_bracketed_intersect_negate() {
    let ascii_digit = || hir_ascii_bclass(&ast::ClassAsciiKind::Digit);

    // Unicode mode.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^\w&&\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(
        t(r"[^[a-z&&a-c]]"),
        hir_negate(hir_uclass(&[('a', 'c')]))
    );
    #[cfg(feature = "unicode-perl")]
    {
        let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
        assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(digit()));
        assert_eq!(t(r"[^[^\w&&\d]]"), digit());
        assert_eq!(
            t(r"[[[^\w]&&[^\d]]]"),
            hir_negate(hir_uclass_perl_word())
        );

        // Byte mode; this one assertion is also gated on `unicode-perl`.
        assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    }

    // Byte mode.
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(hir_ascii_bclass(&ast::ClassAsciiKind::Word))
    );
}
3124
// Checks the `--` difference operator inside bracketed classes.
#[test]
fn class_bracketed_difference() {
    #[cfg(feature = "unicode-gencat")]
    {
        // All letters, minus the ASCII range.
        let expected = hir_difference(
            hir_uclass_query(ClassQuery::Binary("letter")),
            hir_uclass(&[('\0', '\x7F')]),
        );
        assert_eq!(t(r"[\pL--[:ascii:]]"), expected);
    }

    // In byte mode, alphabetic minus lowercase leaves `A-Z`.
    let upper_only = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), upper_only);
}
3141
// Checks the `~~` symmetric difference operator inside bracketed classes.
#[test]
fn class_bracketed_symmetric_difference() {
    #[cfg(feature = "unicode-script")]
    {
        let expected = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), expected);
    }

    // `a-g` and `c-j` overlap on `c-g`, which leaves `a-b` and `h-j`.
    let unicode_expected = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode_expected);

    let bytes_expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), bytes_expected);
}
3160
// Checks patterns written with the `x` flag, where whitespace and
// `# comment` text are placed inside escapes, Unicode classes and counted
// repetitions.
#[test]
fn ignore_whitespace() {
    let capital_s = || hir_lit("S");

    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));

    // Braced hex escapes.
    assert_eq!(t(r"(?x)\x { 53 }"), capital_s());
    assert_eq!(
        t(r"(?x)\x # comment
            { # comment
                53 # comment
            } #comment"),
        capital_s()
    );

    // Unbraced hex escapes.
    assert_eq!(t(r"(?x)\x 53"), capital_s());
    assert_eq!(
        t(r"(?x)\x # comment
            53 # comment"),
        capital_s()
    );
    assert_eq!(t(r"(?x)\x5 3"), capital_s());

    #[cfg(feature = "unicode-gencat")]
    {
        let separator = hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(
            t(r"(?x)\p # comment
                { # comment
                    Separator # comment
                } # comment"),
            separator
        );
    }

    // Counted repetition.
    let five_to_ten = hir_range(true, 5, Some(10), hir_lit("a"));
    assert_eq!(
        t(r"(?x)a # comment
            { # comment
                5 # comment
                , # comment
                10 # comment
            } # comment"),
        five_to_ten
    );

    // An escaped space is kept as a literal space.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
3202
// Checks the `is_utf8` property reported for patterns translated with
// `props_bytes`.
#[test]
fn analysis_is_utf8() {
    // Positive examples.
    let utf8: &[&str] = &[
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
        r"(?-u)\B",
    ];
    for &pattern in utf8 {
        assert!(props_bytes(pattern).is_utf8(), "pattern: {:?}", pattern);
    }

    // Negative examples.
    let not_utf8: &[&str] = &[
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
    ];
    for &pattern in not_utf8 {
        assert!(!props_bytes(pattern).is_utf8(), "pattern: {:?}", pattern);
    }
}
3225
// Checks the number of explicit capture groups reported for a pattern.
#[test]
fn analysis_captures_len() {
    // Each entry is the expected count followed by the pattern.
    let cases: &[(usize, &str)] = &[
        (0, r"a"),
        (0, r"(?:a)"),
        (0, r"(?i-u:a)"),
        (0, r"(?i-u)a"),
        (1, r"(a)"),
        (1, r"(?P<foo>a)"),
        (1, r"()"),
        (1, r"()a"),
        (1, r"(a)+"),
        (2, r"(a)(b)"),
        (2, r"(a)|(b)"),
        (2, r"((a))"),
        (1, r"([a&&b])"),
    ];
    for &(want, pattern) in cases {
        assert_eq!(
            want,
            props(pattern).explicit_captures_len(),
            "pattern: {:?}",
            pattern
        );
    }
}
3242
// Checks the static explicit capture count reported for a pattern, which
// is `None` for the patterns listed with `None` below.
#[test]
fn analysis_static_captures_len() {
    // Each entry is the expected result followed by the pattern.
    let cases: &[(Option<usize>, &str)] = &[
        (Some(0), r""),
        (Some(0), r"foo|bar"),
        (None, r"(foo)|bar"),
        (None, r"foo|(bar)"),
        (Some(1), r"(foo|bar)"),
        (Some(1), r"(a|b|c|d|e|f)"),
        (Some(1), r"(a)|(b)|(c)|(d)|(e)|(f)"),
        (Some(2), r"(a)(b)|(c)(d)|(e)(f)"),
        (Some(6), r"(a)(b)(c)(d)(e)(f)"),
        (Some(3), r"(a)(b)(extra)|(a)(b)()"),
        (Some(3), r"(a)(b)((?:extra)?)"),
        (None, r"(a)(b)(extra)?"),
        (Some(1), r"(foo)|(bar)"),
        (Some(2), r"(foo)(bar)"),
        (Some(2), r"(foo)+(bar)"),
        (None, r"(foo)*(bar)"),
        (Some(0), r"(foo)?{0}"),
        (None, r"(foo)?{1}"),
        (Some(1), r"(foo){1}"),
        (Some(1), r"(foo){1,}"),
        (Some(1), r"(foo){1,}?"),
        (None, r"(foo){1,}??"),
        (None, r"(foo){0,}"),
        (Some(1), r"(foo)(?:bar)"),
        (Some(2), r"(foo(?:bar)+)(?:baz(boo))"),
        (Some(2), r"(?P<bar>foo)(?:bar)(bal|loon)"),
        (
            Some(2),
            r#"<(a)[^>]+href="([^"]+)"|<(img)[^>]+src="([^"]+)""#,
        ),
    ];
    for &(want, pattern) in cases {
        assert_eq!(
            want,
            props(pattern).static_explicit_captures_len(),
            "pattern: {:?}",
            pattern
        );
    }
}
3277
#[test]
fn analysis_is_all_assertions() {
    // Positive examples: patterns built only from look-around assertions.
    // Each must report a non-empty look set and a minimum length of zero.
    let assertion_only = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for pat in assertion_only {
        let p = props(pat);
        assert!(!p.look_set().is_empty(), "pattern: {:?}", pat);
        assert_eq!(p.minimum_len(), Some(0), "pattern: {:?}", pat);
    }

    // Negative example: an assertion followed by a literal still has a
    // non-empty look set, but its minimum length is one.
    let anchored_literal = props(r"^a");
    assert!(!anchored_literal.look_set().is_empty());
    assert_eq!(anchored_literal.minimum_len(), Some(1));
}
3326
#[test]
fn analysis_look_set_prefix_any() {
    // The pattern may start with either an ASCII word boundary or `_`;
    // `look_set_prefix_any()` must still include `Look::WordAscii`.
    let pattern = r"(?-u)(?i:(?:\b|_)win(?:32|64|dows)?(?:\b|_))";
    let prefix_looks = props(pattern).look_set_prefix_any();
    assert!(prefix_looks.contains(Look::WordAscii));
}
3332
// Checks which patterns report `Look::Start` in their prefix look set
// (`is_start`) and `Look::End` in their suffix look set (`is_end`).
#[test]
fn analysis_is_anchored() {
    let is_start = |p| props(p).look_set_prefix().contains(Look::Start);
    let is_end = |p| props(p).look_set_suffix().contains(Look::End);

    // Positive examples.
    assert!(is_start(r"^"));
    assert!(is_end(r"$"));

    assert!(is_start(r"^^"));
    assert!(is_end(r"$$"));

    assert!(is_start(r"^$"));
    assert!(is_end(r"^$"));

    assert!(is_start(r"^foo"));
    assert!(is_end(r"foo$"));

    assert!(is_start(r"^foo|^bar"));
    assert!(is_end(r"foo$|bar$"));

    assert!(is_start(r"^(foo|bar)"));
    assert!(is_end(r"(foo|bar)$"));

    assert!(is_start(r"^+"));
    assert!(is_end(r"$+"));
    assert!(is_start(r"^++"));
    assert!(is_end(r"$++"));
    assert!(is_start(r"(^)+"));
    assert!(is_end(r"($)+"));

    // `$^` is checked from both sides, mirroring the `^$` pair above. The
    // end-side check previously repeated the start-side assertion.
    assert!(is_start(r"$^"));
    assert!(is_end(r"$^"));
    assert!(is_start(r"$^|^$"));
    assert!(is_end(r"$^|^$"));

    // Assertions that sit between the anchor and the edge of the pattern
    // do not hide the anchor.
    assert!(is_start(r"\b^"));
    assert!(is_end(r"$\b"));
    assert!(is_start(r"^(?m:^)"));
    assert!(is_end(r"(?m:$)$"));
    assert!(is_start(r"(?m:^)^"));
    assert!(is_end(r"$(?m:$)"));

    // Negative examples.
    // Multi-line anchors are not `Look::Start`/`Look::End`.
    assert!(!is_start(r"(?m)^"));
    assert!(!is_end(r"(?m)$"));
    assert!(!is_start(r"(?m:^$)|$^"));
    assert!(!is_end(r"(?m:^$)|$^"));
    assert!(!is_start(r"$^|(?m:^$)"));
    assert!(!is_end(r"$^|(?m:^$)"));

    // A literal between the anchor and the edge of the pattern.
    assert!(!is_start(r"a^"));
    assert!(!is_start(r"$a"));

    assert!(!is_end(r"a^"));
    assert!(!is_end(r"$a"));

    // Only one branch of the alternation is anchored.
    assert!(!is_start(r"^foo|bar"));
    assert!(!is_end(r"foo|bar$"));

    // Repetitions that may match zero times make the anchor optional.
    assert!(!is_start(r"^*"));
    assert!(!is_end(r"$*"));
    assert!(!is_start(r"^*+"));
    assert!(!is_end(r"$*+"));
    assert!(!is_start(r"^+*"));
    assert!(!is_end(r"$+*"));
    assert!(!is_start(r"(^)*"));
    assert!(!is_end(r"($)*"));
}
3402
#[test]
fn analysis_is_any_anchored() {
    // Full look set of a pattern, regardless of where the look appears.
    let looks = |p| props(p).look_set();

    // Positive examples.
    for pat in [r"^", r"\A"] {
        assert!(looks(pat).contains(Look::Start), "pattern: {:?}", pat);
    }
    for pat in [r"$", r"\z"] {
        assert!(looks(pat).contains(Look::End), "pattern: {:?}", pat);
    }

    // Negative examples: multi-line anchors and the opposite anchor.
    for pat in [r"(?m)^", r"$"] {
        assert!(!looks(pat).contains(Look::Start), "pattern: {:?}", pat);
    }
    for pat in [r"(?m)$", r"^"] {
        assert!(!looks(pat).contains(Look::End), "pattern: {:?}", pat);
    }
}
3420
// Checks which patterns report a minimum match length of zero.
#[test]
fn analysis_can_empty() {
    // Positive examples: the minimum length must be exactly `Some(0)`.
    let assert_empty =
        |p| assert_eq!(Some(0), props_bytes(p).minimum_len());
    assert_empty(r"");
    assert_empty(r"()");
    assert_empty(r"()*");
    assert_empty(r"()+");
    assert_empty(r"()?");
    assert_empty(r"a*");
    assert_empty(r"a?");
    assert_empty(r"a{0}");
    assert_empty(r"a{0,}");
    assert_empty(r"a{0,1}");
    assert_empty(r"a{0,10}");
    #[cfg(feature = "unicode-gencat")]
    assert_empty(r"\pL*");
    assert_empty(r"a*|b");
    assert_empty(r"b|a*");
    assert_empty(r"a|");
    assert_empty(r"|a");
    assert_empty(r"a||b");
    // Every piece of this concatenation is optional. Without the `*`
    // operators the pattern would need at least five characters and could
    // never satisfy this assertion.
    assert_empty(r"a*a?(abcd)*");
    assert_empty(r"^");
    assert_empty(r"$");
    assert_empty(r"(?m)^");
    assert_empty(r"(?m)$");
    assert_empty(r"\A");
    assert_empty(r"\z");
    assert_empty(r"\B");
    assert_empty(r"(?-u)\B");
    assert_empty(r"\b");
    assert_empty(r"(?-u)\b");

    // Negative examples: any minimum length other than `Some(0)` passes,
    // including `None`.
    let assert_non_empty =
        |p| assert_ne!(Some(0), props_bytes(p).minimum_len());
    assert_non_empty(r"a+");
    assert_non_empty(r"a{1}");
    assert_non_empty(r"a{1,}");
    assert_non_empty(r"a{1,2}");
    assert_non_empty(r"a{1,10}");
    assert_non_empty(r"b|a");
    // Counterpart of the positive case above: the single mandatory `a+`
    // keeps the minimum length above zero.
    assert_non_empty(r"a*a+(abcd)*");
    #[cfg(feature = "unicode-gencat")]
    assert_non_empty(r"\P{any}");
    assert_non_empty(r"[a--a]");
    assert_non_empty(r"[a&&b]");
}
3471
#[test]
fn analysis_is_literal() {
    // Positive examples: flags, non-capturing groups and single-character
    // classes do not stop a pattern from being reported as a literal.
    let literals = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"(?:a)",
        r"foo(?:a)",
        r"(?:a)foo",
        r"[a]",
    ];
    for pat in literals {
        assert!(props(pat).is_literal(), "pattern: {:?}", pat);
    }

    // Negative examples: the empty pattern, assertions, alternations,
    // capture groups, repetitions and multi-character classes.
    let non_literals = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
    ];
    for pat in non_literals {
        assert!(!props(pat).is_literal(), "pattern: {:?}", pat);
    }
}
3494
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples: plain literals and alternations whose branches
    // are all literals.
    let accepted = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"foo|bar",
        r"foo|bar|baz",
        r"[a]",
        r"(?:ab)|cd",
        r"ab|(?:cd)",
    ];
    for pat in accepted {
        assert!(props(pat).is_alternation_literal(), "pattern: {:?}", pat);
    }

    // Negative examples.
    let rejected = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[ab]",
        r"[ab]|b",
        r"a|[ab]",
        r"(a)|b",
        r"a|(b)",
        r"a|b",
        r"a|b|c",
        r"[a]|b",
        r"a|[b]",
        r"(?:a)|b",
        r"a|(?:b)",
        r"(?:z|xx)@|xx",
    ];
    for pat in rejected {
        assert!(!props(pat).is_alternation_literal(), "pattern: {:?}", pat);
    }
}
3528
// Checks that the smart Hir::repetition constructor performs some basic
// simplifications.
#[test]
fn smart_repetition() {
    // Repeating zero times yields the empty expression.
    let zero_times = t(r"a{0}");
    assert_eq!(zero_times, Hir::empty());

    // Repeating exactly once yields the sub-expression itself.
    let exactly_once = t(r"a{1}");
    assert_eq!(exactly_once, hir_lit("a"));

    // A counted repetition of a look-around yields just the look-around.
    let repeated_look = t(r"\B{32111}");
    assert_eq!(repeated_look, hir_look(hir::Look::WordUnicodeNegate));
}
3537
// Checks that the smart Hir::concat constructor simplifies the given
// expressions the way we expect.
#[test]
fn smart_concat() {
    // Cases that should reduce to the empty expression or one literal.
    let flattened = [
        ("", Hir::empty()),
        ("(?:)", Hir::empty()),
        ("abc", hir_lit("abc")),
        ("(?:foo)(?:bar)", hir_lit("foobar")),
        ("quux(?:foo)(?:bar)baz", hir_lit("quuxfoobarbaz")),
    ];
    for (pat, expected) in flattened {
        assert_eq!(t(pat), expected, "pattern: {:?}", pat);
    }

    // Literals on each side of the anchor are merged, whatever the group
    // nesting in the pattern.
    let split_at_anchor = || {
        hir_cat(vec![
            hir_lit("foobar"),
            hir_look(hir::Look::Start),
            hir_lit("bazquux"),
        ])
    };
    assert_eq!(t("foo(?:bar^baz)quux"), split_at_anchor());
    assert_eq!(t("foo(?:ba(?:r^b)az)quux"), split_at_anchor());
}
3564
// Checks that the smart Hir::alternation constructor simplifies the given
// expressions the way we expect.
#[test]
fn smart_alternation() {
    // The class `[A-Z]`, which several expectations below share.
    let upper = || hir_uclass(&[('A', 'Z')]);

    // Non-capturing groups around branches disappear.
    let two_branches = hir_alt(vec![hir_lit("foo"), hir_lit("bar")]);
    assert_eq!(t("(?:foo)|(?:bar)"), two_branches);

    // A nested alternation is flattened into its parent.
    let flat_once = hir_alt(vec![
        hir_lit("quux"),
        hir_lit("abc"),
        hir_lit("def"),
        hir_lit("xyz"),
        hir_lit("baz"),
    ]);
    assert_eq!(t("quux|(?:abc|def|xyz)|baz"), flat_once);

    // Flattening applies through more than one level of nesting.
    let flat_twice = hir_alt(vec![
        hir_lit("quux"),
        hir_lit("abc"),
        hir_lit("def"),
        hir_lit("mno"),
        hir_lit("xyz"),
        hir_lit("baz"),
    ]);
    assert_eq!(t("quux|(?:abc|(?:def|mno)|xyz)|baz"), flat_twice);

    // An alternation of single characters becomes a character class.
    let as_class = hir_uclass(&[('a', 'f'), ('x', 'z')]);
    assert_eq!(t("a|b|c|d|e|f|x|y|z"), as_class);

    // Tests that we lift common prefixes out of an alternation.
    let one_class_prefix = hir_cat(vec![
        upper(),
        hir_alt(vec![hir_lit("foo"), hir_lit("quux")]),
    ]);
    assert_eq!(t("[A-Z]foo|[A-Z]quux"), one_class_prefix);

    let class_in_branch =
        hir_cat(vec![upper(), hir_alt(vec![upper(), hir_lit("quux")])]);
    assert_eq!(t("[A-Z][A-Z]|[A-Z]quux"), class_in_branch);

    let two_class_prefix = hir_cat(vec![
        upper(),
        upper(),
        hir_alt(vec![Hir::empty(), hir_lit("quux")]),
    ]);
    assert_eq!(t("[A-Z][A-Z]|[A-Z][A-Z]quux"), two_class_prefix);

    // Only the shared class is lifted here; the literals stay whole.
    let literals_kept = hir_cat(vec![
        upper(),
        hir_alt(vec![hir_lit("foo"), hir_lit("foobar")]),
    ]);
    assert_eq!(t("[A-Z]foo|[A-Z]foobar"), literals_kept);
}
3629
#[test]
fn regression_alt_empty_concat() {
    use crate::ast::{self, Ast};

    // Hand-built AST: an alternation whose only branch is an empty
    // concatenation.
    let span = Span::splat(Position::new(0, 0, 0));
    let empty_concat = Ast::concat(ast::Concat { span, asts: vec![] });
    let ast = Ast::alternation(ast::Alternation {
        span,
        asts: vec![empty_concat],
    });

    // Translation must succeed and yield the empty expression.
    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::empty()), translated);
}
3643
#[test]
fn regression_empty_alt() {
    use crate::ast::{self, Ast};

    // Hand-built AST: a concatenation whose only element is an alternation
    // with no branches.
    let span = Span::splat(Position::new(0, 0, 0));
    let empty_alternation =
        Ast::alternation(ast::Alternation { span, asts: vec![] });
    let ast = Ast::concat(ast::Concat {
        span,
        asts: vec![empty_alternation],
    });

    // Translation must succeed and yield `Hir::fail()`.
    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::fail()), translated);
}
3660
#[test]
fn regression_singleton_alt() {
    use crate::{
        ast::{self, Ast},
        hir::Dot,
    };

    // Hand-built AST: a concatenation holding an alternation whose single
    // branch is a dot.
    let span = Span::splat(Position::new(0, 0, 0));
    let single_branch = Ast::alternation(ast::Alternation {
        span,
        asts: vec![Ast::dot(span)],
    });
    let ast = Ast::concat(ast::Concat {
        span,
        asts: vec![single_branch],
    });

    // Translation must succeed and yield just the translated dot.
    let mut translator = Translator::new();
    let translated = translator.translate("", &ast);
    assert_eq!(Ok(Hir::dot(Dot::AnyCharExceptLF)), translated);
}
3680
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63168
#[test]
fn regression_fuzz_match() {
    let pat = "[(\u{6} \0-\u{afdf5}] \0 ";

    // Parse with octal escapes disabled and whitespace-insensitive mode on.
    let mut parser_builder = ParserBuilder::new();
    parser_builder.octal(false).ignore_whitespace(true);
    let ast = parser_builder.build().parse(pat).unwrap();

    // Translate with the same option set the original chained builder used.
    let mut translator_builder = TranslatorBuilder::new();
    translator_builder
        .utf8(true)
        .case_insensitive(false)
        .multi_line(false)
        .dot_matches_new_line(false)
        .swap_greed(true)
        .unicode(true);
    let hir = translator_builder.build().translate(pat, &ast).unwrap();

    // Expected: the bracketed class followed by a literal NUL.
    let expected = Hir::concat(vec![
        hir_uclass(&[('\0', '\u{afdf5}')]),
        hir_lit("\0"),
    ]);
    assert_eq!(hir, expected);
}
3709
// See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63155
#[cfg(feature = "unicode")]
#[test]
fn regression_fuzz_difference1() {
    // Only the absence of a panic is checked; the translated value is
    // never inspected.
    let _hir = t(
        r"\W\W|\W[^\v--\W\W\P{Script_Extensions:Pau_Cin_Hau}\u10A1A1-\U{3E3E3}--~~~~--~~~~~~~~------~~~~~~--~~~~~~]*",
    );
}
3717
3718	// Regression test for a fuzzer-found pattern. See: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=63153
3719	#[test]
3720	fn regression_fuzz_char_decrement1() { // NOTE(review): the literal below appears to carry extraction markup (stray backticks, possibly dropped `*`) - confirm against the upstream source.
3721	let pat = "w[w[^w?`\r`w`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0`w?`\r`w[^w?`\r`w[^w?`\r`w[^w`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0\0\0\0\0\0\0\0``\0\0\u{1}\0`]`\0\0`-`\0`][^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w?`\r`w[^w`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0\0\0\0\0\0\0\0`x`\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\0\0\0`??`\0\u{7f}`{2}`\u{10}`??`\0\0\0\0\0\0\0\0\0\u{3}\0\0\0`}`\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\u{1}\0`]`\0\0`-`\0`]`\0\0\0\0\0\0\0\u{1}\0`]`\0\u{1}\u{1}`H-i]-]`\0\0\0\0\u{1}\0`]`\0\0\0\u{1}\0`]`\0\0`-`\0\0\0\0\u{1}`9-`\u{7f}`]`\0`'\|-`\u{7f}`]`\0`'\|(?i-ux)[-`\u{7f}`]`\0`'`\u{3}\0\0\0`}`\0`-`\0`]<D`\0\0\0\0\0\0\u{1}`]`\0\0\0\0`]`\0\0`-*`\0`]`\0\0` ";
3722	let _ = t(pat); // shouldn't panic; the translated value is deliberately discarded
3723	}
3724	}
3725