translate.rs - Codebrowser

/*!
Defines a translator that converts an `Ast` to an `Hir`.
*/
4
5	use std::cell::{Cell, RefCell};
6	use std::result;
7
8	use crate::ast::{self, Ast, Span, Visitor};
9	use crate::hir::{self, Error, ErrorKind, Hir};
10	use crate::unicode::{self, ClassQuery};
11
/// Shorthand for a result whose error type is the HIR `Error`.
12	type Result<T> = result::Result<T, Error>;
13
14	/// A builder for constructing an AST->HIR translator.
15	#[derive(Clone, Debug)]
16	pub struct TranslatorBuilder {
17	allow_invalid_utf8: bool,
18	flags: Flags,
19	}
20
21	impl Default for TranslatorBuilder {
22	fn default() -> TranslatorBuilder {
23	TranslatorBuilder::new()
24	}
25	}
26
27	impl TranslatorBuilder {
28	/// Create a new translator builder with a default c onfiguration.
29	pub fn new() -> TranslatorBuilder {
30	TranslatorBuilder {
31	allow_invalid_utf8: `false`,
32	flags: Flags::default(),
33	}
34	}
35
36	/// Build a translator using the current configuration.
37	pub fn build(&self) -> Translator {
38	Translator {
39	stack: RefCell::new(vec![]),
40	flags: Cell::new(self.flags),
41	allow_invalid_utf8: self.allow_invalid_utf8,
42	}
43	}
44
45	/// When enabled, translation will permit the construction of a regular
46	/// expression that may match invalid UTF-8.
47	///
48	/// When disabled (the default), the translator is guaranteed to produce
49	/// an expression that will only ever match valid UTF-8 (otherwise, the
50	/// translator will return an error).
51	///
52	/// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53	/// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54	/// the parser to return an error. Namely, a negated ASCII word boundary
55	/// can result in matching positions that aren't valid UTF-8 boundaries.
56	pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
57	self.allow_invalid_utf8 = yes;
58	self
59	}
60
61	/// Enable or disable the case insensitive flag (`i`) by default.
62	pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
63	self.flags.case_insensitive = if yes { Some(`true`) } else { None };
64	self
65	}
66
67	/// Enable or disable the multi-line matching flag (`m`) by default.
68	pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
69	self.flags.multi_line = if yes { Some(`true`) } else { None };
70	self
71	}
72
73	/// Enable or disable the "dot matches any character" flag (`s`) by
74	/// default.
75	pub fn dot_matches_new_line(
76	&mut self,
77	yes: bool,
78	) -> &mut TranslatorBuilder {
79	self.flags.dot_matches_new_line = if yes { Some(`true`) } else { None };
80	self
81	}
82
83	/// Enable or disable the "swap greed" flag (`U`) by default.
84	pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
85	self.flags.swap_greed = if yes { Some(`true`) } else { None };
86	self
87	}
88
89	/// Enable or disable the Unicode flag (`u`) by default.
90	pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
91	self.flags.unicode = if yes { None } else { Some(`false`) };
92	self
93	}
94	}
95
96	/// A translator maps abstract syntax to a high level intermediate
97	/// representation.
98	///
99	/// A translator may be benefit from reuse. That is, a translator can translate
100	/// many abstract syntax trees.
101	///
102	/// A `Translator` can be configured in more detail via a
103	/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104	#[derive(Clone, Debug)]
105	pub struct Translator {
106	/// Our call stack, but on the heap.
107	stack: RefCell<Vec<HirFrame>>,
108	/// The current flag settings.
109	flags: Cell<Flags>,
110	/// Whether we're allowed to produce HIR that can match arbitrary bytes.
111	allow_invalid_utf8: bool,
112	}
113
114	impl Translator {
115	/// Create a new translator using the default configuration.
116	pub fn new() -> Translator {
117	TranslatorBuilder::new().build()
118	}
119
120	/// Translate the given abstract syntax tree (AST) into a high level
121	/// intermediate representation (HIR).
122	///
123	/// If there was a problem doing the translation, then an HIR-specific
124	/// error is returned.
125	///
126	/// The original pattern string used to produce the `Ast` must* also be*
127	/// provided. The translator does not use the pattern string during any
128	/// correct translation, but is used for error reporting.
129	pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
130	ast::visit(ast, TranslatorI::new(self, pattern))
131	}
132	}
133
134	/// An HirFrame is a single stack frame, represented explicitly, which is
135	/// created for each item in the Ast that we traverse.
136	///
137	/// Note that technically, this type doesn't represent our entire stack
138	/// frame. In particular, the Ast visitor represents any state associated with
139	/// traversing the Ast itself.
140	#[derive(Clone, Debug)]
141	enum HirFrame {
142	/// An arbitrary HIR expression. These get pushed whenever we hit a base
143	/// case in the Ast. They get popped after an inductive (i.e., recursive)
144	/// step is complete.
145	Expr(Hir),
146	/// A Unicode character class. This frame is mutated as we descend into
147	/// the Ast of a character class (which is itself its own mini recursive
148	/// structure).
149	ClassUnicode(hir::ClassUnicode),
150	/// A byte-oriented character class. This frame is mutated as we descend
151	/// into the Ast of a character class (which is itself its own mini
152	/// recursive structure).
153	///
154	/// Byte character classes are created when Unicode mode (`u`) is disabled.
155	/// If `allow_invalid_utf8` is disabled (the default), then a byte
156	/// character is only permitted to match ASCII text.
157	ClassBytes(hir::ClassBytes),
158	/// This is pushed on to the stack upon first seeing any kind of group,
159	/// indicated by parentheses (including non-capturing groups). It is popped
160	/// upon leaving a group.
161	Group {
162	/// The old active flags when this group was opened.
163	///
164	/// If this group sets flags, then the new active flags are set to the
165	/// result of merging the old flags with the flags introduced by this
166	/// group. If the group doesn't set any flags, then this is simply
167	/// equivalent to whatever flags were set when the group was opened.
168	///
169	/// When this group is popped, the active flags should be restored to
170	/// the flags set here.
171	///
172	/// The "active" flags correspond to whatever flags are set in the
173	/// Translator.
174	old_flags: Flags,
175	},
176	/// This is pushed whenever a concatenation is observed. After visiting
177	/// every sub-expression in the concatenation, the translator's stack is
178	/// popped until it sees a Concat frame.
179	Concat,
180	/// This is pushed whenever an alternation is observed. After visiting
181	/// every sub-expression in the alternation, the translator's stack is
182	/// popped until it sees an Alternation frame.
183	Alternation,
184	}
185
186	impl HirFrame {
187	/// Assert that the current stack frame is an Hir expression and return it.
188	fn unwrap_expr(self) -> Hir {
189	match self {
190	HirFrame::Expr(expr) => expr,
191	_ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
192	}
193	}
194
195	/// Assert that the current stack frame is a Unicode class expression and
196	/// return it.
197	fn unwrap_class_unicode(self) -> hir::ClassUnicode {
198	match self {
199	HirFrame::ClassUnicode(cls) => cls,
200	_ => panic!(
201	"tried to unwrap Unicode class \
202	from HirFrame, got: {:?}",
203	self
204	),
205	}
206	}
207
208	/// Assert that the current stack frame is a byte class expression and
209	/// return it.
210	fn unwrap_class_bytes(self) -> hir::ClassBytes {
211	match self {
212	HirFrame::ClassBytes(cls) => cls,
213	_ => panic!(
214	"tried to unwrap byte class \
215	from HirFrame, got: {:?}",
216	self
217	),
218	}
219	}
220
221	/// Assert that the current stack frame is a group indicator and return
222	/// its corresponding flags (the flags that were active at the time the
223	/// group was entered).
224	fn unwrap_group(self) -> Flags {
225	match self {
226	HirFrame::Group { old_flags } => old_flags,
227	_ => {
228	panic!("tried to unwrap group from HirFrame, got: {:?}", self)
229	}
230	}
231	}
232	}
233
234	impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
235	type Output = Hir;
236	type Err = Error;
237
238	fn finish(self) -> Result<Hir> {
239	// ... otherwise, we should have exactly one HIR on the stack.
240	assert_eq!(self.trans().stack.borrow().len(), `1`);
241	Ok(self.pop().unwrap().unwrap_expr())
242	}
243
244	fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
245	match *ast {
246	Ast::Class(ast::Class::Bracketed(_)) => {
247	if self.flags().unicode() {
248	let cls = hir::ClassUnicode::empty();
249	self.push(HirFrame::ClassUnicode(cls));
250	} else {
251	let cls = hir::ClassBytes::empty();
252	self.push(HirFrame::ClassBytes(cls));
253	}
254	}
255	Ast::Group(ref x) => {
256	let old_flags = x
257	.flags()
258	.map(\|ast\| self.set_flags(ast))
259	.unwrap_or_else(\|\| self.flags());
260	self.push(HirFrame::Group { old_flags });
261	}
262	Ast::Concat(ref x) if x.asts.is_empty() => {}
263	Ast::Concat(_) => {
264	self.push(HirFrame::Concat);
265	}
266	Ast::Alternation(ref x) if x.asts.is_empty() => {}
267	Ast::Alternation(_) => {
268	self.push(HirFrame::Alternation);
269	}
270	_ => {}
271	}
272	Ok(())
273	}
274
275	fn visit_post(&mut self, ast: &Ast) -> Result<()> {
276	match *ast {
277	Ast::Empty(_) => {
278	self.push(HirFrame::Expr(Hir::empty()));
279	}
280	Ast::Flags(ref x) => {
281	self.set_flags(&x.flags);
282	// Flags in the AST are generally considered directives and
283	// not actual sub-expressions. However, they can be used in
284	// the concrete syntax like `((?i))`, and we need some kind of
285	// indication of an expression there, and Empty is the correct
286	// choice.
287	//
288	// There can also be things like `(?i)+`, but we rule those out
289	// in the parser. In the future, we might allow them for
290	// consistency sake.
291	self.push(HirFrame::Expr(Hir::empty()));
292	}
293	Ast::Literal(ref x) => {
294	self.push(HirFrame::Expr(self.hir_literal(x)?));
295	}
296	Ast::Dot(span) => {
297	self.push(HirFrame::Expr(self.hir_dot(span)?));
298	}
299	Ast::Assertion(ref x) => {
300	self.push(HirFrame::Expr(self.hir_assertion(x)?));
301	}
302	Ast::Class(ast::Class::Perl(ref x)) => {
303	if self.flags().unicode() {
304	let cls = self.hir_perl_unicode_class(x)?;
305	let hcls = hir::Class::Unicode(cls);
306	self.push(HirFrame::Expr(Hir::class(hcls)));
307	} else {
308	let cls = self.hir_perl_byte_class(x);
309	let hcls = hir::Class::Bytes(cls);
310	self.push(HirFrame::Expr(Hir::class(hcls)));
311	}
312	}
313	Ast::Class(ast::Class::Unicode(ref x)) => {
314	let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
315	self.push(HirFrame::Expr(Hir::class(cls)));
316	}
317	Ast::Class(ast::Class::Bracketed(ref ast)) => {
318	if self.flags().unicode() {
319	let mut cls = self.pop().unwrap().unwrap_class_unicode();
320	self.unicode_fold_and_negate(
321	&ast.span,
322	ast.negated,
323	&mut cls,
324	)?;
325	if cls.ranges().is_empty() {
326	return Err(self.error(
327	ast.span,
328	ErrorKind::EmptyClassNotAllowed,
329	));
330	}
331	let expr = Hir::class(hir::Class::Unicode(cls));
332	self.push(HirFrame::Expr(expr));
333	} else {
334	let mut cls = self.pop().unwrap().unwrap_class_bytes();
335	self.bytes_fold_and_negate(
336	&ast.span,
337	ast.negated,
338	&mut cls,
339	)?;
340	if cls.ranges().is_empty() {
341	return Err(self.error(
342	ast.span,
343	ErrorKind::EmptyClassNotAllowed,
344	));
345	}
346
347	let expr = Hir::class(hir::Class::Bytes(cls));
348	self.push(HirFrame::Expr(expr));
349	}
350	}
351	Ast::Repetition(ref x) => {
352	let expr = self.pop().unwrap().unwrap_expr();
353	self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
354	}
355	Ast::Group(ref x) => {
356	let expr = self.pop().unwrap().unwrap_expr();
357	let old_flags = self.pop().unwrap().unwrap_group();
358	self.trans().flags.set(old_flags);
359	self.push(HirFrame::Expr(self.hir_group(x, expr)));
360	}
361	Ast::Concat(_) => {
362	let mut exprs = vec![];
363	while let Some(HirFrame::Expr(expr)) = self.pop() {
364	if !expr.kind().is_empty() {
365	exprs.push(expr);
366	}
367	}
368	exprs.reverse();
369	self.push(HirFrame::Expr(Hir::concat(exprs)));
370	}
371	Ast::Alternation(_) => {
372	let mut exprs = vec![];
373	while let Some(HirFrame::Expr(expr)) = self.pop() {
374	exprs.push(expr);
375	}
376	exprs.reverse();
377	self.push(HirFrame::Expr(Hir::alternation(exprs)));
378	}
379	}
380	Ok(())
381	}
382
383	fn visit_class_set_item_pre(
384	&mut self,
385	ast: &ast::ClassSetItem,
386	) -> Result<()> {
387	match *ast {
388	ast::ClassSetItem::Bracketed(_) => {
389	if self.flags().unicode() {
390	let cls = hir::ClassUnicode::empty();
391	self.push(HirFrame::ClassUnicode(cls));
392	} else {
393	let cls = hir::ClassBytes::empty();
394	self.push(HirFrame::ClassBytes(cls));
395	}
396	}
397	// We needn't handle the Union case here since the visitor will
398	// do it for us.
399	_ => {}
400	}
401	Ok(())
402	}
403
404	fn visit_class_set_item_post(
405	&mut self,
406	ast: &ast::ClassSetItem,
407	) -> Result<()> {
408	match *ast {
409	ast::ClassSetItem::Empty(_) => {}
410	ast::ClassSetItem::Literal(ref x) => {
411	if self.flags().unicode() {
412	let mut cls = self.pop().unwrap().unwrap_class_unicode();
413	cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
414	self.push(HirFrame::ClassUnicode(cls));
415	} else {
416	let mut cls = self.pop().unwrap().unwrap_class_bytes();
417	let byte = self.class_literal_byte(x)?;
418	cls.push(hir::ClassBytesRange::new(byte, byte));
419	self.push(HirFrame::ClassBytes(cls));
420	}
421	}
422	ast::ClassSetItem::Range(ref x) => {
423	if self.flags().unicode() {
424	let mut cls = self.pop().unwrap().unwrap_class_unicode();
425	cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
426	self.push(HirFrame::ClassUnicode(cls));
427	} else {
428	let mut cls = self.pop().unwrap().unwrap_class_bytes();
429	let start = self.class_literal_byte(&x.start)?;
430	let end = self.class_literal_byte(&x.end)?;
431	cls.push(hir::ClassBytesRange::new(start, end));
432	self.push(HirFrame::ClassBytes(cls));
433	}
434	}
435	ast::ClassSetItem::Ascii(ref x) => {
436	if self.flags().unicode() {
437	let xcls = self.hir_ascii_unicode_class(x)?;
438	let mut cls = self.pop().unwrap().unwrap_class_unicode();
439	cls.union(&xcls);
440	self.push(HirFrame::ClassUnicode(cls));
441	} else {
442	let xcls = self.hir_ascii_byte_class(x)?;
443	let mut cls = self.pop().unwrap().unwrap_class_bytes();
444	cls.union(&xcls);
445	self.push(HirFrame::ClassBytes(cls));
446	}
447	}
448	ast::ClassSetItem::Unicode(ref x) => {
449	let xcls = self.hir_unicode_class(x)?;
450	let mut cls = self.pop().unwrap().unwrap_class_unicode();
451	cls.union(&xcls);
452	self.push(HirFrame::ClassUnicode(cls));
453	}
454	ast::ClassSetItem::Perl(ref x) => {
455	if self.flags().unicode() {
456	let xcls = self.hir_perl_unicode_class(x)?;
457	let mut cls = self.pop().unwrap().unwrap_class_unicode();
458	cls.union(&xcls);
459	self.push(HirFrame::ClassUnicode(cls));
460	} else {
461	let xcls = self.hir_perl_byte_class(x);
462	let mut cls = self.pop().unwrap().unwrap_class_bytes();
463	cls.union(&xcls);
464	self.push(HirFrame::ClassBytes(cls));
465	}
466	}
467	ast::ClassSetItem::Bracketed(ref ast) => {
468	if self.flags().unicode() {
469	let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
470	self.unicode_fold_and_negate(
471	&ast.span,
472	ast.negated,
473	&mut cls1,
474	)?;
475
476	let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
477	cls2.union(&cls1);
478	self.push(HirFrame::ClassUnicode(cls2));
479	} else {
480	let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
481	self.bytes_fold_and_negate(
482	&ast.span,
483	ast.negated,
484	&mut cls1,
485	)?;
486
487	let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
488	cls2.union(&cls1);
489	self.push(HirFrame::ClassBytes(cls2));
490	}
491	}
492	// This is handled automatically by the visitor.
493	ast::ClassSetItem::Union(_) => {}
494	}
495	Ok(())
496	}
497
498	fn visit_class_set_binary_op_pre(
499	&mut self,
500	_op: &ast::ClassSetBinaryOp,
501	) -> Result<()> {
502	if self.flags().unicode() {
503	let cls = hir::ClassUnicode::empty();
504	self.push(HirFrame::ClassUnicode(cls));
505	} else {
506	let cls = hir::ClassBytes::empty();
507	self.push(HirFrame::ClassBytes(cls));
508	}
509	Ok(())
510	}
511
512	fn visit_class_set_binary_op_in(
513	&mut self,
514	_op: &ast::ClassSetBinaryOp,
515	) -> Result<()> {
516	if self.flags().unicode() {
517	let cls = hir::ClassUnicode::empty();
518	self.push(HirFrame::ClassUnicode(cls));
519	} else {
520	let cls = hir::ClassBytes::empty();
521	self.push(HirFrame::ClassBytes(cls));
522	}
523	Ok(())
524	}
525
526	fn visit_class_set_binary_op_post(
527	&mut self,
528	op: &ast::ClassSetBinaryOp,
529	) -> Result<()> {
530	use crate::ast::ClassSetBinaryOpKind::*;
531
532	if self.flags().unicode() {
533	let mut rhs = self.pop().unwrap().unwrap_class_unicode();
534	let mut lhs = self.pop().unwrap().unwrap_class_unicode();
535	let mut cls = self.pop().unwrap().unwrap_class_unicode();
536	if self.flags().case_insensitive() {
537	rhs.try_case_fold_simple().map_err(\|_\| {
538	self.error(
539	op.rhs.span().clone(),
540	ErrorKind::UnicodeCaseUnavailable,
541	)
542	})?;
543	lhs.try_case_fold_simple().map_err(\|_\| {
544	self.error(
545	op.lhs.span().clone(),
546	ErrorKind::UnicodeCaseUnavailable,
547	)
548	})?;
549	}
550	match op.kind {
551	Intersection => lhs.intersect(&rhs),
552	Difference => lhs.difference(&rhs),
553	SymmetricDifference => lhs.symmetric_difference(&rhs),
554	}
555	cls.union(&lhs);
556	self.push(HirFrame::ClassUnicode(cls));
557	} else {
558	let mut rhs = self.pop().unwrap().unwrap_class_bytes();
559	let mut lhs = self.pop().unwrap().unwrap_class_bytes();
560	let mut cls = self.pop().unwrap().unwrap_class_bytes();
561	if self.flags().case_insensitive() {
562	rhs.case_fold_simple();
563	lhs.case_fold_simple();
564	}
565	match op.kind {
566	Intersection => lhs.intersect(&rhs),
567	Difference => lhs.difference(&rhs),
568	SymmetricDifference => lhs.symmetric_difference(&rhs),
569	}
570	cls.union(&lhs);
571	self.push(HirFrame::ClassBytes(cls));
572	}
573	Ok(())
574	}
575	}
576
577	/// The internal implementation of a translator.
578	///
579	/// This type is responsible for carrying around the original pattern string,
580	/// which is not tied to the internal state of a translator.
581	///
582	/// A TranslatorI exists for the time it takes to translate a single Ast.
583	#[derive(Clone, Debug)]
584	struct TranslatorI<'t, 'p> {
585	trans: &'t Translator,
586	pattern: &'p str,
587	}
588
589	impl<'t, 'p> TranslatorI<'t, 'p> {
590	/// Build a new internal translator.
591	fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
592	TranslatorI { trans, pattern }
593	}
594
595	/// Return a reference to the underlying translator.
596	fn trans(&self) -> &Translator {
597	&self.trans
598	}
599
600	/// Push the given frame on to the call stack.
601	fn push(&self, frame: HirFrame) {
602	self.trans().stack.borrow_mut().push(frame);
603	}
604
605	/// Pop the top of the call stack. If the call stack is empty, return None.
606	fn pop(&self) -> Option<HirFrame> {
607	self.trans().stack.borrow_mut().pop()
608	}
609
610	/// Create a new error with the given span and error type.
611	fn error(&self, span: Span, kind: ErrorKind) -> Error {
612	Error { kind, pattern: self.pattern.to_string(), span }
613	}
614
615	/// Return a copy of the active flags.
616	fn flags(&self) -> Flags {
617	self.trans().flags.get()
618	}
619
620	/// Set the flags of this translator from the flags set in the given AST.
621	/// Then, return the old flags.
622	fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
623	let old_flags = self.flags();
624	let mut new_flags = Flags::from_ast(ast_flags);
625	new_flags.merge(&old_flags);
626	self.trans().flags.set(new_flags);
627	old_flags
628	}
629
630	fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
631	let ch = match self.literal_to_char(lit)? {
632	byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
633	hir::Literal::Unicode(ch) => ch,
634	};
635	if self.flags().case_insensitive() {
636	self.hir_from_char_case_insensitive(lit.span, ch)
637	} else {
638	self.hir_from_char(lit.span, ch)
639	}
640	}
641
642	/// Convert an Ast literal to its scalar representation.
643	///
644	/// When Unicode mode is enabled, then this always succeeds and returns a
645	/// `char` (Unicode scalar value).
646	///
647	/// When Unicode mode is disabled, then a raw byte is returned. If that
648	/// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
649	/// an error.
650	fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
651	if self.flags().unicode() {
652	return Ok(hir::Literal::Unicode(lit.c));
653	}
654	let byte = match lit.byte() {
655	None => return Ok(hir::Literal::Unicode(lit.c)),
656	Some(byte) => byte,
657	};
658	if byte <= `0x7F` {
659	return Ok(hir::Literal::Unicode(byte as char));
660	}
661	if !self.trans().allow_invalid_utf8 {
662	return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
663	}
664	Ok(hir::Literal::Byte(byte))
665	}
666
667	fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
668	if !self.flags().unicode() && c.len_utf8() > `1` {
669	return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
670	}
671	Ok(Hir::literal(hir::Literal::Unicode(c)))
672	}
673
674	fn hir_from_char_case_insensitive(
675	&self,
676	span: Span,
677	c: char,
678	) -> Result<Hir> {
679	if self.flags().unicode() {
680	// If case folding won't do anything, then don't bother trying.
681	let map =
682	unicode::contains_simple_case_mapping(c, c).map_err(\|_\| {
683	self.error(span, ErrorKind::UnicodeCaseUnavailable)
684	})?;
685	if !map {
686	return self.hir_from_char(span, c);
687	}
688	let mut cls =
689	hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
690	c, c,
691	)]);
692	cls.try_case_fold_simple().map_err(\|_\| {
693	self.error(span, ErrorKind::UnicodeCaseUnavailable)
694	})?;
695	Ok(Hir::class(hir::Class::Unicode(cls)))
696	} else {
697	if c.len_utf8() > `1` {
698	return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
699	}
700	// If case folding won't do anything, then don't bother trying.
701	match c {
702	'A'..='Z' \| 'a'..='z' => {}
703	_ => return self.hir_from_char(span, c),
704	}
705	let mut cls =
706	hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
707	c as u8, c as u8,
708	)]);
709	cls.case_fold_simple();
710	Ok(Hir::class(hir::Class::Bytes(cls)))
711	}
712	}
713
714	fn hir_dot(&self, span: Span) -> Result<Hir> {
715	let unicode = self.flags().unicode();
716	if !unicode && !self.trans().allow_invalid_utf8 {
717	return Err(self.error(span, ErrorKind::InvalidUtf8));
718	}
719	Ok(if self.flags().dot_matches_new_line() {
720	Hir::any(!unicode)
721	} else {
722	Hir::dot(!unicode)
723	})
724	}
725
726	fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
727	let unicode = self.flags().unicode();
728	let multi_line = self.flags().multi_line();
729	Ok(match asst.kind {
730	ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
731	hir::Anchor::StartLine
732	} else {
733	hir::Anchor::StartText
734	}),
735	ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
736	hir::Anchor::EndLine
737	} else {
738	hir::Anchor::EndText
739	}),
740	ast::AssertionKind::StartText => {
741	Hir::anchor(hir::Anchor::StartText)
742	}
743	ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
744	ast::AssertionKind::WordBoundary => {
745	Hir::word_boundary(if unicode {
746	hir::WordBoundary::Unicode
747	} else {
748	hir::WordBoundary::Ascii
749	})
750	}
751	ast::AssertionKind::NotWordBoundary => {
752	Hir::word_boundary(if unicode {
753	hir::WordBoundary::UnicodeNegate
754	} else {
755	// It is possible for negated ASCII word boundaries to
756	// match at invalid UTF-8 boundaries, even when searching
757	// valid UTF-8.
758	if !self.trans().allow_invalid_utf8 {
759	return Err(
760	self.error(asst.span, ErrorKind::InvalidUtf8)
761	);
762	}
763	hir::WordBoundary::AsciiNegate
764	})
765	}
766	})
767	}
768
769	fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
770	let kind = match group.kind {
771	ast::GroupKind::CaptureIndex(idx) => {
772	hir::GroupKind::CaptureIndex(idx)
773	}
774	ast::GroupKind::CaptureName(ref capname) => {
775	hir::GroupKind::CaptureName {
776	name: capname.name.clone(),
777	index: capname.index,
778	}
779	}
780	ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
781	};
782	Hir::group(hir::Group { kind, hir: Box::new(expr) })
783	}
784
785	fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
786	let kind = match rep.op.kind {
787	ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
788	ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
789	ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
790	ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
791	hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
792	}
793	ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
794	hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
795	}
796	ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
797	m,
798	n,
799	)) => {
800	hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
801	}
802	};
803	let greedy =
804	if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
805	Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) })
806	}
807
808	fn hir_unicode_class(
809	&self,
810	ast_class: &ast::ClassUnicode,
811	) -> Result<hir::ClassUnicode> {
812	use crate::ast::ClassUnicodeKind::*;
813
814	if !self.flags().unicode() {
815	return Err(
816	self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
817	);
818	}
819	let query = match ast_class.kind {
820	OneLetter(name) => ClassQuery::OneLetter(name),
821	Named(ref name) => ClassQuery::Binary(name),
822	NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
823	property_name: name,
824	property_value: value,
825	},
826	};
827	let mut result = self.convert_unicode_class_error(
828	&ast_class.span,
829	unicode::class(query),
830	);
831	if let Ok(ref mut class) = result {
832	self.unicode_fold_and_negate(
833	&ast_class.span,
834	ast_class.negated,
835	class,
836	)?;
837	if class.ranges().is_empty() {
838	let err = self
839	.error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
840	return Err(err);
841	}
842	}
843	result
844	}
845
846	fn hir_ascii_unicode_class(
847	&self,
848	ast: &ast::ClassAscii,
849	) -> Result<hir::ClassUnicode> {
850	let mut cls = hir::ClassUnicode::new(
851	ascii_class(&ast.kind)
852	.iter()
853	.map(\|&(s, e)\| hir::ClassUnicodeRange::new(s, e)),
854	);
855	self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
856	Ok(cls)
857	}
858
859	fn hir_ascii_byte_class(
860	&self,
861	ast: &ast::ClassAscii,
862	) -> Result<hir::ClassBytes> {
863	let mut cls = hir::ClassBytes::new(
864	ascii_class(&ast.kind)
865	.iter()
866	.map(\|&(s, e)\| hir::ClassBytesRange::new(s as u8, e as u8)),
867	);
868	self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
869	Ok(cls)
870	}
871
872	fn hir_perl_unicode_class(
873	&self,
874	ast_class: &ast::ClassPerl,
875	) -> Result<hir::ClassUnicode> {
876	use crate::ast::ClassPerlKind::*;
877
878	assert!(self.flags().unicode());
879	let result = match ast_class.kind {
880	Digit => unicode::perl_digit(),
881	Space => unicode::perl_space(),
882	Word => unicode::perl_word(),
883	};
884	let mut class =
885	self.convert_unicode_class_error(&ast_class.span, result)?;
886	// We needn't apply case folding here because the Perl Unicode classes
887	// are already closed under Unicode simple case folding.
888	if ast_class.negated {
889	class.negate();
890	}
891	Ok(class)
892	}
893
894	fn hir_perl_byte_class(
895	&self,
896	ast_class: &ast::ClassPerl,
897	) -> hir::ClassBytes {
898	use crate::ast::ClassPerlKind::*;
899
900	assert!(!self.flags().unicode());
901	let mut class = match ast_class.kind {
902	Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
903	Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
904	Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
905	};
906	// We needn't apply case folding here because the Perl ASCII classes
907	// are already closed (under ASCII case folding).
908	if ast_class.negated {
909	class.negate();
910	}
911	class
912	}
913
914	/// Converts the given Unicode specific error to an HIR translation error.
915	///
916	/// The span given should approximate the position at which an error would
917	/// occur.
918	fn convert_unicode_class_error(
919	&self,
920	span: &Span,
921	result: unicode::Result<hir::ClassUnicode>,
922	) -> Result<hir::ClassUnicode> {
923	result.map_err(\|err\| {
924	let sp = span.clone();
925	match err {
926	unicode::Error::PropertyNotFound => {
927	self.error(sp, ErrorKind::UnicodePropertyNotFound)
928	}
929	unicode::Error::PropertyValueNotFound => {
930	self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
931	}
932	unicode::Error::PerlClassNotFound => {
933	self.error(sp, ErrorKind::UnicodePerlClassNotFound)
934	}
935	}
936	})
937	}
938
939	fn unicode_fold_and_negate(
940	&self,
941	span: &Span,
942	negated: bool,
943	class: &mut hir::ClassUnicode,
944	) -> Result<()> {
945	// Note that we must apply case folding before negation!
946	// Consider `(?i)[^x]`. If we applied negation field, then
947	// the result would be the character class that matched any
948	// Unicode scalar value.
949	if self.flags().case_insensitive() {
950	class.try_case_fold_simple().map_err(\|_\| {
951	self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
952	})?;
953	}
954	if negated {
955	class.negate();
956	}
957	Ok(())
958	}
959
960	fn bytes_fold_and_negate(
961	&self,
962	span: &Span,
963	negated: bool,
964	class: &mut hir::ClassBytes,
965	) -> Result<()> {
966	// Note that we must apply case folding before negation!
967	// Consider `(?i)[^x]`. If we applied negation first, then
968	// the result would be the character class that matched any
969	// Unicode scalar value.
970	if self.flags().case_insensitive() {
971	class.case_fold_simple();
972	}
973	if negated {
974	class.negate();
975	}
976	if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
977	return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
978	}
979	Ok(())
980	}
981
982	/// Return a scalar byte value suitable for use as a literal in a byte
983	/// character class.
984	fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
985	match self.literal_to_char(ast)? {
986	hir::Literal::Byte(byte) => Ok(byte),
987	hir::Literal::Unicode(ch) => {
988	if ch <= `0x7F` as char {
989	Ok(ch as u8)
990	} else {
991	// We can't feasibly support Unicode in
992	// byte oriented classes. Byte classes don't
993	// do Unicode case folding.
994	Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
995	}
996	}
997	}
998	}
999	}
1000
/// The state of a regular expression's flags, as seen by the translator at
/// one particular moment.
///
/// Every flag has three possible states: absent (`None`), present but
/// disabled (`Some(false)`) or present and enabled (`Some(true)`).
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    /// State of the `i` flag.
    case_insensitive: Option<bool>,
    /// State of the `m` flag.
    multi_line: Option<bool>,
    /// State of the `s` flag.
    dot_matches_new_line: Option<bool>,
    /// State of the `U` flag.
    swap_greed: Option<bool>,
    /// State of the `u` flag.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1016
1017	impl Flags {
1018	fn from_ast(ast: &ast::Flags) -> Flags {
1019	let mut flags = Flags::default();
1020	let mut enable = `true`;
1021	for item in &ast.items {
1022	match item.kind {
1023	ast::FlagsItemKind::Negation => {
1024	enable = `false`;
1025	}
1026	ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1027	flags.case_insensitive = Some(enable);
1028	}
1029	ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1030	flags.multi_line = Some(enable);
1031	}
1032	ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1033	flags.dot_matches_new_line = Some(enable);
1034	}
1035	ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1036	flags.swap_greed = Some(enable);
1037	}
1038	ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1039	flags.unicode = Some(enable);
1040	}
1041	ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1042	}
1043	}
1044	flags
1045	}
1046
1047	fn merge(&mut self, previous: &Flags) {
1048	if self.case_insensitive.is_none() {
1049	self.case_insensitive = previous.case_insensitive;
1050	}
1051	if self.multi_line.is_none() {
1052	self.multi_line = previous.multi_line;
1053	}
1054	if self.dot_matches_new_line.is_none() {
1055	self.dot_matches_new_line = previous.dot_matches_new_line;
1056	}
1057	if self.swap_greed.is_none() {
1058	self.swap_greed = previous.swap_greed;
1059	}
1060	if self.unicode.is_none() {
1061	self.unicode = previous.unicode;
1062	}
1063	}
1064
1065	fn case_insensitive(&self) -> bool {
1066	self.case_insensitive.unwrap_or(`false`)
1067	}
1068
1069	fn multi_line(&self) -> bool {
1070	self.multi_line.unwrap_or(`false`)
1071	}
1072
1073	fn dot_matches_new_line(&self) -> bool {
1074	self.dot_matches_new_line.unwrap_or(`false`)
1075	}
1076
1077	fn swap_greed(&self) -> bool {
1078	self.swap_greed.unwrap_or(`false`)
1079	}
1080
1081	fn unicode(&self) -> bool {
1082	self.unicode.unwrap_or(`true`)
1083	}
1084	}
1085
1086	fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1087	let ranges: Vec<_> = ascii_class(kind)
1088	.iter()
1089	.cloned()
1090	.map(\|(s, e)\| hir::ClassBytesRange::new(s as u8, e as u8))
1091	.collect();
1092	hir::ClassBytes::new(ranges)
1093	}
1094
1095	fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
1096	use crate::ast::ClassAsciiKind::*;
1097	match *kind {
1098	Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
1099	Alpha => &[('A', 'Z'), ('a', 'z')],
1100	Ascii => &[('`\x00`', '`\x7F`')],
1101	Blank => &[('`\t`', '`\t`'), (' ', ' ')],
1102	Cntrl => &[('`\x00`', '`\x1F`'), ('`\x7F`', '`\x7F`')],
1103	Digit => &[('0', '9')],
1104	Graph => &[('!', '~')],
1105	Lower => &[('a', 'z')],
1106	Print => &[(' ', '~')],
1107	Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
1108	Space => &[
1109	('`\t`', '`\t`'),
1110	('`\n`', '`\n`'),
1111	('`\x0B`', '`\x0B`'),
1112	('`\x0C`', '`\x0C`'),
1113	('`\r`', '`\r`'),
1114	(' ', ' '),
1115	],
1116	Upper => &[('A', 'Z')],
1117	Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
1118	Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
1119	}
1120	}
1121
1122	#[cfg(test)]
1123	mod tests {
1124	use crate::ast::parse::ParserBuilder;
1125	use crate::ast::{self, Ast, Position, Span};
1126	use crate::hir::{self, Hir, HirKind};
1127	use crate::unicode::{self, ClassQuery};
1128
1129	use super::{ascii_class, TranslatorBuilder};
1130
1131	// We create these errors to compare with real hir::Errors in the tests.
1132	// We define equality between TestError and hir::Error to disregard the
1133	// pattern string in hir::Error, which is annoying to provide in tests.
1134	#[derive(Clone, Debug)]
1135	struct TestError {
1136	span: Span,
1137	kind: hir::ErrorKind,
1138	}
1139
1140	impl PartialEq<hir::Error> for TestError {
1141	fn eq(&self, other: &hir::Error) -> bool {
1142	self.span == other.span && self.kind == other.kind
1143	}
1144	}
1145
1146	impl PartialEq<TestError> for hir::Error {
1147	fn eq(&self, other: &TestError) -> bool {
1148	self.span == other.span && self.kind == other.kind
1149	}
1150	}
1151
1152	fn parse(pattern: &str) -> Ast {
1153	ParserBuilder::new().octal(`true`).build().parse(pattern).unwrap()
1154	}
1155
1156	fn t(pattern: &str) -> Hir {
1157	TranslatorBuilder::new()
1158	.allow_invalid_utf8(`false`)
1159	.build()
1160	.translate(pattern, &parse(pattern))
1161	.unwrap()
1162	}
1163
1164	fn t_err(pattern: &str) -> hir::Error {
1165	TranslatorBuilder::new()
1166	.allow_invalid_utf8(`false`)
1167	.build()
1168	.translate(pattern, &parse(pattern))
1169	.unwrap_err()
1170	}
1171
1172	fn t_bytes(pattern: &str) -> Hir {
1173	TranslatorBuilder::new()
1174	.allow_invalid_utf8(`true`)
1175	.build()
1176	.translate(pattern, &parse(pattern))
1177	.unwrap()
1178	}
1179
1180	fn hir_lit(s: &str) -> Hir {
1181	match s.len() {
1182	`0` => Hir::empty(),
1183	_ => {
1184	let lits = s
1185	.chars()
1186	.map(hir::Literal::Unicode)
1187	.map(Hir::literal)
1188	.collect();
1189	Hir::concat(lits)
1190	}
1191	}
1192	}
1193
1194	fn hir_blit(s: &[u8]) -> Hir {
1195	match s.len() {
1196	`0` => Hir::empty(),
1197	`1` => Hir::literal(hir::Literal::Byte(s[`0`])),
1198	_ => {
1199	let lits = s
1200	.iter()
1201	.cloned()
1202	.map(hir::Literal::Byte)
1203	.map(Hir::literal)
1204	.collect();
1205	Hir::concat(lits)
1206	}
1207	}
1208	}
1209
1210	fn hir_group(i: u32, expr: Hir) -> Hir {
1211	Hir::group(hir::Group {
1212	kind: hir::GroupKind::CaptureIndex(i),
1213	hir: Box::new(expr),
1214	})
1215	}
1216
1217	fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1218	Hir::group(hir::Group {
1219	kind: hir::GroupKind::CaptureName {
1220	name: name.to_string(),
1221	index: i,
1222	},
1223	hir: Box::new(expr),
1224	})
1225	}
1226
1227	fn hir_group_nocap(expr: Hir) -> Hir {
1228	Hir::group(hir::Group {
1229	kind: hir::GroupKind::NonCapturing,
1230	hir: Box::new(expr),
1231	})
1232	}
1233
1234	fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1235	Hir::repetition(hir::Repetition {
1236	kind: hir::RepetitionKind::ZeroOrOne,
1237	greedy,
1238	hir: Box::new(expr),
1239	})
1240	}
1241
1242	fn hir_star(greedy: bool, expr: Hir) -> Hir {
1243	Hir::repetition(hir::Repetition {
1244	kind: hir::RepetitionKind::ZeroOrMore,
1245	greedy,
1246	hir: Box::new(expr),
1247	})
1248	}
1249
1250	fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1251	Hir::repetition(hir::Repetition {
1252	kind: hir::RepetitionKind::OneOrMore,
1253	greedy,
1254	hir: Box::new(expr),
1255	})
1256	}
1257
1258	fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1259	Hir::repetition(hir::Repetition {
1260	kind: hir::RepetitionKind::Range(range),
1261	greedy,
1262	hir: Box::new(expr),
1263	})
1264	}
1265
1266	fn hir_alt(alts: Vec<Hir>) -> Hir {
1267	Hir::alternation(alts)
1268	}
1269
1270	fn hir_cat(exprs: Vec<Hir>) -> Hir {
1271	Hir::concat(exprs)
1272	}
1273
1274	#[allow(dead_code)]
1275	fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1276	Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1277	}
1278
1279	#[allow(dead_code)]
1280	fn hir_uclass_perl_word() -> Hir {
1281	Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1282	}
1283
1284	fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1285	let ranges: Vec<hir::ClassUnicodeRange> = ranges
1286	.iter()
1287	.map(\|&(s, e)\| hir::ClassUnicodeRange::new(s, e))
1288	.collect();
1289	Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1290	}
1291
1292	fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1293	let ranges: Vec<hir::ClassBytesRange> = ranges
1294	.iter()
1295	.map(\|&(s, e)\| hir::ClassBytesRange::new(s, e))
1296	.collect();
1297	Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1298	}
1299
1300	fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1301	let ranges: Vec<hir::ClassBytesRange> = ranges
1302	.iter()
1303	.map(\|&(s, e)\| {
1304	assert!(s as u32 <= `0x7F`);
1305	assert!(e as u32 <= `0x7F`);
1306	hir::ClassBytesRange::new(s as u8, e as u8)
1307	})
1308	.collect();
1309	Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1310	}
1311
1312	fn hir_case_fold(expr: Hir) -> Hir {
1313	match expr.into_kind() {
1314	HirKind::Class(mut cls) => {
1315	cls.case_fold_simple();
1316	Hir::class(cls)
1317	}
1318	_ => panic!("cannot case fold non-class Hir expr"),
1319	}
1320	}
1321
1322	fn hir_negate(expr: Hir) -> Hir {
1323	match expr.into_kind() {
1324	HirKind::Class(mut cls) => {
1325	cls.negate();
1326	Hir::class(cls)
1327	}
1328	_ => panic!("cannot negate non-class Hir expr"),
1329	}
1330	}
1331
1332	#[allow(dead_code)]
1333	fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1334	use crate::hir::Class::{Bytes, Unicode};
1335
1336	match (expr1.into_kind(), expr2.into_kind()) {
1337	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1338	c1.union(&c2);
1339	Hir::class(hir::Class::Unicode(c1))
1340	}
1341	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1342	c1.union(&c2);
1343	Hir::class(hir::Class::Bytes(c1))
1344	}
1345	_ => panic!("cannot union non-class Hir exprs"),
1346	}
1347	}
1348
1349	#[allow(dead_code)]
1350	fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1351	use crate::hir::Class::{Bytes, Unicode};
1352
1353	match (expr1.into_kind(), expr2.into_kind()) {
1354	(HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1355	c1.difference(&c2);
1356	Hir::class(hir::Class::Unicode(c1))
1357	}
1358	(HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1359	c1.difference(&c2);
1360	Hir::class(hir::Class::Bytes(c1))
1361	}
1362	_ => panic!("cannot difference non-class Hir exprs"),
1363	}
1364	}
1365
1366	fn hir_anchor(anchor: hir::Anchor) -> Hir {
1367	Hir::anchor(anchor)
1368	}
1369
1370	fn hir_word(wb: hir::WordBoundary) -> Hir {
1371	Hir::word_boundary(wb)
1372	}
1373
// Empty patterns, empty groups and alternations with empty branches.
#[test]
fn empty() {
    assert_eq!(t(""), Hir::empty());
    assert_eq!(t("(?i)"), Hir::empty());
    assert_eq!(t("()"), hir_group(1, Hir::empty()));
    assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
    assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
    assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
    assert_eq!(
        t("()|()"),
        hir_alt(vec![
            hir_group(1, Hir::empty()),
            hir_group(2, Hir::empty()),
        ])
    );
    assert_eq!(
        t("(|b)"),
        hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
    );
    assert_eq!(
        t("(a|)"),
        hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
    );
    assert_eq!(
        t("(a||c)"),
        hir_group(
            1,
            hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
        )
    );
    assert_eq!(
        t("(||)"),
        hir_group(
            1,
            hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
        )
    );
}
1412
// Literals in Unicode and byte mode, including the errors reported for a
// non-ASCII literal in non-Unicode mode and for a non-UTF-8 byte when
// invalid UTF-8 is disallowed.
#[test]
fn literal() {
    assert_eq!(t("a"), hir_lit("a"));
    assert_eq!(t("(?-u)a"), hir_lit("a"));
    assert_eq!(t("☃"), hir_lit("☃"));
    assert_eq!(t("abcd"), hir_lit("abcd"));

    assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
    assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?-u)☃"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 7)
            ),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\xFF"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
}
1446
// Case insensitive literals: each cased letter becomes a class of its case
// variants, in both Unicode and byte mode.
#[test]
fn literal_case_insensitive() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );

    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
    );
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1526
// The `.` expression with and without the `s` flag, in Unicode and byte
// mode.
#[test]
fn dot() {
    assert_eq!(
        t("."),
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
    );
    assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
    assert_eq!(
        t_bytes("(?-u)."),
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
    );
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    assert_eq!(
        t_err("(?-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(6, 1, 7)
            ),
        }
    );
    assert_eq!(
        t_err("(?s-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(7, 1, 8)
            ),
        }
    );
}
1562
// Anchors and word boundaries, including the error for a negated ASCII word
// boundary when invalid UTF-8 is disallowed.
#[test]
fn assertions() {
    assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
    assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
    assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
    assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
    assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
    assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
    assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
    assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));

    assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
    assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
    assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
    assert_eq!(
        t_bytes(r"(?-u)\B"),
        hir_word(hir::WordBoundary::AsciiNegate)
    );

    assert_eq!(
        t_err(r"(?-u)\B"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(7, 1, 8)
            ),
        }
    );
}
1593
// Capturing, named and non-capturing groups, and how capture indices are
// assigned when the kinds are mixed.
#[test]
fn group() {
    assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
    assert_eq!(
        t("(a)(b)"),
        hir_cat(vec![
            hir_group(1, hir_lit("a")),
            hir_group(2, hir_lit("b")),
        ])
    );
    assert_eq!(
        t("(a)|(b)"),
        hir_alt(vec![
            hir_group(1, hir_lit("a")),
            hir_group(2, hir_lit("b")),
        ])
    );
    assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
    assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
    assert_eq!(
        t("(?P<foo>a)(?P<bar>b)"),
        hir_cat(vec![
            hir_group_name(1, "foo", hir_lit("a")),
            hir_group_name(2, "bar", hir_lit("b")),
        ])
    );
    assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
    assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
    assert_eq!(
        t("(?:a)(b)"),
        hir_cat(vec![
            hir_group_nocap(hir_lit("a")),
            hir_group(1, hir_lit("b")),
        ])
    );
    assert_eq!(
        t("(a)(?:b)(c)"),
        hir_cat(vec![
            hir_group(1, hir_lit("a")),
            hir_group_nocap(hir_lit("b")),
            hir_group(2, hir_lit("c")),
        ])
    );
    assert_eq!(
        t("(a)(?P<foo>b)(c)"),
        hir_cat(vec![
            hir_group(1, hir_lit("a")),
            hir_group_name(2, "foo", hir_lit("b")),
            hir_group(3, hir_lit("c")),
        ])
    );
    assert_eq!(t("()"), hir_group(1, Hir::empty()));
    assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
    assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
    assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
}
1650
// Scoping of inline flags: flags set inside a group end with the group,
// flags set at the top level apply to what follows, and later flag items
// override earlier ones.
#[test]
fn flags() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("β"),
        ])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![
            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![
            hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
            hir_lit("b"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_lit("a")),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartLine),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_anchor(hir::Anchor::StartText),
        ])
    );
    // With `U` enabled, `a*` is lazy and `a*?` is greedy; disabling it
    // restores the usual meaning. The pattern needs four star repetitions to
    // line up with the four expected nodes below.
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(true, hir_lit("a")),
            hir_star(false, hir_lit("a")),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
            ])),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
        ])
    );
}
1740
// Every escaped meta character translates to the corresponding plain
// literal. The pattern escapes exactly the characters of the expected
// literal, in the same order.
#[test]
fn escape() {
    assert_eq!(
        t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
        hir_lit(r"\.+*?()|[]{}^$#")
    );
}
1748
// Repetition operators in greedy and lazy form, counted repetitions, and
// how repetition binds relative to concatenation, groups and alternation.
#[test]
fn repetition() {
    assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
    assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
    assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
    assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
    assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
    assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));

    assert_eq!(
        t("a{1}"),
        hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
    );
    assert_eq!(
        t("a{1,}"),
        hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
    );
    assert_eq!(
        t("a{1,2}"),
        hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
    );
    assert_eq!(
        t("a{1}?"),
        hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
    );
    assert_eq!(
        t("a{1,}?"),
        hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
    );
    assert_eq!(
        t("a{1,2}?"),
        hir_range(
            false,
            hir::RepetitionRange::Bounded(1, 2),
            hir_lit("a"),
        )
    );

    assert_eq!(
        t("ab?"),
        hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
    );
    assert_eq!(
        t("(ab)?"),
        hir_quest(
            true,
            hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
        )
    );
    assert_eq!(
        t("a|b?"),
        hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
    );
}
1803
// Concatenation and alternation, alone and nested inside capturing groups.
#[test]
fn cat_alt() {
    assert_eq!(
        t("(ab)"),
        hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
    );
    assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
    assert_eq!(
        t("a|b|c"),
        hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
    );
    assert_eq!(
        t("ab|bc|cd"),
        hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
    );
    assert_eq!(
        t("(a|b)"),
        hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
    );
    assert_eq!(
        t("(a|b|c)"),
        hir_group(
            1,
            hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
        )
    );
    assert_eq!(
        t("(ab|bc|cd)"),
        hir_group(
            1,
            hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
        )
    );
    assert_eq!(
        t("(ab|(bc|(cd)))"),
        hir_group(
            1,
            hir_alt(vec![
                hir_lit("ab"),
                hir_group(
                    2,
                    hir_alt(vec![
                        hir_lit("bc"),
                        hir_group(3, hir_lit("cd")),
                    ])
                ),
            ])
        )
    );
}
1854
// Every named ASCII class, plus negation, case folding and byte mode for
// `[:lower:]`.
#[test]
fn class_ascii() {
    assert_eq!(
        t("[[:alnum:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
    );
    assert_eq!(
        t("[[:alpha:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
    );
    assert_eq!(
        t("[[:ascii:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
    );
    assert_eq!(
        t("[[:blank:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
    );
    assert_eq!(
        t("[[:cntrl:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
    );
    assert_eq!(
        t("[[:digit:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t("[[:graph:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
    );
    assert_eq!(
        t("[[:lower:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
    );
    assert_eq!(
        t("[[:print:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
    );
    assert_eq!(
        t("[[:punct:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
    );
    assert_eq!(
        t("[[:space:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
    );
    assert_eq!(
        t("[[:upper:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
    );
    assert_eq!(
        t("[[:word:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
    );
    assert_eq!(
        t("[[:xdigit:]]"),
        hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
    );

    assert_eq!(
        t("[[:^lower:]]"),
        hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    assert_eq!(
        t("(?-u)[[:lower:]]"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
    );
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Lower
        )))
    );

    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(16, 1, 17)
            ),
        }
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(7, 1, 8),
                Position::new(17, 1, 18)
            ),
        }
    );
}
1961
// A bracketed class that combines a plain ASCII class with a negated one,
// in Unicode and byte mode.
#[test]
fn class_ascii_multiple() {
    // See: https://github.com/rust-lang/regex/issues/680
    assert_eq!(
        t("[[:alnum:][:^ascii:]]"),
        hir_union(
            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
            hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
        ),
    );
    assert_eq!(
        t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
        hir_union(
            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
            hir_bclass(&[(0x80, 0xFF)]),
        ),
    );
}
1980
// Perl classes (`\d`, `\s`, `\w`) and their negations, in Unicode and ASCII
// mode, with and without the case insensitive flag.
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    // Builders for the expected classes. Each call produces a fresh `Hir`,
    // since `hir_negate` consumes its argument.
    let uni_digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let uni_space = || hir_uclass_query(ClassQuery::Binary("space"));
    let uni_word = hir_uclass_perl_word;
    let ascii_digit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));
    let ascii_space =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space));
    let ascii_word =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word));

    // Unicode
    assert_eq!(t(r"\d"), uni_digit());
    assert_eq!(t(r"\s"), uni_space());
    assert_eq!(t(r"\w"), uni_word());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\d"), uni_digit());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\s"), uni_space());
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\w"), uni_word());

    // Unicode, negated
    assert_eq!(t(r"\D"), hir_negate(uni_digit()));
    assert_eq!(t(r"\S"), hir_negate(uni_space()));
    assert_eq!(t(r"\W"), hir_negate(uni_word()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\D"), hir_negate(uni_digit()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\S"), hir_negate(uni_space()));
    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)\W"), hir_negate(uni_word()));

    // ASCII only
    assert_eq!(t(r"(?-u)\d"), ascii_digit());
    assert_eq!(t(r"(?-u)\s"), ascii_space());
    assert_eq!(t(r"(?-u)\w"), ascii_word());
    assert_eq!(t(r"(?i-u)\d"), ascii_digit());
    assert_eq!(t(r"(?i-u)\s"), ascii_space());
    assert_eq!(t(r"(?i-u)\w"), ascii_word());

    // ASCII only, negated
    assert_eq!(t(r"(?-u)\D"), hir_negate(ascii_digit()));
    assert_eq!(t(r"(?-u)\S"), hir_negate(ascii_space()));
    assert_eq!(t(r"(?-u)\W"), hir_negate(ascii_word()));
    assert_eq!(t(r"(?i-u)\D"), hir_negate(ascii_digit()));
    assert_eq!(t(r"(?i-u)\S"), hir_negate(ascii_space()));
    assert_eq!(t(r"(?i-u)\W"), hir_negate(ascii_word()));
}
2088
// Without the `unicode-perl` feature, `\w` is rejected with a "Perl class
// not found" error spanning the whole escape.
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    assert_eq!(
        t_err(r"\w"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2103
// Without both the `unicode-perl` and `unicode-bool` features, `\s` is
// rejected with a "Perl class not found" error spanning the whole escape.
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    assert_eq!(
        t_err(r"\s"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2118
// Without both the `unicode-perl` and `unicode-gencat` features, `\d` is
// rejected with a "Perl class not found" error spanning the whole escape.
#[test]
#[cfg(all(
    not(feature = "unicode-perl"),
    not(feature = "unicode-gencat")
))]
fn class_perl_digit_disabled() {
    assert_eq!(
        t_err(r"\d"),
        TestError {
            kind: hir::ErrorKind::UnicodePerlClassNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(2, 1, 3)
            ),
        }
    );
}
2136
// General category classes: the accepted spellings of a category name,
// negated forms, the `any`/`assigned`/`ascii` names, and the errors for
// non-Unicode mode and for unknown properties or property values.
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
    assert_eq!(
        t(r"\p{Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{se PaRa ToR}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc:Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{gc=Separator}"),
        hir_uclass_query(ClassQuery::Binary("Z"))
    );
    assert_eq!(
        t(r"\p{Other}"),
        hir_uclass_query(ClassQuery::Binary("Other"))
    );
    assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));

    assert_eq!(
        t(r"\PZ"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    assert_eq!(
        t(r"\P{separator}"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );
    assert_eq!(
        t(r"\P{gc!=separator}"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
    );

    assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
    assert_eq!(
        t(r"\p{assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );
    assert_eq!(
        t(r"\p{gc:any}"),
        hir_uclass_query(ClassQuery::Binary("Any"))
    );
    assert_eq!(
        t(r"\p{gc:assigned}"),
        hir_uclass_query(ClassQuery::Binary("Assigned"))
    );
    assert_eq!(
        t(r"\p{gc:ascii}"),
        hir_uclass_query(ClassQuery::Binary("ASCII"))
    );

    assert_eq!(
        t_err(r"(?-u)\pZ"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 9)
            ),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(18, 1, 19)
            ),
        }
    );
    assert_eq!(
        t_err(r"\pE"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(3, 1, 4)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{gc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
}
2250
// Without the `unicode-gencat` feature, general category names are reported
// as unknown properties spanning the whole escape.
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    assert_eq!(
        t_err(r"\p{Separator}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(13, 1, 14)
            ),
        }
    );

    assert_eq!(
        t_err(r"\p{Any}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
}
2276
/// Script property queries (requires the `unicode-script` feature): a bare
/// script name, its case-insensitive and negated forms, and unknown values
/// for the `sc` / `scx` properties.
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    assert_eq!(
        t(r"\p{Greek}"),
        hir_uclass_query(ClassQuery::Binary("Greek"))
    );
    // The case-insensitive variants additionally need `unicode-case`.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\p{Greek}"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)\P{Greek}"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "Greek"
        ))))
    );

    // A known property name with an unknown value reports
    // `UnicodePropertyValueNotFound` over the whole escape.
    assert_eq!(
        t_err(r"\p{sc:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
    assert_eq!(
        t_err(r"\p{scx:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2318
/// Without the `unicode-script` feature, script queries (bare or via `scx`)
/// are expected to fail with `UnicodePropertyNotFound`.
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    assert_eq!(
        t_err(r"\p{Greek}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(9, 1, 10)
            ),
        }
    );

    assert_eq!(
        t_err(r"\p{scx:Greek}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(13, 1, 14)
            ),
        }
    );
}
2344
/// With the `unicode-age` feature enabled, an unknown `age` value is
/// expected to fail with `UnicodePropertyValueNotFound`.
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    assert_eq!(
        t_err(r"\p{age:Foo}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2359
/// Negating the `any` class is expected to be rejected with
/// `EmptyClassNotAllowed` (requires the `unicode-gencat` feature).
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    assert_eq!(
        t_err(r"\P{any}"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
}
2374
/// Without the `unicode-age` feature, an `age` query is expected to fail
/// with `UnicodePropertyNotFound`.
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    assert_eq!(
        t_err(r"\p{age:3.0}"),
        TestError {
            kind: hir::ErrorKind::UnicodePropertyNotFound,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(11, 1, 12)
            ),
        }
    );
}
2389
/// Translation of bracketed character classes: plain ranges, escapes,
/// embedded Perl/Unicode classes, byte classes under `(?-u)`, case folding,
/// negation, odd-but-legal literals, and the error cases.
#[test]
fn class_bracketed() {
    // Simple items and ranges; overlapping/adjacent ranges are merged.
    assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
    assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
    assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
    assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
    assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
    // Both the escape sequence `\n` and a literal newline are accepted.
    assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
    assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));

    // Perl and Unicode classes nested inside brackets.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\p{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    // Double negation (`[^...]` around a negated class) cancels out.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\PZ]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\P{separator}]"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // With Unicode mode disabled the result is a byte class.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes.
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[k]"),
        hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[β]"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
    );
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));

    // Negated classes.
    assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\pZ]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[^\p{separator}]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\p{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[\P{greek}]"),
        hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
            "greek"
        ))))
    );

    // Test some weird cases.
    assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));

    assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
    assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));

    assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
    assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
    assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));

    assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
    assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));

    // Error cases. The first goes through `t_err`, where a negated byte
    // class is reported as `InvalidUtf8` (the same pattern is accepted via
    // `t_bytes` above).
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
    // A class that can match nothing is rejected.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(7, 1, 8)
            ),
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    assert_eq!(
        t_err(r"(?-u)[^\s\S]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(12, 1, 13)
            ),
        }
    );
}
2546
/// Unions inside bracketed classes: plain ranges, then progressively more
/// Unicode property queries as the corresponding features are enabled.
#[test]
fn class_bracketed_union() {
    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));

    // Each nested block adds one more required feature; the nesting is
    // equivalent to an `all(...)` of the enclosing feature gates.
    #[cfg(feature = "unicode-gencat")]
    {
        let separator = || hir_uclass_query(ClassQuery::Binary("separator"));

        assert_eq!(
            t(r"[a\pZb]"),
            hir_union(hir_uclass(&[('a', 'b')]), separator())
        );

        #[cfg(feature = "unicode-script")]
        {
            let greek_or_separator = || {
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("greek")),
                    separator(),
                )
            };

            assert_eq!(t(r"[\pZ\p{Greek}]"), greek_or_separator());

            #[cfg(feature = "unicode-age")]
            {
                let age_3_0 = || {
                    hir_uclass_query(ClassQuery::ByValue {
                        property_name: "age",
                        property_value: "3.0",
                    })
                };
                let age_greek_separator =
                    || hir_union(age_3_0(), greek_or_separator());

                assert_eq!(
                    t(r"[\p{age:3.0}\pZ\p{Greek}]"),
                    age_greek_separator()
                );
                // Nested brackets flatten into a single union.
                assert_eq!(
                    t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
                    hir_union(
                        age_3_0(),
                        hir_union(
                            hir_uclass_query(ClassQuery::Binary("cyrillic")),
                            greek_or_separator(),
                        ),
                    )
                );
                assert_eq!(
                    t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
                    hir_negate(age_greek_separator())
                );

                #[cfg(feature = "unicode-case")]
                {
                    assert_eq!(
                        t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
                        hir_case_fold(age_greek_separator())
                    );
                    assert_eq!(
                        t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
                        hir_negate(hir_case_fold(age_greek_separator()))
                    );
                }
            }
        }
    }
}
2663
/// Nested bracketed classes where the inner class is negated, with and
/// without outer negation and case folding, plus the combinations that
/// collapse to an empty class and must be rejected.
#[test]
fn class_bracketed_nested() {
    assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
    assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
    // `a-c` together with "everything but c" covers everything.
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
    assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));

    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)[a[^c]]"),
        hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)[a-b[^c]]"),
        hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
    );

    #[cfg(feature = "unicode-case")]
    assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t(r"(?i)[^a-b[^c]]"),
        hir_uclass(&[('C', 'C'), ('c', 'c')])
    );

    // Negating a class that already covers everything leaves nothing.
    assert_eq!(
        t_err(r"[^a-c[^c]]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(10, 1, 11)
            ),
        }
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t_err(r"(?i)[^a-c[^c]]"),
        TestError {
            kind: hir::ErrorKind::EmptyClassNotAllowed,
            span: Span::new(
                Position::new(4, 1, 5),
                Position::new(14, 1, 15)
            ),
        }
    );
}
2714
/// Intersection (`&&`) inside bracketed classes, in Unicode and byte mode,
/// with and without case folding, plus a few syntactic corner cases.
#[test]
fn class_bracketed_intersect() {
    // The same six class bodies are checked under every flag combination.
    // All expected bounds are ASCII, so they convert losslessly to bytes.
    let shared: [(&str, (char, char)); 6] = [
        ("[abc&&b-c]", ('b', 'c')),
        ("[abc&&[b-c]]", ('b', 'c')),
        ("[[abc]&&[b-c]]", ('b', 'c')),
        ("[a-z&&b-y&&c-x]", ('c', 'x')),
        ("[c-da-b&&a-d]", ('a', 'd')),
        ("[a-d&&c-da-b]", ('a', 'd')),
    ];

    // Unicode mode, no flags.
    for &(class, range) in shared.iter() {
        assert_eq!(t(class), hir_uclass(&[range]), "pattern: {}", class);
    }
    assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
    assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Byte mode.
    for &(class, (lo, hi)) in shared.iter() {
        let pattern = format!("(?-u){}", class);
        assert_eq!(
            t(&pattern),
            hir_bclass(&[(lo as u8, hi as u8)]),
            "pattern: {}",
            pattern
        );
    }

    // Unicode mode, case-insensitive.
    #[cfg(feature = "unicode-case")]
    {
        for &(class, range) in shared.iter() {
            let pattern = format!("(?i){}", class);
            assert_eq!(
                t(&pattern),
                hir_case_fold(hir_uclass(&[range])),
                "pattern: {}",
                pattern
            );
        }
    }

    // Byte mode, case-insensitive.
    for &(class, (lo, hi)) in shared.iter() {
        let pattern = format!("(?i-u){}", class);
        assert_eq!(
            t(&pattern),
            hir_case_fold(hir_bclass(&[(lo as u8, hi as u8)])),
            "pattern: {}",
            pattern
        );
    }

    // In `[a^]`, `^` does not need to be escaped, so it makes sense that
    // `^` is also allowed to be unescaped after `&&`.
    assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
    // `]` needs to be escaped after `&&` since it's not at start of class.
    assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
    assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
    assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
    assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));

    // Test precedence.
    let expected_precedence = hir_uclass(&[('a', 'b'), ('h', 'w')]);
    assert_eq!(t(r"[a-w&&[^c-g]z]"), expected_precedence);
}
2804
/// Interaction of negation with intersection in bracketed classes, in both
/// Unicode and byte mode.
#[test]
fn class_bracketed_intersect_negate() {
    // Expected ASCII digit byte class, rebuilt on demand.
    let ascii_digit =
        || hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit));

    // Everything that depends on the `unicode-perl` feature.
    #[cfg(feature = "unicode-perl")]
    {
        let unicode_digit = || hir_uclass_query(ClassQuery::Binary("digit"));

        assert_eq!(t(r"[^\w&&\d]"), hir_negate(unicode_digit()));
        assert_eq!(t(r"[^[\w&&\d]]"), hir_negate(unicode_digit()));
        assert_eq!(t(r"[^[^\w&&\d]]"), unicode_digit());
        assert_eq!(
            t(r"[[[^\w]&&[^\d]]]"),
            hir_negate(hir_uclass_perl_word())
        );
        assert_eq!(t_bytes(r"(?-u)[^\w&&\d]"), hir_negate(ascii_digit()));
    }

    // Unicode mode without Perl classes.
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));

    // Byte mode.
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(t_bytes(r"(?-u)[^[\w&&\d]]"), hir_negate(ascii_digit()));
    assert_eq!(t_bytes(r"(?-u)[^[^\w&&\d]]"), ascii_digit());

    let not_ascii_word = hir_negate(hir_bclass_from_char(ascii_class(
        &ast::ClassAsciiKind::Word,
    )));
    assert_eq!(t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"), not_ascii_word);
}
2854
/// Set difference (`--`) inside bracketed classes.
#[test]
fn class_bracketed_difference() {
    // Unicode letters minus the ASCII range.
    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"[\pL--[:ascii:]]"),
        hir_difference(
            hir_uclass_query(ClassQuery::Binary("letter")),
            hir_uclass(&[('\0', '\x7F')])
        )
    );

    // Byte mode: ASCII alphabetics minus lowercase leaves `A-Z`.
    assert_eq!(
        t(r"(?-u)[[:alpha:]--[:lower:]]"),
        hir_bclass(&[(b'A', b'Z')])
    );
}
2871
/// Symmetric difference (`~~`) inside bracketed classes.
#[test]
fn class_bracketed_symmetric_difference() {
    // Code points on which the `sc` and `scx` queries for Greek disagree.
    #[cfg(feature = "unicode-script")]
    assert_eq!(
        t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
        hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ])
    );
    // Overlap `c-g` is removed, leaving the two non-shared ends.
    assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

    assert_eq!(
        t(r"(?-u)[a-g~~c-j]"),
        hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
    );
}
2890
/// Translation under the `x` flag: whitespace and `#` comments inside
/// escapes, property names and counted repetitions do not change the
/// resulting HIR, while an escaped space is kept as a literal.
#[test]
fn ignore_whitespace() {
    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
    assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
{ # comment
53 # comment
} #comment"),
        hir_lit("S")
    );

    assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
    assert_eq!(
        t(r"(?x)\x # comment
53 # comment"),
        hir_lit("S")
    );
    assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));

    #[cfg(feature = "unicode-gencat")]
    assert_eq!(
        t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
        hir_uclass_query(ClassQuery::Binary("separator"))
    );

    // A counted repetition spread over several commented lines.
    assert_eq!(
        t(r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment"),
        hir_range(
            true,
            hir::RepetitionRange::Bounded(5, 10),
            hir_lit("a")
        )
    );

    // The escaped space survives; the trailing comment does not.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2936
/// Checks `is_always_utf8` on a table of patterns translated via `t_bytes`.
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(t_bytes(pattern).is_always_utf8(), "pattern: {}", pattern);
    }

    // Negative examples.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in not_always_utf8.iter() {
        assert!(!t_bytes(pattern).is_always_utf8(), "pattern: {}", pattern);
    }
}
2959
/// Checks `is_all_assertions`: true for patterns built only from anchors
/// and word boundaries (including concatenations, alternations, groups and
/// repetitions of them), false once a literal is involved.
#[test]
fn analysis_is_all_assertions() {
    // Positive examples.
    assert!(t(r"\b").is_all_assertions());
    assert!(t(r"\B").is_all_assertions());
    assert!(t(r"^").is_all_assertions());
    assert!(t(r"$").is_all_assertions());
    assert!(t(r"\A").is_all_assertions());
    assert!(t(r"\z").is_all_assertions());
    assert!(t(r"$^\z\A\b\B").is_all_assertions());
    // Unescaped `|` so these are alternations, not literal pipes.
    assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
    assert!(t(r"^$|$^").is_all_assertions());
    assert!(t(r"((\b)+())*^").is_all_assertions());

    // Negative examples.
    assert!(!t(r"^a").is_all_assertions());
}
2977
/// Checks the four anchoring predicates (`is_anchored_start`,
/// `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`)
/// across anchors combined with concatenation, alternation, groups,
/// repetition and the multi-line flag.
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    // Alternations are anchored when every branch is.
    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    // Repetitions that must match at least once keep the anchor.
    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // Anchors in "reverse" order: all four predicates are checked, as for
    // every other pattern group in this test.
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    // Other assertions next to the anchor do not hide it.
    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    // Repetitions that may match zero times lose the anchor.
    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3086
/// Checks that multi-line anchors (`(?m)^` / `(?m)$`) count as line
/// anchors, alone and when mixed with text anchors in alternations.
#[test]
fn analysis_is_line_anchored() {
    assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());

    assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
    assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());

    assert!(t(r"(?m)^").is_line_anchored_start());
    assert!(t(r"(?m)$").is_line_anchored_end());

    // One multi-line branch and one text-anchored branch.
    assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
    assert!(t(r"(?m:^$)|$^").is_line_anchored_end());

    assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
    assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
}
3104
/// Checks `is_any_anchored_start` / `is_any_anchored_end` for text anchors
/// and for multi-line anchors.
#[test]
fn analysis_is_any_anchored() {
    let caret = t(r"^");
    let dollar = t(r"$");

    // Positive examples.
    assert!(caret.is_any_anchored_start());
    assert!(dollar.is_any_anchored_end());
    assert!(t(r"\A").is_any_anchored_start());
    assert!(t(r"\z").is_any_anchored_end());

    // Negative examples.
    assert!(!t(r"(?m)^").is_any_anchored_start());
    assert!(!t(r"(?m)$").is_any_anchored_end());
    assert!(!dollar.is_any_anchored_start());
    assert!(!caret.is_any_anchored_end());
}
3119
/// Checks `is_match_empty`: true when the pattern can match the empty
/// string (empty patterns, optional repetitions, empty alternation
/// branches, assertions), false when at least one character is required.
#[test]
fn analysis_is_match_empty() {
    // Positive examples.
    assert!(t(r"").is_match_empty());
    assert!(t(r"()").is_match_empty());
    assert!(t(r"()*").is_match_empty());
    assert!(t(r"()+").is_match_empty());
    assert!(t(r"()?").is_match_empty());
    assert!(t(r"a*").is_match_empty());
    assert!(t(r"a?").is_match_empty());
    assert!(t(r"a{0}").is_match_empty());
    assert!(t(r"a{0,}").is_match_empty());
    assert!(t(r"a{0,1}").is_match_empty());
    assert!(t(r"a{0,10}").is_match_empty());
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    // An alternation matches empty as soon as one branch does.
    assert!(t(r"a*|b").is_match_empty());
    assert!(t(r"b|a*").is_match_empty());
    assert!(t(r"a|").is_match_empty());
    assert!(t(r"|a").is_match_empty());
    assert!(t(r"a||b").is_match_empty());
    // Every piece of the concatenation is optional.
    assert!(t(r"a*a?(abcd)*").is_match_empty());
    assert!(t(r"^").is_match_empty());
    assert!(t(r"$").is_match_empty());
    assert!(t(r"(?m)^").is_match_empty());
    assert!(t(r"(?m)$").is_match_empty());
    assert!(t(r"\A").is_match_empty());
    assert!(t(r"\z").is_match_empty());
    assert!(t(r"\B").is_match_empty());
    assert!(t_bytes(r"(?-u)\B").is_match_empty());
    assert!(t(r"\b").is_match_empty());
    assert!(t(r"(?-u)\b").is_match_empty());

    // Negative examples.
    assert!(!t(r"a+").is_match_empty());
    assert!(!t(r"a{1}").is_match_empty());
    assert!(!t(r"a{1,}").is_match_empty());
    assert!(!t(r"a{1,2}").is_match_empty());
    assert!(!t(r"a{1,10}").is_match_empty());
    assert!(!t(r"b|a").is_match_empty());
    // The `a+` in the middle forces at least one character.
    assert!(!t(r"a*a+(abcd)*").is_match_empty());
}
3162
/// Checks `is_literal`: true only for non-empty plain literal patterns;
/// anchors, alternations, groups, repetitions and classes are not literals.
#[test]
fn analysis_is_literal() {
    // Positive examples.
    assert!(t(r"a").is_literal());
    assert!(t(r"ab").is_literal());
    assert!(t(r"abc").is_literal());
    assert!(t(r"(?m)abc").is_literal());

    // Negative examples.
    assert!(!t(r"").is_literal());
    assert!(!t(r"^").is_literal());
    // Unescaped `|`: an alternation of two literals is not itself a literal.
    assert!(!t(r"a|b").is_literal());
    assert!(!t(r"(a)").is_literal());
    assert!(!t(r"a+").is_literal());
    assert!(!t(r"foo(a)").is_literal());
    assert!(!t(r"(a)foo").is_literal());
    assert!(!t(r"[a]").is_literal());
}
3181
/// Checks `is_alternation_literal`: true for a plain literal or an
/// alternation whose branches are all plain literals; false when any branch
/// is a group, class, repetition or anchor, or the pattern is empty.
#[test]
fn analysis_is_alternation_literal() {
    // Positive examples.
    assert!(t(r"a").is_alternation_literal());
    assert!(t(r"ab").is_alternation_literal());
    assert!(t(r"abc").is_alternation_literal());
    assert!(t(r"(?m)abc").is_alternation_literal());
    assert!(t(r"a|b").is_alternation_literal());
    assert!(t(r"a|b|c").is_alternation_literal());
    assert!(t(r"foo|bar").is_alternation_literal());
    assert!(t(r"foo|bar|baz").is_alternation_literal());

    // Negative examples.
    assert!(!t(r"").is_alternation_literal());
    assert!(!t(r"^").is_alternation_literal());
    assert!(!t(r"(a)").is_alternation_literal());
    assert!(!t(r"a+").is_alternation_literal());
    assert!(!t(r"foo(a)").is_alternation_literal());
    assert!(!t(r"(a)foo").is_alternation_literal());
    assert!(!t(r"[a]").is_alternation_literal());
    // A single non-literal branch disqualifies the whole alternation.
    assert!(!t(r"[a]|b").is_alternation_literal());
    assert!(!t(r"a|[b]").is_alternation_literal());
    assert!(!t(r"(a)|b").is_alternation_literal());
    assert!(!t(r"a|(b)").is_alternation_literal());
}
3207	}
3208

Provided by KDAB

Definitions