1 | /*! |
2 | Defines a translator that converts an `Ast` to an `Hir`. |
3 | */ |
4 | |
5 | use std::cell::{Cell, RefCell}; |
6 | use std::result; |
7 | |
8 | use crate::ast::{self, Ast, Span, Visitor}; |
9 | use crate::hir::{self, Error, ErrorKind, Hir}; |
10 | use crate::unicode::{self, ClassQuery}; |
11 | |
/// Shorthand for a `std::result::Result` whose error type is the HIR
/// translation `Error` imported from `crate::hir`.
type Result<T> = result::Result<T, Error>;
13 | |
14 | /// A builder for constructing an AST->HIR translator. |
15 | #[derive (Clone, Debug)] |
16 | pub struct TranslatorBuilder { |
17 | allow_invalid_utf8: bool, |
18 | flags: Flags, |
19 | } |
20 | |
21 | impl Default for TranslatorBuilder { |
22 | fn default() -> TranslatorBuilder { |
23 | TranslatorBuilder::new() |
24 | } |
25 | } |
26 | |
27 | impl TranslatorBuilder { |
28 | /// Create a new translator builder with a default c onfiguration. |
29 | pub fn new() -> TranslatorBuilder { |
30 | TranslatorBuilder { |
31 | allow_invalid_utf8: false, |
32 | flags: Flags::default(), |
33 | } |
34 | } |
35 | |
36 | /// Build a translator using the current configuration. |
37 | pub fn build(&self) -> Translator { |
38 | Translator { |
39 | stack: RefCell::new(vec![]), |
40 | flags: Cell::new(self.flags), |
41 | allow_invalid_utf8: self.allow_invalid_utf8, |
42 | } |
43 | } |
44 | |
45 | /// When enabled, translation will permit the construction of a regular |
46 | /// expression that may match invalid UTF-8. |
47 | /// |
48 | /// When disabled (the default), the translator is guaranteed to produce |
49 | /// an expression that will only ever match valid UTF-8 (otherwise, the |
50 | /// translator will return an error). |
51 | /// |
52 | /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII |
53 | /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause |
54 | /// the parser to return an error. Namely, a negated ASCII word boundary |
55 | /// can result in matching positions that aren't valid UTF-8 boundaries. |
56 | pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder { |
57 | self.allow_invalid_utf8 = yes; |
58 | self |
59 | } |
60 | |
61 | /// Enable or disable the case insensitive flag (`i`) by default. |
62 | pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder { |
63 | self.flags.case_insensitive = if yes { Some(true) } else { None }; |
64 | self |
65 | } |
66 | |
67 | /// Enable or disable the multi-line matching flag (`m`) by default. |
68 | pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder { |
69 | self.flags.multi_line = if yes { Some(true) } else { None }; |
70 | self |
71 | } |
72 | |
73 | /// Enable or disable the "dot matches any character" flag (`s`) by |
74 | /// default. |
75 | pub fn dot_matches_new_line( |
76 | &mut self, |
77 | yes: bool, |
78 | ) -> &mut TranslatorBuilder { |
79 | self.flags.dot_matches_new_line = if yes { Some(true) } else { None }; |
80 | self |
81 | } |
82 | |
83 | /// Enable or disable the "swap greed" flag (`U`) by default. |
84 | pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder { |
85 | self.flags.swap_greed = if yes { Some(true) } else { None }; |
86 | self |
87 | } |
88 | |
89 | /// Enable or disable the Unicode flag (`u`) by default. |
90 | pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder { |
91 | self.flags.unicode = if yes { None } else { Some(false) }; |
92 | self |
93 | } |
94 | } |
95 | |
96 | /// A translator maps abstract syntax to a high level intermediate |
97 | /// representation. |
98 | /// |
99 | /// A translator may be benefit from reuse. That is, a translator can translate |
100 | /// many abstract syntax trees. |
101 | /// |
102 | /// A `Translator` can be configured in more detail via a |
103 | /// [`TranslatorBuilder`](struct.TranslatorBuilder.html). |
104 | #[derive (Clone, Debug)] |
105 | pub struct Translator { |
106 | /// Our call stack, but on the heap. |
107 | stack: RefCell<Vec<HirFrame>>, |
108 | /// The current flag settings. |
109 | flags: Cell<Flags>, |
110 | /// Whether we're allowed to produce HIR that can match arbitrary bytes. |
111 | allow_invalid_utf8: bool, |
112 | } |
113 | |
114 | impl Translator { |
115 | /// Create a new translator using the default configuration. |
116 | pub fn new() -> Translator { |
117 | TranslatorBuilder::new().build() |
118 | } |
119 | |
120 | /// Translate the given abstract syntax tree (AST) into a high level |
121 | /// intermediate representation (HIR). |
122 | /// |
123 | /// If there was a problem doing the translation, then an HIR-specific |
124 | /// error is returned. |
125 | /// |
126 | /// The original pattern string used to produce the `Ast` *must* also be |
127 | /// provided. The translator does not use the pattern string during any |
128 | /// correct translation, but is used for error reporting. |
129 | pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> { |
130 | ast::visit(ast, visitor:TranslatorI::new(self, pattern)) |
131 | } |
132 | } |
133 | |
134 | /// An HirFrame is a single stack frame, represented explicitly, which is |
135 | /// created for each item in the Ast that we traverse. |
136 | /// |
137 | /// Note that technically, this type doesn't represent our entire stack |
138 | /// frame. In particular, the Ast visitor represents any state associated with |
139 | /// traversing the Ast itself. |
140 | #[derive (Clone, Debug)] |
141 | enum HirFrame { |
142 | /// An arbitrary HIR expression. These get pushed whenever we hit a base |
143 | /// case in the Ast. They get popped after an inductive (i.e., recursive) |
144 | /// step is complete. |
145 | Expr(Hir), |
146 | /// A Unicode character class. This frame is mutated as we descend into |
147 | /// the Ast of a character class (which is itself its own mini recursive |
148 | /// structure). |
149 | ClassUnicode(hir::ClassUnicode), |
150 | /// A byte-oriented character class. This frame is mutated as we descend |
151 | /// into the Ast of a character class (which is itself its own mini |
152 | /// recursive structure). |
153 | /// |
154 | /// Byte character classes are created when Unicode mode (`u`) is disabled. |
155 | /// If `allow_invalid_utf8` is disabled (the default), then a byte |
156 | /// character is only permitted to match ASCII text. |
157 | ClassBytes(hir::ClassBytes), |
158 | /// This is pushed on to the stack upon first seeing any kind of group, |
159 | /// indicated by parentheses (including non-capturing groups). It is popped |
160 | /// upon leaving a group. |
161 | Group { |
162 | /// The old active flags when this group was opened. |
163 | /// |
164 | /// If this group sets flags, then the new active flags are set to the |
165 | /// result of merging the old flags with the flags introduced by this |
166 | /// group. If the group doesn't set any flags, then this is simply |
167 | /// equivalent to whatever flags were set when the group was opened. |
168 | /// |
169 | /// When this group is popped, the active flags should be restored to |
170 | /// the flags set here. |
171 | /// |
172 | /// The "active" flags correspond to whatever flags are set in the |
173 | /// Translator. |
174 | old_flags: Flags, |
175 | }, |
176 | /// This is pushed whenever a concatenation is observed. After visiting |
177 | /// every sub-expression in the concatenation, the translator's stack is |
178 | /// popped until it sees a Concat frame. |
179 | Concat, |
180 | /// This is pushed whenever an alternation is observed. After visiting |
181 | /// every sub-expression in the alternation, the translator's stack is |
182 | /// popped until it sees an Alternation frame. |
183 | Alternation, |
184 | } |
185 | |
186 | impl HirFrame { |
187 | /// Assert that the current stack frame is an Hir expression and return it. |
188 | fn unwrap_expr(self) -> Hir { |
189 | match self { |
190 | HirFrame::Expr(expr) => expr, |
191 | _ => panic!("tried to unwrap expr from HirFrame, got: {:?}" , self), |
192 | } |
193 | } |
194 | |
195 | /// Assert that the current stack frame is a Unicode class expression and |
196 | /// return it. |
197 | fn unwrap_class_unicode(self) -> hir::ClassUnicode { |
198 | match self { |
199 | HirFrame::ClassUnicode(cls) => cls, |
200 | _ => panic!( |
201 | "tried to unwrap Unicode class \ |
202 | from HirFrame, got: {:?}" , |
203 | self |
204 | ), |
205 | } |
206 | } |
207 | |
208 | /// Assert that the current stack frame is a byte class expression and |
209 | /// return it. |
210 | fn unwrap_class_bytes(self) -> hir::ClassBytes { |
211 | match self { |
212 | HirFrame::ClassBytes(cls) => cls, |
213 | _ => panic!( |
214 | "tried to unwrap byte class \ |
215 | from HirFrame, got: {:?}" , |
216 | self |
217 | ), |
218 | } |
219 | } |
220 | |
221 | /// Assert that the current stack frame is a group indicator and return |
222 | /// its corresponding flags (the flags that were active at the time the |
223 | /// group was entered). |
224 | fn unwrap_group(self) -> Flags { |
225 | match self { |
226 | HirFrame::Group { old_flags } => old_flags, |
227 | _ => { |
228 | panic!("tried to unwrap group from HirFrame, got: {:?}" , self) |
229 | } |
230 | } |
231 | } |
232 | } |
233 | |
234 | impl<'t, 'p> Visitor for TranslatorI<'t, 'p> { |
235 | type Output = Hir; |
236 | type Err = Error; |
237 | |
238 | fn finish(self) -> Result<Hir> { |
239 | // ... otherwise, we should have exactly one HIR on the stack. |
240 | assert_eq!(self.trans().stack.borrow().len(), 1); |
241 | Ok(self.pop().unwrap().unwrap_expr()) |
242 | } |
243 | |
244 | fn visit_pre(&mut self, ast: &Ast) -> Result<()> { |
245 | match *ast { |
246 | Ast::Class(ast::Class::Bracketed(_)) => { |
247 | if self.flags().unicode() { |
248 | let cls = hir::ClassUnicode::empty(); |
249 | self.push(HirFrame::ClassUnicode(cls)); |
250 | } else { |
251 | let cls = hir::ClassBytes::empty(); |
252 | self.push(HirFrame::ClassBytes(cls)); |
253 | } |
254 | } |
255 | Ast::Group(ref x) => { |
256 | let old_flags = x |
257 | .flags() |
258 | .map(|ast| self.set_flags(ast)) |
259 | .unwrap_or_else(|| self.flags()); |
260 | self.push(HirFrame::Group { old_flags }); |
261 | } |
262 | Ast::Concat(ref x) if x.asts.is_empty() => {} |
263 | Ast::Concat(_) => { |
264 | self.push(HirFrame::Concat); |
265 | } |
266 | Ast::Alternation(ref x) if x.asts.is_empty() => {} |
267 | Ast::Alternation(_) => { |
268 | self.push(HirFrame::Alternation); |
269 | } |
270 | _ => {} |
271 | } |
272 | Ok(()) |
273 | } |
274 | |
275 | fn visit_post(&mut self, ast: &Ast) -> Result<()> { |
276 | match *ast { |
277 | Ast::Empty(_) => { |
278 | self.push(HirFrame::Expr(Hir::empty())); |
279 | } |
280 | Ast::Flags(ref x) => { |
281 | self.set_flags(&x.flags); |
282 | // Flags in the AST are generally considered directives and |
283 | // not actual sub-expressions. However, they can be used in |
284 | // the concrete syntax like `((?i))`, and we need some kind of |
285 | // indication of an expression there, and Empty is the correct |
286 | // choice. |
287 | // |
288 | // There can also be things like `(?i)+`, but we rule those out |
289 | // in the parser. In the future, we might allow them for |
290 | // consistency sake. |
291 | self.push(HirFrame::Expr(Hir::empty())); |
292 | } |
293 | Ast::Literal(ref x) => { |
294 | self.push(HirFrame::Expr(self.hir_literal(x)?)); |
295 | } |
296 | Ast::Dot(span) => { |
297 | self.push(HirFrame::Expr(self.hir_dot(span)?)); |
298 | } |
299 | Ast::Assertion(ref x) => { |
300 | self.push(HirFrame::Expr(self.hir_assertion(x)?)); |
301 | } |
302 | Ast::Class(ast::Class::Perl(ref x)) => { |
303 | if self.flags().unicode() { |
304 | let cls = self.hir_perl_unicode_class(x)?; |
305 | let hcls = hir::Class::Unicode(cls); |
306 | self.push(HirFrame::Expr(Hir::class(hcls))); |
307 | } else { |
308 | let cls = self.hir_perl_byte_class(x); |
309 | let hcls = hir::Class::Bytes(cls); |
310 | self.push(HirFrame::Expr(Hir::class(hcls))); |
311 | } |
312 | } |
313 | Ast::Class(ast::Class::Unicode(ref x)) => { |
314 | let cls = hir::Class::Unicode(self.hir_unicode_class(x)?); |
315 | self.push(HirFrame::Expr(Hir::class(cls))); |
316 | } |
317 | Ast::Class(ast::Class::Bracketed(ref ast)) => { |
318 | if self.flags().unicode() { |
319 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
320 | self.unicode_fold_and_negate( |
321 | &ast.span, |
322 | ast.negated, |
323 | &mut cls, |
324 | )?; |
325 | if cls.ranges().is_empty() { |
326 | return Err(self.error( |
327 | ast.span, |
328 | ErrorKind::EmptyClassNotAllowed, |
329 | )); |
330 | } |
331 | let expr = Hir::class(hir::Class::Unicode(cls)); |
332 | self.push(HirFrame::Expr(expr)); |
333 | } else { |
334 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
335 | self.bytes_fold_and_negate( |
336 | &ast.span, |
337 | ast.negated, |
338 | &mut cls, |
339 | )?; |
340 | if cls.ranges().is_empty() { |
341 | return Err(self.error( |
342 | ast.span, |
343 | ErrorKind::EmptyClassNotAllowed, |
344 | )); |
345 | } |
346 | |
347 | let expr = Hir::class(hir::Class::Bytes(cls)); |
348 | self.push(HirFrame::Expr(expr)); |
349 | } |
350 | } |
351 | Ast::Repetition(ref x) => { |
352 | let expr = self.pop().unwrap().unwrap_expr(); |
353 | self.push(HirFrame::Expr(self.hir_repetition(x, expr))); |
354 | } |
355 | Ast::Group(ref x) => { |
356 | let expr = self.pop().unwrap().unwrap_expr(); |
357 | let old_flags = self.pop().unwrap().unwrap_group(); |
358 | self.trans().flags.set(old_flags); |
359 | self.push(HirFrame::Expr(self.hir_group(x, expr))); |
360 | } |
361 | Ast::Concat(_) => { |
362 | let mut exprs = vec![]; |
363 | while let Some(HirFrame::Expr(expr)) = self.pop() { |
364 | if !expr.kind().is_empty() { |
365 | exprs.push(expr); |
366 | } |
367 | } |
368 | exprs.reverse(); |
369 | self.push(HirFrame::Expr(Hir::concat(exprs))); |
370 | } |
371 | Ast::Alternation(_) => { |
372 | let mut exprs = vec![]; |
373 | while let Some(HirFrame::Expr(expr)) = self.pop() { |
374 | exprs.push(expr); |
375 | } |
376 | exprs.reverse(); |
377 | self.push(HirFrame::Expr(Hir::alternation(exprs))); |
378 | } |
379 | } |
380 | Ok(()) |
381 | } |
382 | |
383 | fn visit_class_set_item_pre( |
384 | &mut self, |
385 | ast: &ast::ClassSetItem, |
386 | ) -> Result<()> { |
387 | match *ast { |
388 | ast::ClassSetItem::Bracketed(_) => { |
389 | if self.flags().unicode() { |
390 | let cls = hir::ClassUnicode::empty(); |
391 | self.push(HirFrame::ClassUnicode(cls)); |
392 | } else { |
393 | let cls = hir::ClassBytes::empty(); |
394 | self.push(HirFrame::ClassBytes(cls)); |
395 | } |
396 | } |
397 | // We needn't handle the Union case here since the visitor will |
398 | // do it for us. |
399 | _ => {} |
400 | } |
401 | Ok(()) |
402 | } |
403 | |
404 | fn visit_class_set_item_post( |
405 | &mut self, |
406 | ast: &ast::ClassSetItem, |
407 | ) -> Result<()> { |
408 | match *ast { |
409 | ast::ClassSetItem::Empty(_) => {} |
410 | ast::ClassSetItem::Literal(ref x) => { |
411 | if self.flags().unicode() { |
412 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
413 | cls.push(hir::ClassUnicodeRange::new(x.c, x.c)); |
414 | self.push(HirFrame::ClassUnicode(cls)); |
415 | } else { |
416 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
417 | let byte = self.class_literal_byte(x)?; |
418 | cls.push(hir::ClassBytesRange::new(byte, byte)); |
419 | self.push(HirFrame::ClassBytes(cls)); |
420 | } |
421 | } |
422 | ast::ClassSetItem::Range(ref x) => { |
423 | if self.flags().unicode() { |
424 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
425 | cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c)); |
426 | self.push(HirFrame::ClassUnicode(cls)); |
427 | } else { |
428 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
429 | let start = self.class_literal_byte(&x.start)?; |
430 | let end = self.class_literal_byte(&x.end)?; |
431 | cls.push(hir::ClassBytesRange::new(start, end)); |
432 | self.push(HirFrame::ClassBytes(cls)); |
433 | } |
434 | } |
435 | ast::ClassSetItem::Ascii(ref x) => { |
436 | if self.flags().unicode() { |
437 | let xcls = self.hir_ascii_unicode_class(x)?; |
438 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
439 | cls.union(&xcls); |
440 | self.push(HirFrame::ClassUnicode(cls)); |
441 | } else { |
442 | let xcls = self.hir_ascii_byte_class(x)?; |
443 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
444 | cls.union(&xcls); |
445 | self.push(HirFrame::ClassBytes(cls)); |
446 | } |
447 | } |
448 | ast::ClassSetItem::Unicode(ref x) => { |
449 | let xcls = self.hir_unicode_class(x)?; |
450 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
451 | cls.union(&xcls); |
452 | self.push(HirFrame::ClassUnicode(cls)); |
453 | } |
454 | ast::ClassSetItem::Perl(ref x) => { |
455 | if self.flags().unicode() { |
456 | let xcls = self.hir_perl_unicode_class(x)?; |
457 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
458 | cls.union(&xcls); |
459 | self.push(HirFrame::ClassUnicode(cls)); |
460 | } else { |
461 | let xcls = self.hir_perl_byte_class(x); |
462 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
463 | cls.union(&xcls); |
464 | self.push(HirFrame::ClassBytes(cls)); |
465 | } |
466 | } |
467 | ast::ClassSetItem::Bracketed(ref ast) => { |
468 | if self.flags().unicode() { |
469 | let mut cls1 = self.pop().unwrap().unwrap_class_unicode(); |
470 | self.unicode_fold_and_negate( |
471 | &ast.span, |
472 | ast.negated, |
473 | &mut cls1, |
474 | )?; |
475 | |
476 | let mut cls2 = self.pop().unwrap().unwrap_class_unicode(); |
477 | cls2.union(&cls1); |
478 | self.push(HirFrame::ClassUnicode(cls2)); |
479 | } else { |
480 | let mut cls1 = self.pop().unwrap().unwrap_class_bytes(); |
481 | self.bytes_fold_and_negate( |
482 | &ast.span, |
483 | ast.negated, |
484 | &mut cls1, |
485 | )?; |
486 | |
487 | let mut cls2 = self.pop().unwrap().unwrap_class_bytes(); |
488 | cls2.union(&cls1); |
489 | self.push(HirFrame::ClassBytes(cls2)); |
490 | } |
491 | } |
492 | // This is handled automatically by the visitor. |
493 | ast::ClassSetItem::Union(_) => {} |
494 | } |
495 | Ok(()) |
496 | } |
497 | |
498 | fn visit_class_set_binary_op_pre( |
499 | &mut self, |
500 | _op: &ast::ClassSetBinaryOp, |
501 | ) -> Result<()> { |
502 | if self.flags().unicode() { |
503 | let cls = hir::ClassUnicode::empty(); |
504 | self.push(HirFrame::ClassUnicode(cls)); |
505 | } else { |
506 | let cls = hir::ClassBytes::empty(); |
507 | self.push(HirFrame::ClassBytes(cls)); |
508 | } |
509 | Ok(()) |
510 | } |
511 | |
512 | fn visit_class_set_binary_op_in( |
513 | &mut self, |
514 | _op: &ast::ClassSetBinaryOp, |
515 | ) -> Result<()> { |
516 | if self.flags().unicode() { |
517 | let cls = hir::ClassUnicode::empty(); |
518 | self.push(HirFrame::ClassUnicode(cls)); |
519 | } else { |
520 | let cls = hir::ClassBytes::empty(); |
521 | self.push(HirFrame::ClassBytes(cls)); |
522 | } |
523 | Ok(()) |
524 | } |
525 | |
526 | fn visit_class_set_binary_op_post( |
527 | &mut self, |
528 | op: &ast::ClassSetBinaryOp, |
529 | ) -> Result<()> { |
530 | use crate::ast::ClassSetBinaryOpKind::*; |
531 | |
532 | if self.flags().unicode() { |
533 | let mut rhs = self.pop().unwrap().unwrap_class_unicode(); |
534 | let mut lhs = self.pop().unwrap().unwrap_class_unicode(); |
535 | let mut cls = self.pop().unwrap().unwrap_class_unicode(); |
536 | if self.flags().case_insensitive() { |
537 | rhs.try_case_fold_simple().map_err(|_| { |
538 | self.error( |
539 | op.rhs.span().clone(), |
540 | ErrorKind::UnicodeCaseUnavailable, |
541 | ) |
542 | })?; |
543 | lhs.try_case_fold_simple().map_err(|_| { |
544 | self.error( |
545 | op.lhs.span().clone(), |
546 | ErrorKind::UnicodeCaseUnavailable, |
547 | ) |
548 | })?; |
549 | } |
550 | match op.kind { |
551 | Intersection => lhs.intersect(&rhs), |
552 | Difference => lhs.difference(&rhs), |
553 | SymmetricDifference => lhs.symmetric_difference(&rhs), |
554 | } |
555 | cls.union(&lhs); |
556 | self.push(HirFrame::ClassUnicode(cls)); |
557 | } else { |
558 | let mut rhs = self.pop().unwrap().unwrap_class_bytes(); |
559 | let mut lhs = self.pop().unwrap().unwrap_class_bytes(); |
560 | let mut cls = self.pop().unwrap().unwrap_class_bytes(); |
561 | if self.flags().case_insensitive() { |
562 | rhs.case_fold_simple(); |
563 | lhs.case_fold_simple(); |
564 | } |
565 | match op.kind { |
566 | Intersection => lhs.intersect(&rhs), |
567 | Difference => lhs.difference(&rhs), |
568 | SymmetricDifference => lhs.symmetric_difference(&rhs), |
569 | } |
570 | cls.union(&lhs); |
571 | self.push(HirFrame::ClassBytes(cls)); |
572 | } |
573 | Ok(()) |
574 | } |
575 | } |
576 | |
577 | /// The internal implementation of a translator. |
578 | /// |
579 | /// This type is responsible for carrying around the original pattern string, |
580 | /// which is not tied to the internal state of a translator. |
581 | /// |
582 | /// A TranslatorI exists for the time it takes to translate a single Ast. |
583 | #[derive (Clone, Debug)] |
584 | struct TranslatorI<'t, 'p> { |
585 | trans: &'t Translator, |
586 | pattern: &'p str, |
587 | } |
588 | |
589 | impl<'t, 'p> TranslatorI<'t, 'p> { |
590 | /// Build a new internal translator. |
591 | fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> { |
592 | TranslatorI { trans, pattern } |
593 | } |
594 | |
595 | /// Return a reference to the underlying translator. |
596 | fn trans(&self) -> &Translator { |
597 | &self.trans |
598 | } |
599 | |
600 | /// Push the given frame on to the call stack. |
601 | fn push(&self, frame: HirFrame) { |
602 | self.trans().stack.borrow_mut().push(frame); |
603 | } |
604 | |
605 | /// Pop the top of the call stack. If the call stack is empty, return None. |
606 | fn pop(&self) -> Option<HirFrame> { |
607 | self.trans().stack.borrow_mut().pop() |
608 | } |
609 | |
610 | /// Create a new error with the given span and error type. |
611 | fn error(&self, span: Span, kind: ErrorKind) -> Error { |
612 | Error { kind, pattern: self.pattern.to_string(), span } |
613 | } |
614 | |
615 | /// Return a copy of the active flags. |
616 | fn flags(&self) -> Flags { |
617 | self.trans().flags.get() |
618 | } |
619 | |
620 | /// Set the flags of this translator from the flags set in the given AST. |
621 | /// Then, return the old flags. |
622 | fn set_flags(&self, ast_flags: &ast::Flags) -> Flags { |
623 | let old_flags = self.flags(); |
624 | let mut new_flags = Flags::from_ast(ast_flags); |
625 | new_flags.merge(&old_flags); |
626 | self.trans().flags.set(new_flags); |
627 | old_flags |
628 | } |
629 | |
630 | fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> { |
631 | let ch = match self.literal_to_char(lit)? { |
632 | byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)), |
633 | hir::Literal::Unicode(ch) => ch, |
634 | }; |
635 | if self.flags().case_insensitive() { |
636 | self.hir_from_char_case_insensitive(lit.span, ch) |
637 | } else { |
638 | self.hir_from_char(lit.span, ch) |
639 | } |
640 | } |
641 | |
642 | /// Convert an Ast literal to its scalar representation. |
643 | /// |
644 | /// When Unicode mode is enabled, then this always succeeds and returns a |
645 | /// `char` (Unicode scalar value). |
646 | /// |
647 | /// When Unicode mode is disabled, then a raw byte is returned. If that |
648 | /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns |
649 | /// an error. |
650 | fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> { |
651 | if self.flags().unicode() { |
652 | return Ok(hir::Literal::Unicode(lit.c)); |
653 | } |
654 | let byte = match lit.byte() { |
655 | None => return Ok(hir::Literal::Unicode(lit.c)), |
656 | Some(byte) => byte, |
657 | }; |
658 | if byte <= 0x7F { |
659 | return Ok(hir::Literal::Unicode(byte as char)); |
660 | } |
661 | if !self.trans().allow_invalid_utf8 { |
662 | return Err(self.error(lit.span, ErrorKind::InvalidUtf8)); |
663 | } |
664 | Ok(hir::Literal::Byte(byte)) |
665 | } |
666 | |
667 | fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> { |
668 | if !self.flags().unicode() && c.len_utf8() > 1 { |
669 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
670 | } |
671 | Ok(Hir::literal(hir::Literal::Unicode(c))) |
672 | } |
673 | |
674 | fn hir_from_char_case_insensitive( |
675 | &self, |
676 | span: Span, |
677 | c: char, |
678 | ) -> Result<Hir> { |
679 | if self.flags().unicode() { |
680 | // If case folding won't do anything, then don't bother trying. |
681 | let map = |
682 | unicode::contains_simple_case_mapping(c, c).map_err(|_| { |
683 | self.error(span, ErrorKind::UnicodeCaseUnavailable) |
684 | })?; |
685 | if !map { |
686 | return self.hir_from_char(span, c); |
687 | } |
688 | let mut cls = |
689 | hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new( |
690 | c, c, |
691 | )]); |
692 | cls.try_case_fold_simple().map_err(|_| { |
693 | self.error(span, ErrorKind::UnicodeCaseUnavailable) |
694 | })?; |
695 | Ok(Hir::class(hir::Class::Unicode(cls))) |
696 | } else { |
697 | if c.len_utf8() > 1 { |
698 | return Err(self.error(span, ErrorKind::UnicodeNotAllowed)); |
699 | } |
700 | // If case folding won't do anything, then don't bother trying. |
701 | match c { |
702 | 'A' ..='Z' | 'a' ..='z' => {} |
703 | _ => return self.hir_from_char(span, c), |
704 | } |
705 | let mut cls = |
706 | hir::ClassBytes::new(vec![hir::ClassBytesRange::new( |
707 | c as u8, c as u8, |
708 | )]); |
709 | cls.case_fold_simple(); |
710 | Ok(Hir::class(hir::Class::Bytes(cls))) |
711 | } |
712 | } |
713 | |
714 | fn hir_dot(&self, span: Span) -> Result<Hir> { |
715 | let unicode = self.flags().unicode(); |
716 | if !unicode && !self.trans().allow_invalid_utf8 { |
717 | return Err(self.error(span, ErrorKind::InvalidUtf8)); |
718 | } |
719 | Ok(if self.flags().dot_matches_new_line() { |
720 | Hir::any(!unicode) |
721 | } else { |
722 | Hir::dot(!unicode) |
723 | }) |
724 | } |
725 | |
726 | fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> { |
727 | let unicode = self.flags().unicode(); |
728 | let multi_line = self.flags().multi_line(); |
729 | Ok(match asst.kind { |
730 | ast::AssertionKind::StartLine => Hir::anchor(if multi_line { |
731 | hir::Anchor::StartLine |
732 | } else { |
733 | hir::Anchor::StartText |
734 | }), |
735 | ast::AssertionKind::EndLine => Hir::anchor(if multi_line { |
736 | hir::Anchor::EndLine |
737 | } else { |
738 | hir::Anchor::EndText |
739 | }), |
740 | ast::AssertionKind::StartText => { |
741 | Hir::anchor(hir::Anchor::StartText) |
742 | } |
743 | ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText), |
744 | ast::AssertionKind::WordBoundary => { |
745 | Hir::word_boundary(if unicode { |
746 | hir::WordBoundary::Unicode |
747 | } else { |
748 | hir::WordBoundary::Ascii |
749 | }) |
750 | } |
751 | ast::AssertionKind::NotWordBoundary => { |
752 | Hir::word_boundary(if unicode { |
753 | hir::WordBoundary::UnicodeNegate |
754 | } else { |
755 | // It is possible for negated ASCII word boundaries to |
756 | // match at invalid UTF-8 boundaries, even when searching |
757 | // valid UTF-8. |
758 | if !self.trans().allow_invalid_utf8 { |
759 | return Err( |
760 | self.error(asst.span, ErrorKind::InvalidUtf8) |
761 | ); |
762 | } |
763 | hir::WordBoundary::AsciiNegate |
764 | }) |
765 | } |
766 | }) |
767 | } |
768 | |
769 | fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir { |
770 | let kind = match group.kind { |
771 | ast::GroupKind::CaptureIndex(idx) => { |
772 | hir::GroupKind::CaptureIndex(idx) |
773 | } |
774 | ast::GroupKind::CaptureName(ref capname) => { |
775 | hir::GroupKind::CaptureName { |
776 | name: capname.name.clone(), |
777 | index: capname.index, |
778 | } |
779 | } |
780 | ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing, |
781 | }; |
782 | Hir::group(hir::Group { kind, hir: Box::new(expr) }) |
783 | } |
784 | |
785 | fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir { |
786 | let kind = match rep.op.kind { |
787 | ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne, |
788 | ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore, |
789 | ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore, |
790 | ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => { |
791 | hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m)) |
792 | } |
793 | ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => { |
794 | hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m)) |
795 | } |
796 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded( |
797 | m, |
798 | n, |
799 | )) => { |
800 | hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n)) |
801 | } |
802 | }; |
803 | let greedy = |
804 | if self.flags().swap_greed() { !rep.greedy } else { rep.greedy }; |
805 | Hir::repetition(hir::Repetition { kind, greedy, hir: Box::new(expr) }) |
806 | } |
807 | |
808 | fn hir_unicode_class( |
809 | &self, |
810 | ast_class: &ast::ClassUnicode, |
811 | ) -> Result<hir::ClassUnicode> { |
812 | use crate::ast::ClassUnicodeKind::*; |
813 | |
814 | if !self.flags().unicode() { |
815 | return Err( |
816 | self.error(ast_class.span, ErrorKind::UnicodeNotAllowed) |
817 | ); |
818 | } |
819 | let query = match ast_class.kind { |
820 | OneLetter(name) => ClassQuery::OneLetter(name), |
821 | Named(ref name) => ClassQuery::Binary(name), |
822 | NamedValue { ref name, ref value, .. } => ClassQuery::ByValue { |
823 | property_name: name, |
824 | property_value: value, |
825 | }, |
826 | }; |
827 | let mut result = self.convert_unicode_class_error( |
828 | &ast_class.span, |
829 | unicode::class(query), |
830 | ); |
831 | if let Ok(ref mut class) = result { |
832 | self.unicode_fold_and_negate( |
833 | &ast_class.span, |
834 | ast_class.negated, |
835 | class, |
836 | )?; |
837 | if class.ranges().is_empty() { |
838 | let err = self |
839 | .error(ast_class.span, ErrorKind::EmptyClassNotAllowed); |
840 | return Err(err); |
841 | } |
842 | } |
843 | result |
844 | } |
845 | |
846 | fn hir_ascii_unicode_class( |
847 | &self, |
848 | ast: &ast::ClassAscii, |
849 | ) -> Result<hir::ClassUnicode> { |
850 | let mut cls = hir::ClassUnicode::new( |
851 | ascii_class(&ast.kind) |
852 | .iter() |
853 | .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)), |
854 | ); |
855 | self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?; |
856 | Ok(cls) |
857 | } |
858 | |
859 | fn hir_ascii_byte_class( |
860 | &self, |
861 | ast: &ast::ClassAscii, |
862 | ) -> Result<hir::ClassBytes> { |
863 | let mut cls = hir::ClassBytes::new( |
864 | ascii_class(&ast.kind) |
865 | .iter() |
866 | .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)), |
867 | ); |
868 | self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?; |
869 | Ok(cls) |
870 | } |
871 | |
872 | fn hir_perl_unicode_class( |
873 | &self, |
874 | ast_class: &ast::ClassPerl, |
875 | ) -> Result<hir::ClassUnicode> { |
876 | use crate::ast::ClassPerlKind::*; |
877 | |
878 | assert!(self.flags().unicode()); |
879 | let result = match ast_class.kind { |
880 | Digit => unicode::perl_digit(), |
881 | Space => unicode::perl_space(), |
882 | Word => unicode::perl_word(), |
883 | }; |
884 | let mut class = |
885 | self.convert_unicode_class_error(&ast_class.span, result)?; |
886 | // We needn't apply case folding here because the Perl Unicode classes |
887 | // are already closed under Unicode simple case folding. |
888 | if ast_class.negated { |
889 | class.negate(); |
890 | } |
891 | Ok(class) |
892 | } |
893 | |
894 | fn hir_perl_byte_class( |
895 | &self, |
896 | ast_class: &ast::ClassPerl, |
897 | ) -> hir::ClassBytes { |
898 | use crate::ast::ClassPerlKind::*; |
899 | |
900 | assert!(!self.flags().unicode()); |
901 | let mut class = match ast_class.kind { |
902 | Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit), |
903 | Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space), |
904 | Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word), |
905 | }; |
906 | // We needn't apply case folding here because the Perl ASCII classes |
907 | // are already closed (under ASCII case folding). |
908 | if ast_class.negated { |
909 | class.negate(); |
910 | } |
911 | class |
912 | } |
913 | |
914 | /// Converts the given Unicode specific error to an HIR translation error. |
915 | /// |
916 | /// The span given should approximate the position at which an error would |
917 | /// occur. |
918 | fn convert_unicode_class_error( |
919 | &self, |
920 | span: &Span, |
921 | result: unicode::Result<hir::ClassUnicode>, |
922 | ) -> Result<hir::ClassUnicode> { |
923 | result.map_err(|err| { |
924 | let sp = span.clone(); |
925 | match err { |
926 | unicode::Error::PropertyNotFound => { |
927 | self.error(sp, ErrorKind::UnicodePropertyNotFound) |
928 | } |
929 | unicode::Error::PropertyValueNotFound => { |
930 | self.error(sp, ErrorKind::UnicodePropertyValueNotFound) |
931 | } |
932 | unicode::Error::PerlClassNotFound => { |
933 | self.error(sp, ErrorKind::UnicodePerlClassNotFound) |
934 | } |
935 | } |
936 | }) |
937 | } |
938 | |
939 | fn unicode_fold_and_negate( |
940 | &self, |
941 | span: &Span, |
942 | negated: bool, |
943 | class: &mut hir::ClassUnicode, |
944 | ) -> Result<()> { |
945 | // Note that we must apply case folding before negation! |
946 | // Consider `(?i)[^x]`. If we applied negation field, then |
947 | // the result would be the character class that matched any |
948 | // Unicode scalar value. |
949 | if self.flags().case_insensitive() { |
950 | class.try_case_fold_simple().map_err(|_| { |
951 | self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable) |
952 | })?; |
953 | } |
954 | if negated { |
955 | class.negate(); |
956 | } |
957 | Ok(()) |
958 | } |
959 | |
960 | fn bytes_fold_and_negate( |
961 | &self, |
962 | span: &Span, |
963 | negated: bool, |
964 | class: &mut hir::ClassBytes, |
965 | ) -> Result<()> { |
966 | // Note that we must apply case folding before negation! |
967 | // Consider `(?i)[^x]`. If we applied negation first, then |
968 | // the result would be the character class that matched any |
969 | // Unicode scalar value. |
970 | if self.flags().case_insensitive() { |
971 | class.case_fold_simple(); |
972 | } |
973 | if negated { |
974 | class.negate(); |
975 | } |
976 | if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() { |
977 | return Err(self.error(span.clone(), ErrorKind::InvalidUtf8)); |
978 | } |
979 | Ok(()) |
980 | } |
981 | |
982 | /// Return a scalar byte value suitable for use as a literal in a byte |
983 | /// character class. |
984 | fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> { |
985 | match self.literal_to_char(ast)? { |
986 | hir::Literal::Byte(byte) => Ok(byte), |
987 | hir::Literal::Unicode(ch) => { |
988 | if ch <= 0x7F as char { |
989 | Ok(ch as u8) |
990 | } else { |
991 | // We can't feasibly support Unicode in |
992 | // byte oriented classes. Byte classes don't |
993 | // do Unicode case folding. |
994 | Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed)) |
995 | } |
996 | } |
997 | } |
998 | } |
999 | } |
1000 | |
/// The flag state of a regular expression as seen by the translator at one
/// point during translation.
///
/// Every flag has three possible states: absent (`None`), present but
/// disabled (`Some(false)`) or present and enabled (`Some(true)`).
///
/// `ignore_whitespace` has no field here because it is handled entirely in
/// the parser.
#[derive(Debug, Default, Clone, Copy)]
struct Flags {
    /// State of the case-insensitive flag.
    case_insensitive: Option<bool>,
    /// State of the multi-line flag.
    multi_line: Option<bool>,
    /// State of the "dot matches new line" flag.
    dot_matches_new_line: Option<bool>,
    /// State of the swap-greed flag.
    swap_greed: Option<bool>,
    /// State of the Unicode flag.
    unicode: Option<bool>,
}
1016 | |
1017 | impl Flags { |
1018 | fn from_ast(ast: &ast::Flags) -> Flags { |
1019 | let mut flags = Flags::default(); |
1020 | let mut enable = true; |
1021 | for item in &ast.items { |
1022 | match item.kind { |
1023 | ast::FlagsItemKind::Negation => { |
1024 | enable = false; |
1025 | } |
1026 | ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => { |
1027 | flags.case_insensitive = Some(enable); |
1028 | } |
1029 | ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => { |
1030 | flags.multi_line = Some(enable); |
1031 | } |
1032 | ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => { |
1033 | flags.dot_matches_new_line = Some(enable); |
1034 | } |
1035 | ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => { |
1036 | flags.swap_greed = Some(enable); |
1037 | } |
1038 | ast::FlagsItemKind::Flag(ast::Flag::Unicode) => { |
1039 | flags.unicode = Some(enable); |
1040 | } |
1041 | ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {} |
1042 | } |
1043 | } |
1044 | flags |
1045 | } |
1046 | |
1047 | fn merge(&mut self, previous: &Flags) { |
1048 | if self.case_insensitive.is_none() { |
1049 | self.case_insensitive = previous.case_insensitive; |
1050 | } |
1051 | if self.multi_line.is_none() { |
1052 | self.multi_line = previous.multi_line; |
1053 | } |
1054 | if self.dot_matches_new_line.is_none() { |
1055 | self.dot_matches_new_line = previous.dot_matches_new_line; |
1056 | } |
1057 | if self.swap_greed.is_none() { |
1058 | self.swap_greed = previous.swap_greed; |
1059 | } |
1060 | if self.unicode.is_none() { |
1061 | self.unicode = previous.unicode; |
1062 | } |
1063 | } |
1064 | |
1065 | fn case_insensitive(&self) -> bool { |
1066 | self.case_insensitive.unwrap_or(false) |
1067 | } |
1068 | |
1069 | fn multi_line(&self) -> bool { |
1070 | self.multi_line.unwrap_or(false) |
1071 | } |
1072 | |
1073 | fn dot_matches_new_line(&self) -> bool { |
1074 | self.dot_matches_new_line.unwrap_or(false) |
1075 | } |
1076 | |
1077 | fn swap_greed(&self) -> bool { |
1078 | self.swap_greed.unwrap_or(false) |
1079 | } |
1080 | |
1081 | fn unicode(&self) -> bool { |
1082 | self.unicode.unwrap_or(true) |
1083 | } |
1084 | } |
1085 | |
1086 | fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes { |
1087 | let ranges: Vec<_> = ascii_classimpl Iterator (kind) |
1088 | .iter() |
1089 | .cloned() |
1090 | .map(|(s: char, e: char)| hir::ClassBytesRange::new(start:s as u8, end:e as u8)) |
1091 | .collect(); |
1092 | hir::ClassBytes::new(ranges) |
1093 | } |
1094 | |
1095 | fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] { |
1096 | use crate::ast::ClassAsciiKind::*; |
1097 | match *kind { |
1098 | Alnum => &[('0' , '9' ), ('A' , 'Z' ), ('a' , 'z' )], |
1099 | Alpha => &[('A' , 'Z' ), ('a' , 'z' )], |
1100 | Ascii => &[(' \x00' , ' \x7F' )], |
1101 | Blank => &[(' \t' , ' \t' ), (' ' , ' ' )], |
1102 | Cntrl => &[(' \x00' , ' \x1F' ), (' \x7F' , ' \x7F' )], |
1103 | Digit => &[('0' , '9' )], |
1104 | Graph => &[('!' , '~' )], |
1105 | Lower => &[('a' , 'z' )], |
1106 | Print => &[(' ' , '~' )], |
1107 | Punct => &[('!' , '/' ), (':' , '@' ), ('[' , '`' ), ('{' , '~' )], |
1108 | Space => &[ |
1109 | (' \t' , ' \t' ), |
1110 | (' \n' , ' \n' ), |
1111 | (' \x0B' , ' \x0B' ), |
1112 | (' \x0C' , ' \x0C' ), |
1113 | (' \r' , ' \r' ), |
1114 | (' ' , ' ' ), |
1115 | ], |
1116 | Upper => &[('A' , 'Z' )], |
1117 | Word => &[('0' , '9' ), ('A' , 'Z' ), ('_' , '_' ), ('a' , 'z' )], |
1118 | Xdigit => &[('0' , '9' ), ('A' , 'F' ), ('a' , 'f' )], |
1119 | } |
1120 | } |
1121 | |
1122 | #[cfg (test)] |
1123 | mod tests { |
1124 | use crate::ast::parse::ParserBuilder; |
1125 | use crate::ast::{self, Ast, Position, Span}; |
1126 | use crate::hir::{self, Hir, HirKind}; |
1127 | use crate::unicode::{self, ClassQuery}; |
1128 | |
1129 | use super::{ascii_class, TranslatorBuilder}; |
1130 | |
1131 | // We create these errors to compare with real hir::Errors in the tests. |
1132 | // We define equality between TestError and hir::Error to disregard the |
1133 | // pattern string in hir::Error, which is annoying to provide in tests. |
1134 | #[derive (Clone, Debug)] |
1135 | struct TestError { |
1136 | span: Span, |
1137 | kind: hir::ErrorKind, |
1138 | } |
1139 | |
1140 | impl PartialEq<hir::Error> for TestError { |
1141 | fn eq(&self, other: &hir::Error) -> bool { |
1142 | self.span == other.span && self.kind == other.kind |
1143 | } |
1144 | } |
1145 | |
1146 | impl PartialEq<TestError> for hir::Error { |
1147 | fn eq(&self, other: &TestError) -> bool { |
1148 | self.span == other.span && self.kind == other.kind |
1149 | } |
1150 | } |
1151 | |
1152 | fn parse(pattern: &str) -> Ast { |
1153 | ParserBuilder::new().octal(true).build().parse(pattern).unwrap() |
1154 | } |
1155 | |
1156 | fn t(pattern: &str) -> Hir { |
1157 | TranslatorBuilder::new() |
1158 | .allow_invalid_utf8(false) |
1159 | .build() |
1160 | .translate(pattern, &parse(pattern)) |
1161 | .unwrap() |
1162 | } |
1163 | |
1164 | fn t_err(pattern: &str) -> hir::Error { |
1165 | TranslatorBuilder::new() |
1166 | .allow_invalid_utf8(false) |
1167 | .build() |
1168 | .translate(pattern, &parse(pattern)) |
1169 | .unwrap_err() |
1170 | } |
1171 | |
1172 | fn t_bytes(pattern: &str) -> Hir { |
1173 | TranslatorBuilder::new() |
1174 | .allow_invalid_utf8(true) |
1175 | .build() |
1176 | .translate(pattern, &parse(pattern)) |
1177 | .unwrap() |
1178 | } |
1179 | |
1180 | fn hir_lit(s: &str) -> Hir { |
1181 | match s.len() { |
1182 | 0 => Hir::empty(), |
1183 | _ => { |
1184 | let lits = s |
1185 | .chars() |
1186 | .map(hir::Literal::Unicode) |
1187 | .map(Hir::literal) |
1188 | .collect(); |
1189 | Hir::concat(lits) |
1190 | } |
1191 | } |
1192 | } |
1193 | |
1194 | fn hir_blit(s: &[u8]) -> Hir { |
1195 | match s.len() { |
1196 | 0 => Hir::empty(), |
1197 | 1 => Hir::literal(hir::Literal::Byte(s[0])), |
1198 | _ => { |
1199 | let lits = s |
1200 | .iter() |
1201 | .cloned() |
1202 | .map(hir::Literal::Byte) |
1203 | .map(Hir::literal) |
1204 | .collect(); |
1205 | Hir::concat(lits) |
1206 | } |
1207 | } |
1208 | } |
1209 | |
1210 | fn hir_group(i: u32, expr: Hir) -> Hir { |
1211 | Hir::group(hir::Group { |
1212 | kind: hir::GroupKind::CaptureIndex(i), |
1213 | hir: Box::new(expr), |
1214 | }) |
1215 | } |
1216 | |
1217 | fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir { |
1218 | Hir::group(hir::Group { |
1219 | kind: hir::GroupKind::CaptureName { |
1220 | name: name.to_string(), |
1221 | index: i, |
1222 | }, |
1223 | hir: Box::new(expr), |
1224 | }) |
1225 | } |
1226 | |
1227 | fn hir_group_nocap(expr: Hir) -> Hir { |
1228 | Hir::group(hir::Group { |
1229 | kind: hir::GroupKind::NonCapturing, |
1230 | hir: Box::new(expr), |
1231 | }) |
1232 | } |
1233 | |
1234 | fn hir_quest(greedy: bool, expr: Hir) -> Hir { |
1235 | Hir::repetition(hir::Repetition { |
1236 | kind: hir::RepetitionKind::ZeroOrOne, |
1237 | greedy, |
1238 | hir: Box::new(expr), |
1239 | }) |
1240 | } |
1241 | |
1242 | fn hir_star(greedy: bool, expr: Hir) -> Hir { |
1243 | Hir::repetition(hir::Repetition { |
1244 | kind: hir::RepetitionKind::ZeroOrMore, |
1245 | greedy, |
1246 | hir: Box::new(expr), |
1247 | }) |
1248 | } |
1249 | |
1250 | fn hir_plus(greedy: bool, expr: Hir) -> Hir { |
1251 | Hir::repetition(hir::Repetition { |
1252 | kind: hir::RepetitionKind::OneOrMore, |
1253 | greedy, |
1254 | hir: Box::new(expr), |
1255 | }) |
1256 | } |
1257 | |
1258 | fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir { |
1259 | Hir::repetition(hir::Repetition { |
1260 | kind: hir::RepetitionKind::Range(range), |
1261 | greedy, |
1262 | hir: Box::new(expr), |
1263 | }) |
1264 | } |
1265 | |
1266 | fn hir_alt(alts: Vec<Hir>) -> Hir { |
1267 | Hir::alternation(alts) |
1268 | } |
1269 | |
1270 | fn hir_cat(exprs: Vec<Hir>) -> Hir { |
1271 | Hir::concat(exprs) |
1272 | } |
1273 | |
1274 | #[allow (dead_code)] |
1275 | fn hir_uclass_query(query: ClassQuery<'_>) -> Hir { |
1276 | Hir::class(hir::Class::Unicode(unicode::class(query).unwrap())) |
1277 | } |
1278 | |
1279 | #[allow (dead_code)] |
1280 | fn hir_uclass_perl_word() -> Hir { |
1281 | Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap())) |
1282 | } |
1283 | |
1284 | fn hir_uclass(ranges: &[(char, char)]) -> Hir { |
1285 | let ranges: Vec<hir::ClassUnicodeRange> = ranges |
1286 | .iter() |
1287 | .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)) |
1288 | .collect(); |
1289 | Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges))) |
1290 | } |
1291 | |
1292 | fn hir_bclass(ranges: &[(u8, u8)]) -> Hir { |
1293 | let ranges: Vec<hir::ClassBytesRange> = ranges |
1294 | .iter() |
1295 | .map(|&(s, e)| hir::ClassBytesRange::new(s, e)) |
1296 | .collect(); |
1297 | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
1298 | } |
1299 | |
1300 | fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir { |
1301 | let ranges: Vec<hir::ClassBytesRange> = ranges |
1302 | .iter() |
1303 | .map(|&(s, e)| { |
1304 | assert!(s as u32 <= 0x7F); |
1305 | assert!(e as u32 <= 0x7F); |
1306 | hir::ClassBytesRange::new(s as u8, e as u8) |
1307 | }) |
1308 | .collect(); |
1309 | Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges))) |
1310 | } |
1311 | |
1312 | fn hir_case_fold(expr: Hir) -> Hir { |
1313 | match expr.into_kind() { |
1314 | HirKind::Class(mut cls) => { |
1315 | cls.case_fold_simple(); |
1316 | Hir::class(cls) |
1317 | } |
1318 | _ => panic!("cannot case fold non-class Hir expr" ), |
1319 | } |
1320 | } |
1321 | |
1322 | fn hir_negate(expr: Hir) -> Hir { |
1323 | match expr.into_kind() { |
1324 | HirKind::Class(mut cls) => { |
1325 | cls.negate(); |
1326 | Hir::class(cls) |
1327 | } |
1328 | _ => panic!("cannot negate non-class Hir expr" ), |
1329 | } |
1330 | } |
1331 | |
1332 | #[allow (dead_code)] |
1333 | fn hir_union(expr1: Hir, expr2: Hir) -> Hir { |
1334 | use crate::hir::Class::{Bytes, Unicode}; |
1335 | |
1336 | match (expr1.into_kind(), expr2.into_kind()) { |
1337 | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
1338 | c1.union(&c2); |
1339 | Hir::class(hir::Class::Unicode(c1)) |
1340 | } |
1341 | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
1342 | c1.union(&c2); |
1343 | Hir::class(hir::Class::Bytes(c1)) |
1344 | } |
1345 | _ => panic!("cannot union non-class Hir exprs" ), |
1346 | } |
1347 | } |
1348 | |
1349 | #[allow (dead_code)] |
1350 | fn hir_difference(expr1: Hir, expr2: Hir) -> Hir { |
1351 | use crate::hir::Class::{Bytes, Unicode}; |
1352 | |
1353 | match (expr1.into_kind(), expr2.into_kind()) { |
1354 | (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => { |
1355 | c1.difference(&c2); |
1356 | Hir::class(hir::Class::Unicode(c1)) |
1357 | } |
1358 | (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => { |
1359 | c1.difference(&c2); |
1360 | Hir::class(hir::Class::Bytes(c1)) |
1361 | } |
1362 | _ => panic!("cannot difference non-class Hir exprs" ), |
1363 | } |
1364 | } |
1365 | |
1366 | fn hir_anchor(anchor: hir::Anchor) -> Hir { |
1367 | Hir::anchor(anchor) |
1368 | } |
1369 | |
1370 | fn hir_word(wb: hir::WordBoundary) -> Hir { |
1371 | Hir::word_boundary(wb) |
1372 | } |
1373 | |
/// Empty patterns, empty groups and empty alternation branches all
/// translate to the empty expression in the corresponding position.
#[test]
fn empty() {
    let nothing = Hir::empty;
    let a = || hir_lit("a");
    let b = || hir_lit("b");
    let c = || hir_lit("c");

    assert_eq!(t(""), nothing());
    assert_eq!(t("(?i)"), nothing());
    assert_eq!(t("()"), hir_group(1, nothing()));
    assert_eq!(t("(?:)"), hir_group_nocap(nothing()));
    assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", nothing()));
    assert_eq!(t("|"), hir_alt(vec![nothing(), nothing()]));
    assert_eq!(
        t("()|()"),
        hir_alt(vec![hir_group(1, nothing()), hir_group(2, nothing())])
    );
    assert_eq!(t("(|b)"), hir_group(1, hir_alt(vec![nothing(), b()])));
    assert_eq!(t("(a|)"), hir_group(1, hir_alt(vec![a(), nothing()])));
    assert_eq!(
        t("(a||c)"),
        hir_group(1, hir_alt(vec![a(), nothing(), c()]))
    );
    assert_eq!(
        t("(||)"),
        hir_group(1, hir_alt(vec![nothing(), nothing(), nothing()]))
    );
}
1412 | |
/// Literals translate to Unicode or byte literals depending on the flags
/// and on whether invalid UTF-8 is allowed.
#[test]
fn literal() {
    assert_eq!(t("a"), hir_lit("a"));
    assert_eq!(t("(?-u)a"), hir_lit("a"));
    assert_eq!(t("☃"), hir_lit("☃"));
    assert_eq!(t("abcd"), hir_lit("abcd"));

    assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
    // The first pattern holds the character `a` itself (the escape is
    // resolved by Rust), the second holds the regex escape `\x61`.
    assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
    assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?-u)☃"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(8, 1, 7)
            ),
        }
    );
    assert_eq!(
        t_err(r"(?-u)\xFF"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(9, 1, 10)
            ),
        }
    );
}
1446 | |
/// Case-insensitive literals translate to classes holding every case
/// variant. Characters without case variants stay plain literals.
#[test]
fn literal_case_insensitive() {
    #[cfg(feature = "unicode-case")]
    assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)"),
        hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')]))
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_lit("a"),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)ab@c"),
        hir_cat(vec![
            hir_uclass(&[('A', 'A'), ('a', 'a')]),
            hir_uclass(&[('B', 'B'), ('b', 'b')]),
            hir_lit("@"),
            hir_uclass(&[('C', 'C'), ('c', 'c')]),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)β"),
        hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
    );

    assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a')]));
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?-u)a(?i)a(?-i)a"),
        hir_cat(vec![
            hir_lit("a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_lit("a"),
        ])
    );
    assert_eq!(
        t("(?i-u)ab@c"),
        hir_cat(vec![
            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
            hir_lit("@"),
            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
        ])
    );

    assert_eq!(
        t_bytes("(?i-u)a"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a')])
    );
    // The first pattern holds the character `a` itself (the escape is
    // resolved by Rust), the second holds the regex escape `\x61`.
    assert_eq!(
        t_bytes("(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a')])
    );
    assert_eq!(
        t_bytes(r"(?i-u)\x61"),
        hir_bclass(&[(b'A', b'A'), (b'a', b'a')])
    );
    assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

    assert_eq!(
        t_err("(?i-u)β"),
        TestError {
            kind: hir::ErrorKind::UnicodeNotAllowed,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(8, 1, 8),
            ),
        }
    );
}
1526 | |
/// `.` translates to a class that excludes `\n` unless the `s` flag is
/// set, over Unicode scalar values or over bytes depending on the `u` flag.
#[test]
fn dot() {
    assert_eq!(
        t("."),
        hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}')])
    );
    assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}')]));
    assert_eq!(
        t_bytes("(?-u)."),
        hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF')])
    );
    assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF')]));

    // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
    assert_eq!(
        t_err("(?-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(5, 1, 6),
                Position::new(6, 1, 7)
            ),
        }
    );
    assert_eq!(
        t_err("(?s-u)."),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(7, 1, 8)
            ),
        }
    );
}
1562 | |
/// Anchors and word boundaries translate according to the multi-line and
/// Unicode flags. A negated ASCII word boundary is rejected unless invalid
/// UTF-8 is allowed.
#[test]
fn assertions() {
    use crate::hir::Anchor::{EndLine, EndText, StartLine, StartText};
    use crate::hir::WordBoundary as Boundary;

    assert_eq!(t("^"), hir_anchor(StartText));
    assert_eq!(t("$"), hir_anchor(EndText));
    assert_eq!(t(r"\A"), hir_anchor(StartText));
    assert_eq!(t(r"\z"), hir_anchor(EndText));
    assert_eq!(t("(?m)^"), hir_anchor(StartLine));
    assert_eq!(t("(?m)$"), hir_anchor(EndLine));
    assert_eq!(t(r"(?m)\A"), hir_anchor(StartText));
    assert_eq!(t(r"(?m)\z"), hir_anchor(EndText));

    assert_eq!(t(r"\b"), hir_word(Boundary::Unicode));
    assert_eq!(t(r"\B"), hir_word(Boundary::UnicodeNegate));
    assert_eq!(t(r"(?-u)\b"), hir_word(Boundary::Ascii));
    assert_eq!(t_bytes(r"(?-u)\B"), hir_word(Boundary::AsciiNegate));

    let start = Position::new(5, 1, 6);
    let end = Position::new(7, 1, 8);
    assert_eq!(
        t_err(r"(?-u)\B"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(start, end),
        }
    );
}
1593 | |
/// Capturing, named and non-capturing groups translate to the matching
/// group kind. Capture indices count only capturing groups.
#[test]
fn group() {
    let nothing = Hir::empty;
    let a = || hir_lit("a");
    let b = || hir_lit("b");
    let c = || hir_lit("c");

    assert_eq!(t("(a)"), hir_group(1, a()));
    assert_eq!(
        t("(a)(b)"),
        hir_cat(vec![hir_group(1, a()), hir_group(2, b())])
    );
    assert_eq!(
        t("(a)|(b)"),
        hir_alt(vec![hir_group(1, a()), hir_group(2, b())])
    );
    assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", nothing()));
    assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", a()));
    assert_eq!(
        t("(?P<foo>a)(?P<bar>b)"),
        hir_cat(vec![
            hir_group_name(1, "foo", a()),
            hir_group_name(2, "bar", b()),
        ])
    );
    assert_eq!(t("(?:)"), hir_group_nocap(nothing()));
    assert_eq!(t("(?:a)"), hir_group_nocap(a()));
    assert_eq!(
        t("(?:a)(b)"),
        hir_cat(vec![hir_group_nocap(a()), hir_group(1, b())])
    );
    assert_eq!(
        t("(a)(?:b)(c)"),
        hir_cat(vec![
            hir_group(1, a()),
            hir_group_nocap(b()),
            hir_group(2, c()),
        ])
    );
    assert_eq!(
        t("(a)(?P<foo>b)(c)"),
        hir_cat(vec![
            hir_group(1, a()),
            hir_group_name(2, "foo", b()),
            hir_group(3, c()),
        ])
    );
    assert_eq!(t("()"), hir_group(1, nothing()));
    assert_eq!(t("((?i))"), hir_group(1, nothing()));
    assert_eq!(t("((?x))"), hir_group(1, nothing()));
    assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, nothing())));
}
1650 | |
/// Flags apply from where they are set up to the end of the enclosing
/// group and can be switched off again.
#[test]
fn flags() {
    #[cfg(feature = "unicode-case")]
    let a_folded = || hir_uclass(&[('A', 'A'), ('a', 'a')]);
    let a_folded_bytes = || hir_bclass(&[(b'A', b'A'), (b'a', b'a')]);
    let a = || hir_lit("a");

    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i:a)a"),
        hir_cat(vec![hir_group_nocap(a_folded()), a()])
    );
    assert_eq!(
        t("(?i-u:a)β"),
        hir_cat(vec![hir_group_nocap(a_folded_bytes()), hir_lit("β")])
    );
    assert_eq!(
        t("(?:(?i-u)a)b"),
        hir_cat(vec![hir_group_nocap(a_folded_bytes()), hir_lit("b")])
    );
    assert_eq!(
        t("((?i-u)a)b"),
        hir_cat(vec![hir_group(1, a_folded_bytes()), hir_lit("b")])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?-i:a)a"),
        hir_cat(vec![hir_group_nocap(a()), a_folded()])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^"),
        hir_cat(vec![a_folded(), hir_anchor(hir::Anchor::StartLine)])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?im)a^(?i-m)a^"),
        hir_cat(vec![
            a_folded(),
            hir_anchor(hir::Anchor::StartLine),
            a_folded(),
            hir_anchor(hir::Anchor::StartText),
        ])
    );
    assert_eq!(
        t("(?U)a*a*?(?-U)a*a*?"),
        hir_cat(vec![
            hir_star(false, a()),
            hir_star(true, a()),
            hir_star(true, a()),
            hir_star(false, a()),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?:a(?i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![a(), a_folded()])),
            a(),
        ])
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)(?:a(?-i)a)a"),
        hir_cat(vec![
            hir_group_nocap(hir_cat(vec![a_folded(), a()])),
            a_folded(),
        ])
    );
}
1740 | |
/// Escaped meta characters translate to the literal characters.
#[test]
fn escape() {
    let pattern = r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#";
    let unescaped = r"\.+*?()|[]{}^$#";
    assert_eq!(t(pattern), hir_lit(unescaped));
}
1748 | |
/// Repetition operators translate to the matching repetition kind and
/// greediness and bind to the expression directly before them.
#[test]
fn repetition() {
    use crate::hir::RepetitionRange::{AtLeast, Bounded, Exactly};

    let a = || hir_lit("a");
    let b = || hir_lit("b");

    assert_eq!(t("a?"), hir_quest(true, a()));
    assert_eq!(t("a*"), hir_star(true, a()));
    assert_eq!(t("a+"), hir_plus(true, a()));
    assert_eq!(t("a??"), hir_quest(false, a()));
    assert_eq!(t("a*?"), hir_star(false, a()));
    assert_eq!(t("a+?"), hir_plus(false, a()));

    assert_eq!(t("a{1}"), hir_range(true, Exactly(1), a()));
    assert_eq!(t("a{1,}"), hir_range(true, AtLeast(1), a()));
    assert_eq!(t("a{1,2}"), hir_range(true, Bounded(1, 2), a()));
    assert_eq!(t("a{1}?"), hir_range(false, Exactly(1), a()));
    assert_eq!(t("a{1,}?"), hir_range(false, AtLeast(1), a()));
    assert_eq!(t("a{1,2}?"), hir_range(false, Bounded(1, 2), a()));

    assert_eq!(t("ab?"), hir_cat(vec![a(), hir_quest(true, b())]));
    assert_eq!(
        t("(ab)?"),
        hir_quest(true, hir_group(1, hir_cat(vec![a(), b()])))
    );
    assert_eq!(t("a|b?"), hir_alt(vec![a(), hir_quest(true, b())]));
}
1803 | |
/// Concatenations and alternations translate with their nesting and
/// branch order preserved.
#[test]
fn cat_alt() {
    // One expected literal expression per given string, in order.
    let lits = |texts: &[&str]| -> Vec<Hir> {
        texts.iter().map(|text| hir_lit(text)).collect()
    };

    assert_eq!(t("(ab)"), hir_group(1, hir_cat(lits(&["a", "b"]))));
    assert_eq!(t("a|b"), hir_alt(lits(&["a", "b"])));
    assert_eq!(t("a|b|c"), hir_alt(lits(&["a", "b", "c"])));
    assert_eq!(t("ab|bc|cd"), hir_alt(lits(&["ab", "bc", "cd"])));
    assert_eq!(t("(a|b)"), hir_group(1, hir_alt(lits(&["a", "b"]))));
    assert_eq!(
        t("(a|b|c)"),
        hir_group(1, hir_alt(lits(&["a", "b", "c"])))
    );
    assert_eq!(
        t("(ab|bc|cd)"),
        hir_group(1, hir_alt(lits(&["ab", "bc", "cd"])))
    );

    let innermost = hir_group(3, hir_lit("cd"));
    let middle = hir_group(2, hir_alt(vec![hir_lit("bc"), innermost]));
    assert_eq!(
        t("(ab|(bc|(cd)))"),
        hir_group(1, hir_alt(vec![hir_lit("ab"), middle]))
    );
}
1854 | |
/// Named ASCII classes translate to the ranges given by `ascii_class`,
/// as Unicode or byte classes depending on the `u` flag.
#[test]
fn class_ascii() {
    use crate::ast::ClassAsciiKind as Kind;

    assert_eq!(t("[[:alnum:]]"), hir_uclass(ascii_class(&Kind::Alnum)));
    assert_eq!(t("[[:alpha:]]"), hir_uclass(ascii_class(&Kind::Alpha)));
    assert_eq!(t("[[:ascii:]]"), hir_uclass(ascii_class(&Kind::Ascii)));
    assert_eq!(t("[[:blank:]]"), hir_uclass(ascii_class(&Kind::Blank)));
    assert_eq!(t("[[:cntrl:]]"), hir_uclass(ascii_class(&Kind::Cntrl)));
    assert_eq!(t("[[:digit:]]"), hir_uclass(ascii_class(&Kind::Digit)));
    assert_eq!(t("[[:graph:]]"), hir_uclass(ascii_class(&Kind::Graph)));
    assert_eq!(t("[[:lower:]]"), hir_uclass(ascii_class(&Kind::Lower)));
    assert_eq!(t("[[:print:]]"), hir_uclass(ascii_class(&Kind::Print)));
    assert_eq!(t("[[:punct:]]"), hir_uclass(ascii_class(&Kind::Punct)));
    assert_eq!(t("[[:space:]]"), hir_uclass(ascii_class(&Kind::Space)));
    assert_eq!(t("[[:upper:]]"), hir_uclass(ascii_class(&Kind::Upper)));
    assert_eq!(t("[[:word:]]"), hir_uclass(ascii_class(&Kind::Word)));
    assert_eq!(t("[[:xdigit:]]"), hir_uclass(ascii_class(&Kind::Xdigit)));

    assert_eq!(
        t("[[:^lower:]]"),
        hir_negate(hir_uclass(ascii_class(&Kind::Lower)))
    );
    // Unicode simple case folding pulls in two non-ASCII characters.
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t("(?i)[[:lower:]]"),
        hir_uclass(&[
            ('A', 'Z'),
            ('a', 'z'),
            ('\u{17F}', '\u{17F}'),
            ('\u{212A}', '\u{212A}'),
        ])
    );

    assert_eq!(
        t("(?-u)[[:lower:]]"),
        hir_bclass_from_char(ascii_class(&Kind::Lower))
    );
    assert_eq!(
        t("(?i-u)[[:lower:]]"),
        hir_case_fold(hir_bclass_from_char(ascii_class(&Kind::Lower)))
    );

    assert_eq!(
        t_err("(?-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(6, 1, 7),
                Position::new(16, 1, 17)
            ),
        }
    );
    assert_eq!(
        t_err("(?i-u)[[:^lower:]]"),
        TestError {
            kind: hir::ErrorKind::InvalidUtf8,
            span: Span::new(
                Position::new(7, 1, 8),
                Position::new(17, 1, 18)
            ),
        }
    );
}
1961 | |
#[test]
fn class_ascii_multiple() {
    // Several ASCII classes, one of them negated, inside a single bracketed
    // class. See: https://github.com/rust-lang/regex/issues/680
    let alnum = ascii_class(&ast::ClassAsciiKind::Alnum);

    // Unicode mode: the negated ASCII class is expected to contribute
    // everything from U+0080 up to U+10FFFF.
    let unicode_expected =
        hir_union(hir_uclass(alnum), hir_uclass(&[('\u{80}', '\u{10FFFF}')]));
    assert_eq!(t("[[:alnum:][:^ascii:]]"), unicode_expected);

    // Byte mode: the negated ASCII class is expected to contribute the
    // bytes 0x80 through 0xFF.
    let byte_expected =
        hir_union(hir_bclass_from_char(alnum), hir_bclass(&[(0x80, 0xFF)]));
    assert_eq!(t_bytes("(?-u)[[:alnum:][:^ascii:]]"), byte_expected);
}
1980 | |
#[test]
#[cfg(feature = "unicode-perl")]
fn class_perl() {
    // Expected translations of the Unicode-aware Perl classes.
    let digit = || hir_uclass_query(ClassQuery::Binary("digit"));
    let space = || hir_uclass_query(ClassQuery::Binary("space"));
    let word = || hir_uclass_perl_word();
    // Expected translation of a Perl class when Unicode mode is disabled.
    let ascii = |kind: &ast::ClassAsciiKind| {
        hir_bclass_from_char(ascii_class(kind))
    };

    // Unicode
    assert_eq!(t(r"\d"), digit());
    assert_eq!(t(r"\s"), space());
    assert_eq!(t(r"\w"), word());

    // Unicode, negated
    assert_eq!(t(r"\D"), hir_negate(digit()));
    assert_eq!(t(r"\S"), hir_negate(space()));
    assert_eq!(t(r"\W"), hir_negate(word()));

    // Unicode with `(?i)`: the expectations are the same as without it.
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\d"), digit());
        assert_eq!(t(r"(?i)\s"), space());
        assert_eq!(t(r"(?i)\w"), word());

        assert_eq!(t(r"(?i)\D"), hir_negate(digit()));
        assert_eq!(t(r"(?i)\S"), hir_negate(space()));
        assert_eq!(t(r"(?i)\W"), hir_negate(word()));
    }

    // ASCII only, plain and negated. `(?i-u)` expects exactly the same
    // classes as `(?-u)`.
    let ascii_cases = [
        (r"\d", r"\D", ast::ClassAsciiKind::Digit),
        (r"\s", r"\S", ast::ClassAsciiKind::Space),
        (r"\w", r"\W", ast::ClassAsciiKind::Word),
    ];
    for flags in &["(?-u)", "(?i-u)"] {
        for (class, negated_class, kind) in ascii_cases.iter() {
            let positive = format!("{}{}", flags, class);
            assert_eq!(t(&positive), ascii(kind), "pattern: {}", positive);

            let negative = format!("{}{}", flags, negated_class);
            assert_eq!(
                t(&negative),
                hir_negate(ascii(kind)),
                "pattern: {}",
                negative
            );
        }
    }
}
2088 | |
#[test]
#[cfg(not(feature = "unicode-perl"))]
fn class_perl_word_disabled() {
    // With `unicode-perl` turned off, `\w` must be rejected with an error
    // whose span covers the whole two-character pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
    };
    assert_eq!(t_err(r"\w"), expected);
}
2103 | |
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
fn class_perl_space_disabled() {
    // With both `unicode-perl` and `unicode-bool` turned off, `\s` must be
    // rejected with an error whose span covers the whole pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
    };
    assert_eq!(t_err(r"\s"), expected);
}
2118 | |
#[test]
#[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-gencat")))]
fn class_perl_digit_disabled() {
    // With both `unicode-perl` and `unicode-gencat` turned off, `\d` must
    // be rejected with an error whose span covers the whole pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(2, 1, 3));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::UnicodePerlClassNotFound,
    };
    assert_eq!(t_err(r"\d"), expected);
}
2136 | |
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_gencat() {
    // Builds a span lying entirely on line 1, where each column is one
    // past the corresponding offset.
    let line_span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };
    // Expected class for a property looked up by a single name.
    let query =
        |name: &'static str| hir_uclass_query(ClassQuery::Binary(name));

    // Every spelling of the separator category yields the same class as
    // querying `Z` directly.
    let separator_spellings = [
        r"\pZ",
        r"\pz",
        r"\p{Separator}",
        r"\p{se PaRa ToR}",
        r"\p{gc:Separator}",
        r"\p{gc=Separator}",
    ];
    for pattern in separator_spellings.iter() {
        assert_eq!(t(pattern), query("Z"), "pattern: {}", pattern);
    }
    for pattern in [r"\p{Other}", r"\pC"].iter() {
        assert_eq!(t(pattern), query("Other"), "pattern: {}", pattern);
    }

    // Negated forms.
    // NOTE(review): `\P` combined with `!=` is expected to stay negated
    // here rather than cancel out -- confirm this is the intended
    // semantics.
    let negated_separator =
        [r"\PZ", r"\P{separator}", r"\P{gc!=separator}"];
    for pattern in negated_separator.iter() {
        assert_eq!(
            t(pattern),
            hir_negate(query("Z")),
            "pattern: {}",
            pattern
        );
    }

    // The `any`, `assigned` and `ascii` names, bare and with a `gc:`
    // prefix.
    let special_names = [
        (r"\p{any}", "Any"),
        (r"\p{assigned}", "Assigned"),
        (r"\p{ascii}", "ASCII"),
        (r"\p{gc:any}", "Any"),
        (r"\p{gc:assigned}", "Assigned"),
        (r"\p{gc:ascii}", "ASCII"),
    ];
    for &(pattern, name) in special_names.iter() {
        assert_eq!(t(pattern), query(name), "pattern: {}", pattern);
    }

    // Error cases: Unicode classes while Unicode mode is off, and unknown
    // property names or values. Each error span covers the class itself.
    let failures = vec![
        (
            r"(?-u)\pZ",
            hir::ErrorKind::UnicodeNotAllowed,
            line_span(5, 8),
        ),
        (
            r"(?-u)\p{Separator}",
            hir::ErrorKind::UnicodeNotAllowed,
            line_span(5, 18),
        ),
        (
            r"\pE",
            hir::ErrorKind::UnicodePropertyNotFound,
            line_span(0, 3),
        ),
        (
            r"\p{Foo}",
            hir::ErrorKind::UnicodePropertyNotFound,
            line_span(0, 7),
        ),
        (
            r"\p{gc:Foo}",
            hir::ErrorKind::UnicodePropertyValueNotFound,
            line_span(0, 10),
        ),
    ];
    for (pattern, kind, span) in failures {
        assert_eq!(
            t_err(pattern),
            TestError { kind, span },
            "pattern: {}",
            pattern
        );
    }
}
2250 | |
#[test]
#[cfg(not(feature = "unicode-gencat"))]
fn class_unicode_gencat_disabled() {
    // With `unicode-gencat` turned off, these lookups must fail with an
    // error spanning the entire pattern. The second tuple element is the
    // end offset of that pattern.
    let cases = [(r"\p{Separator}", 13usize), (r"\p{Any}", 7usize)];
    for &(pattern, end) in cases.iter() {
        let expected = TestError {
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(pattern), expected, "pattern: {}", pattern);
    }
}
2276 | |
#[test]
#[cfg(feature = "unicode-script")]
fn class_unicode_script() {
    // Expected class for the Greek script.
    let greek = || hir_uclass_query(ClassQuery::Binary("Greek"));

    assert_eq!(t(r"\p{Greek}"), greek());
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t(r"(?i)\p{Greek}"), hir_case_fold(greek()));
        // Case folding is expected to be applied before the negation.
        assert_eq!(
            t(r"(?i)\P{Greek}"),
            hir_negate(hir_case_fold(greek()))
        );
    }

    // Unknown script values are rejected with an error that spans the
    // entire pattern. The second tuple element is the pattern's end offset.
    let unknown_values = [(r"\p{sc:Foo}", 10usize), (r"\p{scx:Foo}", 11usize)];
    for &(pattern, end) in unknown_values.iter() {
        let expected = TestError {
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
        };
        assert_eq!(t_err(pattern), expected, "pattern: {}", pattern);
    }
}
2318 | |
#[test]
#[cfg(not(feature = "unicode-script"))]
fn class_unicode_script_disabled() {
    // With `unicode-script` turned off, script lookups must fail with an
    // error spanning the entire pattern. The second tuple element is the
    // end offset of that pattern.
    let cases = [(r"\p{Greek}", 9usize), (r"\p{scx:Greek}", 13usize)];
    for &(pattern, end) in cases.iter() {
        let expected = TestError {
            span: Span::new(
                Position::new(0, 1, 1),
                Position::new(end, 1, end + 1),
            ),
            kind: hir::ErrorKind::UnicodePropertyNotFound,
        };
        assert_eq!(t_err(pattern), expected, "pattern: {}", pattern);
    }
}
2344 | |
#[test]
#[cfg(feature = "unicode-age")]
fn class_unicode_age() {
    // An unknown value for the `age` property is rejected, and the error
    // span covers the whole pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::UnicodePropertyValueNotFound,
    };
    assert_eq!(t_err(r"\p{age:Foo}"), expected);
}
2359 | |
#[test]
#[cfg(feature = "unicode-gencat")]
fn class_unicode_any_empty() {
    // Negating `any` is rejected as an empty class, and the error span
    // covers the whole pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::EmptyClassNotAllowed,
    };
    assert_eq!(t_err(r"\P{any}"), expected);
}
2374 | |
#[test]
#[cfg(not(feature = "unicode-age"))]
fn class_unicode_age_disabled() {
    // With `unicode-age` turned off, an `age` lookup must fail with an
    // error spanning the whole pattern.
    let whole_pattern =
        Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12));
    let expected = TestError {
        span: whole_pattern,
        kind: hir::ErrorKind::UnicodePropertyNotFound,
    };
    assert_eq!(t_err(r"\p{age:3.0}"), expected);
}
2389 | |
#[test]
fn class_bracketed() {
    // Builds a span lying entirely on line 1, where each column is one
    // past the corresponding offset.
    let line_span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };

    // Unicode-mode classes that are expected to collapse to one range.
    let single_range: &[(&str, char, char)] = &[
        ("[a]", 'a', 'a'),
        ("[a-z]", 'a', 'z'),
        ("[a-fd-h]", 'a', 'h'),
        ("[a-fg-m]", 'a', 'm'),
        (r"[\x00]", '\0', '\0'),
        (r"[\n]", '\n', '\n'),
        ("[\n]", '\n', '\n'),
    ];
    for &(pattern, lo, hi) in single_range {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }
    assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));

    // Perl and Unicode classes nested in brackets.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    {
        assert_eq!(
            t(r"[\pZ]"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );
        assert_eq!(
            t(r"[\p{separator}]"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );
    }

    // A negated bracket around a negated class gives the plain class.
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
    #[cfg(feature = "unicode-gencat")]
    {
        assert_eq!(
            t(r"[^\PZ]"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );
        assert_eq!(
            t(r"[^\P{separator}]"),
            hir_uclass_query(ClassQuery::Binary("separator"))
        );
    }
    #[cfg(all(
        feature = "unicode-case",
        any(feature = "unicode-perl", feature = "unicode-gencat")
    ))]
    assert_eq!(
        t(r"(?i)[^\D]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    assert_eq!(
        t(r"(?i)[^\P{greek}]"),
        hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
    );

    // Byte-oriented classes.
    assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
    assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
    assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));

    // Case-insensitive classes.
    #[cfg(feature = "unicode-case")]
    {
        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
        assert_eq!(
            t("(?i)[k]"),
            hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')])
        );
        assert_eq!(
            t("(?i)[β]"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ')])
        );
    }
    assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k')]));

    // Negated classes.
    assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
    assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
    assert_eq!(
        t_bytes("(?-u)[^a]"),
        hir_negate(hir_bclass(&[(b'a', b'a')]))
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
    assert_eq!(
        t(r"[^\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    #[cfg(feature = "unicode-gencat")]
    {
        assert_eq!(
            t(r"[^\pZ]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
        );
        assert_eq!(
            t(r"[^\p{separator}]"),
            hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
        );
    }
    #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
    {
        // Both spellings expect case folding first, then negation.
        let folded_then_negated = || {
            hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
                "greek",
            ))))
        };
        assert_eq!(t(r"(?i)[^\p{greek}]"), folded_then_negated());
        assert_eq!(t(r"(?i)[\P{greek}]"), folded_then_negated());
    }

    // Test some weird cases: `[`, `&`, `~` and `-` as class members, both
    // escaped and unescaped, and as range endpoints.
    let weird: &[(&str, char, char)] = &[
        (r"[\[]", '[', '['),
        (r"[&]", '&', '&'),
        (r"[\&]", '&', '&'),
        (r"[\&\&]", '&', '&'),
        (r"[\x00-&]", '\0', '&'),
        (r"[&-\xFF]", '&', '\u{FF}'),
        (r"[~]", '~', '~'),
        (r"[\~]", '~', '~'),
        (r"[\~\~]", '~', '~'),
        (r"[\x00-~]", '\0', '~'),
        (r"[~-\xFF]", '~', '\u{FF}'),
        (r"[-]", '-', '-'),
        (r"[\-]", '-', '-'),
        (r"[\-\-]", '-', '-'),
        (r"[\x00-\-]", '\0', '-'),
        (r"[\--\xFF]", '-', '\u{FF}'),
    ];
    for &(pattern, lo, hi) in weird {
        assert_eq!(
            t(pattern),
            hir_uclass(&[(lo, hi)]),
            "pattern: {:?}",
            pattern
        );
    }

    // Error cases. Each span covers the bracketed class only.
    assert_eq!(
        t_err("(?-u)[^a]"),
        TestError {
            span: line_span(5, 9),
            kind: hir::ErrorKind::InvalidUtf8,
        }
    );
    #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
    {
        assert_eq!(
            t_err(r"[^\s\S]"),
            TestError {
                span: line_span(0, 7),
                kind: hir::ErrorKind::EmptyClassNotAllowed,
            }
        );
        assert_eq!(
            t_err(r"(?-u)[^\s\S]"),
            TestError {
                span: line_span(5, 12),
                kind: hir::ErrorKind::EmptyClassNotAllowed,
            }
        );
    }
}
2546 | |
#[test]
fn class_bracketed_union() {
    assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));

    #[cfg(feature = "unicode-gencat")]
    {
        let separator = hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(
            t(r"[a\pZb]"),
            hir_union(hir_uclass(&[('a', 'b')]), separator)
        );
    }

    #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
    {
        let greek = hir_uclass_query(ClassQuery::Binary("greek"));
        let separator = hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(t(r"[\pZ\p{Greek}]"), hir_union(greek, separator));
    }

    #[cfg(all(
        feature = "unicode-age",
        feature = "unicode-gencat",
        feature = "unicode-script"
    ))]
    {
        // Expected class for `\p{age:3.0}`.
        let age = || {
            hir_uclass_query(ClassQuery::ByValue {
                property_name: "age",
                property_value: "3.0",
            })
        };
        // Expected union of the Greek script and the separator category.
        let greek_or_separator = || {
            hir_union(
                hir_uclass_query(ClassQuery::Binary("greek")),
                hir_uclass_query(ClassQuery::Binary("separator")),
            )
        };
        // Expected class for the items `\p{age:3.0}`, `\pZ`, `\p{Greek}`.
        let all_three = || hir_union(age(), greek_or_separator());

        assert_eq!(t(r"[\p{age:3.0}\pZ\p{Greek}]"), all_three());
        // Nesting the items in sub-classes still yields a plain union.
        assert_eq!(
            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
            hir_union(
                age(),
                hir_union(
                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
                    greek_or_separator()
                )
            )
        );
        assert_eq!(
            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
            hir_negate(all_three())
        );

        #[cfg(feature = "unicode-case")]
        {
            assert_eq!(
                t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
                hir_case_fold(all_three())
            );
            // Case folding is expected to be applied before the negation.
            assert_eq!(
                t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
                hir_negate(hir_case_fold(all_three()))
            );
        }
    }
}
2663 | |
#[test]
fn class_bracketed_nested() {
    // Builds a span lying entirely on line 1, where each column is one
    // past the corresponding offset.
    let line_span = |start: usize, end: usize| {
        Span::new(
            Position::new(start, 1, start + 1),
            Position::new(end, 1, end + 1),
        )
    };
    // The class containing just `c`.
    let only_c = || hir_uclass(&[('c', 'c')]);

    // A nested `[^c]` unioned with items other than `c` is still "not c".
    for pattern in [r"[a[^c]]", r"[a-b[^c]]"].iter() {
        assert_eq!(
            t(pattern),
            hir_negate(only_c()),
            "pattern: {}",
            pattern
        );
    }
    // Once the outer items include `c`, the expected class is the negation
    // of the empty class.
    assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));

    // Negating the outer class leaves only `c`.
    for pattern in [r"[^a[^c]]", r"[^a-b[^c]]"].iter() {
        assert_eq!(t(pattern), only_c(), "pattern: {}", pattern);
    }

    #[cfg(feature = "unicode-case")]
    {
        for pattern in [r"(?i)[a[^c]]", r"(?i)[a-b[^c]]"].iter() {
            assert_eq!(
                t(pattern),
                hir_negate(hir_case_fold(only_c())),
                "pattern: {}",
                pattern
            );
        }
        for pattern in [r"(?i)[^a[^c]]", r"(?i)[^a-b[^c]]"].iter() {
            assert_eq!(
                t(pattern),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
                "pattern: {}",
                pattern
            );
        }
    }

    // Negating a class that already covers everything is rejected as an
    // empty class.
    assert_eq!(
        t_err(r"[^a-c[^c]]"),
        TestError {
            span: line_span(0, 10),
            kind: hir::ErrorKind::EmptyClassNotAllowed,
        }
    );
    #[cfg(feature = "unicode-case")]
    assert_eq!(
        t_err(r"(?i)[^a-c[^c]]"),
        TestError {
            span: line_span(4, 14),
            kind: hir::ErrorKind::EmptyClassNotAllowed,
        }
    );
}
2714 | |
#[test]
fn class_bracketed_intersect() {
    // Each class is checked in four modes: Unicode, bytes (`(?-u)`),
    // case-insensitive Unicode (`(?i)`) and case-insensitive bytes
    // (`(?i-u)`). The expected result is one range, given by its ASCII
    // endpoints.
    let cases: &[(&str, u8, u8)] = &[
        ("[abc&&b-c]", b'b', b'c'),
        ("[abc&&[b-c]]", b'b', b'c'),
        ("[[abc]&&[b-c]]", b'b', b'c'),
        ("[a-z&&b-y&&c-x]", b'c', b'x'),
        ("[c-da-b&&a-d]", b'a', b'd'),
        ("[a-d&&c-da-b]", b'a', b'd'),
    ];
    for &(class, lo, hi) in cases {
        let unicode_range = [(char::from(lo), char::from(hi))];
        let byte_range = [(lo, hi)];

        assert_eq!(t(class), hir_uclass(&unicode_range), "{}", class);
        assert_eq!(
            t(&format!("(?-u){}", class)),
            hir_bclass(&byte_range),
            "(?-u){}",
            class
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(&format!("(?i){}", class)),
            hir_case_fold(hir_uclass(&unicode_range)),
            "(?i){}",
            class
        );
        assert_eq!(
            t(&format!("(?i-u){}", class)),
            hir_case_fold(hir_bclass(&byte_range)),
            "(?i-u){}",
            class
        );
    }

    // Intersections wrapped in an outer class, plain and negated.
    let a_to_c = || hir_uclass(&[('a', 'c')]);
    assert_eq!(t(r"[a-z&&a-c]"), a_to_c());
    assert_eq!(t(r"[[a-z&&a-c]]"), a_to_c());
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(a_to_c()));

    // In `[a^]`, `^` does not need to be escaped, so it makes sense that
    // `^` is also allowed to be unescaped after `&&`.
    assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
    // `]` needs to be escaped after `&&` since it's not at start of class.
    assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
    assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
    // A literal `&` on either side of the `&&` operator.
    let ampersand = || hir_uclass(&[('&', '&')]);
    assert_eq!(t(r"[\&&&&]"), ampersand());
    assert_eq!(t(r"[\&&&\&]"), ampersand());

    // Test precedence.
    let expected = hir_uclass(&[('a', 'b'), ('h', 'w')]);
    assert_eq!(t(r"[a-w&&[^c-g]z]"), expected);
}
2804 | |
#[test]
fn class_bracketed_intersect_negate() {
    // Checks how negation combines with `&&` inside bracketed classes, in
    // Unicode mode first and then in byte (`(?-u)`) mode.
    //
    // Only the Unicode-mode assertions that use `\w`/`\d` are gated on
    // `unicode-perl`. The `(?-u)` assertions use the ASCII definitions of
    // those classes, so all of them run regardless of that feature.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^\w&&\d]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^[\w&&\d]]"),
        hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
    );
    // Negating twice gives back the plain intersection.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(
        t(r"[^[^\w&&\d]]"),
        hir_uclass_query(ClassQuery::Binary("digit"))
    );
    // Intersecting two negated classes.
    #[cfg(feature = "unicode-perl")]
    assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));

    // Byte mode. This first assertion used to be gated on `unicode-perl`,
    // unlike its siblings below which exercise the same ASCII classes.
    assert_eq!(
        t_bytes(r"(?-u)[^\w&&\d]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[a-z&&a-c]]"),
        hir_negate(hir_bclass(&[(b'a', b'c')]))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[\w&&\d]]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Digit
        )))
    );
    assert_eq!(
        t_bytes(r"(?-u)[^[^\w&&\d]]"),
        hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
    );
    assert_eq!(
        t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
        hir_negate(hir_bclass_from_char(ascii_class(
            &ast::ClassAsciiKind::Word
        )))
    );
}
2854 | |
#[test]
fn class_bracketed_difference() {
    // Unicode mode: letters with the ASCII range removed.
    #[cfg(feature = "unicode-gencat")]
    {
        let letters = hir_uclass_query(ClassQuery::Binary("letter"));
        let ascii = hir_uclass(&[('\0', '\x7F')]);
        assert_eq!(t(r"[\pL--[:ascii:]]"), hir_difference(letters, ascii));
    }

    // Byte mode: ASCII letters minus the lowercase ones.
    let uppercase = hir_bclass(&[(b'A', b'Z')]);
    assert_eq!(t(r"(?-u)[[:alpha:]--[:lower:]]"), uppercase);
}
2871 | |
#[test]
fn class_bracketed_symmetric_difference() {
    // Symmetric difference of the `sc` and `scx` classes for Greek.
    #[cfg(feature = "unicode-script")]
    {
        let expected = hir_uclass(&[
            ('\u{0342}', '\u{0342}'),
            ('\u{0345}', '\u{0345}'),
            ('\u{1DC0}', '\u{1DC1}'),
        ]);
        assert_eq!(t(r"[\p{sc:Greek}~~\p{scx:Greek}]"), expected);
    }

    // `a-g` and `c-j` overlap on `c-g`, so only `a-b` and `h-j` remain.
    let unicode_expected = hir_uclass(&[('a', 'b'), ('h', 'j')]);
    assert_eq!(t(r"[a-g~~c-j]"), unicode_expected);

    let byte_expected = hir_bclass(&[(b'a', b'b'), (b'h', b'j')]);
    assert_eq!(t(r"(?-u)[a-g~~c-j]"), byte_expected);
}
2890 | |
#[test]
fn ignore_whitespace() {
    // The space between `\12` and `3` is dropped from the literal.
    assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));

    // Hex escapes for `S` written with whitespace and comments placed
    // between and inside their parts.
    let spellings_of_s = [
        r"(?x)\x { 53 }",
        r"(?x)\x # comment
{ # comment
53 # comment
} #comment",
        r"(?x)\x 53",
        r"(?x)\x # comment
53 # comment",
        r"(?x)\x5 3",
    ];
    for pattern in spellings_of_s.iter() {
        assert_eq!(t(pattern), hir_lit("S"), "pattern: {:?}", pattern);
    }

    // A Unicode class name spread over several commented lines.
    #[cfg(feature = "unicode-gencat")]
    {
        let separator = hir_uclass_query(ClassQuery::Binary("separator"));
        assert_eq!(
            t(r"(?x)\p # comment
{ # comment
Separator # comment
} # comment"),
            separator
        );
    }

    // A counted repetition spread over several commented lines.
    let repeated_a = hir_range(
        true,
        hir::RepetitionRange::Bounded(5, 10),
        hir_lit("a"),
    );
    assert_eq!(
        t(r"(?x)a # comment
{ # comment
5 # comment
, # comment
10 # comment
} # comment"),
        repeated_a
    );

    // An escaped space is kept as a literal space.
    assert_eq!(t(r"(?x)a\ # hi there"), hir_lit("a "));
}
2936 | |
#[test]
fn analysis_is_always_utf8() {
    // Positive examples.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for pattern in always_utf8.iter() {
        assert!(
            t_bytes(pattern).is_always_utf8(),
            "expected always-UTF-8 for {:?}",
            pattern
        );
    }

    // Negative examples.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for pattern in not_always_utf8.iter() {
        assert!(
            !t_bytes(pattern).is_always_utf8(),
            "expected not always-UTF-8 for {:?}",
            pattern
        );
    }
}
2959 | |
/// Checks the `is_all_assertions` analysis on translated expressions.
#[test]
fn analysis_is_all_assertions() {
    // Patterns for which `is_all_assertions()` must be true.
    let assertion_only = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in assertion_only.iter() {
        assert!(
            t(pattern).is_all_assertions(),
            "expected is_all_assertions for {:?}",
            pattern
        );
    }

    // A literal after the anchor must clear the property.
    let with_literal = t(r"^a");
    assert!(!with_literal.is_all_assertions());
}
2977 | |
/// Checks the four anchoring analyses (`is_anchored_start`,
/// `is_anchored_end`, `is_line_anchored_start`, `is_line_anchored_end`) on
/// translated expressions.
///
/// Each group of assertions checks start / end / line-start / line-end, in
/// that order, for one pattern shape.
#[test]
fn analysis_is_anchored() {
    // Positive examples.

    // Bare anchors.
    assert!(t(r"^").is_anchored_start());
    assert!(t(r"$").is_anchored_end());
    assert!(t(r"^").is_line_anchored_start());
    assert!(t(r"$").is_line_anchored_end());

    // Doubled anchors.
    assert!(t(r"^^").is_anchored_start());
    assert!(t(r"$$").is_anchored_end());
    assert!(t(r"^^").is_line_anchored_start());
    assert!(t(r"$$").is_line_anchored_end());

    // Both anchors in one concatenation.
    assert!(t(r"^$").is_anchored_start());
    assert!(t(r"^$").is_anchored_end());
    assert!(t(r"^$").is_line_anchored_start());
    assert!(t(r"^$").is_line_anchored_end());

    // Anchor next to a literal.
    assert!(t(r"^foo").is_anchored_start());
    assert!(t(r"foo$").is_anchored_end());
    assert!(t(r"^foo").is_line_anchored_start());
    assert!(t(r"foo$").is_line_anchored_end());

    // Every alternate is anchored.
    assert!(t(r"^foo|^bar").is_anchored_start());
    assert!(t(r"foo$|bar$").is_anchored_end());
    assert!(t(r"^foo|^bar").is_line_anchored_start());
    assert!(t(r"foo$|bar$").is_line_anchored_end());

    // Anchor outside a group containing an alternation.
    assert!(t(r"^(foo|bar)").is_anchored_start());
    assert!(t(r"(foo|bar)$").is_anchored_end());
    assert!(t(r"^(foo|bar)").is_line_anchored_start());
    assert!(t(r"(foo|bar)$").is_line_anchored_end());

    // Repetitions that require at least one occurrence of the anchor.
    assert!(t(r"^+").is_anchored_start());
    assert!(t(r"$+").is_anchored_end());
    assert!(t(r"^+").is_line_anchored_start());
    assert!(t(r"$+").is_line_anchored_end());
    assert!(t(r"^++").is_anchored_start());
    assert!(t(r"$++").is_anchored_end());
    assert!(t(r"^++").is_line_anchored_start());
    assert!(t(r"$++").is_line_anchored_end());
    assert!(t(r"(^)+").is_anchored_start());
    assert!(t(r"($)+").is_anchored_end());
    assert!(t(r"(^)+").is_line_anchored_start());
    assert!(t(r"($)+").is_line_anchored_end());

    // Anchors in "reverse" order. All four properties are checked here,
    // matching every other group (previously the start and line-end checks
    // were each repeated while the end and line-start checks were absent).
    assert!(t(r"$^").is_anchored_start());
    assert!(t(r"$^").is_anchored_end());
    assert!(t(r"$^").is_line_anchored_start());
    assert!(t(r"$^").is_line_anchored_end());
    assert!(t(r"$^|^$").is_anchored_start());
    assert!(t(r"$^|^$").is_anchored_end());
    assert!(t(r"$^|^$").is_line_anchored_start());
    assert!(t(r"$^|^$").is_line_anchored_end());

    // Anchors combined with other zero-width assertions.
    assert!(t(r"\b^").is_anchored_start());
    assert!(t(r"$\b").is_anchored_end());
    assert!(t(r"\b^").is_line_anchored_start());
    assert!(t(r"$\b").is_line_anchored_end());
    assert!(t(r"^(?m:^)").is_anchored_start());
    assert!(t(r"(?m:$)$").is_anchored_end());
    assert!(t(r"^(?m:^)").is_line_anchored_start());
    assert!(t(r"(?m:$)$").is_line_anchored_end());
    assert!(t(r"(?m:^)^").is_anchored_start());
    assert!(t(r"$(?m:$)").is_anchored_end());
    assert!(t(r"(?m:^)^").is_line_anchored_start());
    assert!(t(r"$(?m:$)").is_line_anchored_end());

    // Negative examples.

    // Multi-line anchors alone are not text anchors.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    assert!(!t(r"(?m:^$)|$^").is_anchored_start());
    assert!(!t(r"(?m:^$)|$^").is_anchored_end());
    assert!(!t(r"$^|(?m:^$)").is_anchored_start());
    assert!(!t(r"$^|(?m:^$)").is_anchored_end());

    // A literal on the wrong side of the anchor.
    assert!(!t(r"a^").is_anchored_start());
    assert!(!t(r"$a").is_anchored_start());
    assert!(!t(r"a^").is_line_anchored_start());
    assert!(!t(r"$a").is_line_anchored_start());

    assert!(!t(r"a^").is_anchored_end());
    assert!(!t(r"$a").is_anchored_end());
    assert!(!t(r"a^").is_line_anchored_end());
    assert!(!t(r"$a").is_line_anchored_end());

    // Only one alternate is anchored.
    assert!(!t(r"^foo|bar").is_anchored_start());
    assert!(!t(r"foo|bar$").is_anchored_end());
    assert!(!t(r"^foo|bar").is_line_anchored_start());
    assert!(!t(r"foo|bar$").is_line_anchored_end());

    // Repetitions that permit zero occurrences of the anchor.
    assert!(!t(r"^*").is_anchored_start());
    assert!(!t(r"$*").is_anchored_end());
    assert!(!t(r"^*").is_line_anchored_start());
    assert!(!t(r"$*").is_line_anchored_end());
    assert!(!t(r"^*+").is_anchored_start());
    assert!(!t(r"$*+").is_anchored_end());
    assert!(!t(r"^*+").is_line_anchored_start());
    assert!(!t(r"$*+").is_line_anchored_end());
    assert!(!t(r"^+*").is_anchored_start());
    assert!(!t(r"$+*").is_anchored_end());
    assert!(!t(r"^+*").is_line_anchored_start());
    assert!(!t(r"$+*").is_line_anchored_end());
    assert!(!t(r"(^)*").is_anchored_start());
    assert!(!t(r"($)*").is_anchored_end());
    assert!(!t(r"(^)*").is_line_anchored_start());
    assert!(!t(r"($)*").is_line_anchored_end());
}
3086 | |
/// Checks `is_line_anchored_start` / `is_line_anchored_end` on patterns that
/// use multi-line (`(?m)`) anchors.
#[test]
fn analysis_is_line_anchored() {
    // Patterns that must be line-anchored at the start.
    let line_start = [
        r"(?m)^(foo|bar)",
        r"(?m)^foo|^bar",
        r"(?m)^",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_start.iter() {
        assert!(
            t(pattern).is_line_anchored_start(),
            "expected is_line_anchored_start for {:?}",
            pattern
        );
    }

    // Patterns that must be line-anchored at the end.
    let line_end = [
        r"(?m)(foo|bar)$",
        r"(?m)foo$|bar$",
        r"(?m)$",
        r"(?m:^$)|$^",
        r"$^|(?m:^$)",
    ];
    for &pattern in line_end.iter() {
        assert!(
            t(pattern).is_line_anchored_end(),
            "expected is_line_anchored_end for {:?}",
            pattern
        );
    }
}
3104 | |
/// Checks `is_any_anchored_start` / `is_any_anchored_end` on single-anchor
/// patterns, pairing each pattern with the value the analysis must report.
#[test]
fn analysis_is_any_anchored() {
    // (pattern, expected `is_any_anchored_start()`)
    let start_cases = [
        (r"^", true),
        (r"\A", true),
        (r"(?m)^", false),
        (r"$", false),
    ];
    for &(pattern, expected) in start_cases.iter() {
        assert_eq!(
            t(pattern).is_any_anchored_start(),
            expected,
            "is_any_anchored_start for {:?}",
            pattern
        );
    }

    // (pattern, expected `is_any_anchored_end()`)
    let end_cases = [
        (r"$", true),
        (r"\z", true),
        (r"(?m)$", false),
        (r"^", false),
    ];
    for &(pattern, expected) in end_cases.iter() {
        assert_eq!(
            t(pattern).is_any_anchored_end(),
            expected,
            "is_any_anchored_end for {:?}",
            pattern
        );
    }
}
3119 | |
/// Checks the `is_match_empty` analysis on translated expressions.
#[test]
fn analysis_is_match_empty() {
    // Patterns for which `is_match_empty()` must be true.
    let can_be_empty = [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a|",
        r"|a",
        r"a||b",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
        r"\b",
        r"(?-u)\b",
    ];
    for &pattern in can_be_empty.iter() {
        assert!(
            t(pattern).is_match_empty(),
            "expected is_match_empty for {:?}",
            pattern
        );
    }

    // Only checked when the Unicode general-category feature is compiled in.
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());

    // This pattern goes through `t_bytes` rather than `t`.
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Patterns for which `is_match_empty()` must be false.
    let never_empty = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
    ];
    for &pattern in never_empty.iter() {
        assert!(
            !t(pattern).is_match_empty(),
            "expected !is_match_empty for {:?}",
            pattern
        );
    }
}
3162 | |
/// Checks the `is_literal` analysis on translated expressions.
#[test]
fn analysis_is_literal() {
    // Patterns for which `is_literal()` must be true.
    let literals = [r"a", r"ab", r"abc", r"(?m)abc"];
    for &pattern in literals.iter() {
        assert!(
            t(pattern).is_literal(),
            "expected is_literal for {:?}",
            pattern
        );
    }

    // Patterns for which `is_literal()` must be false.
    let non_literals = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
    ];
    for &pattern in non_literals.iter() {
        assert!(
            !t(pattern).is_literal(),
            "expected !is_literal for {:?}",
            pattern
        );
    }
}
3181 | |
/// Checks the `is_alternation_literal` analysis on translated expressions.
#[test]
fn analysis_is_alternation_literal() {
    // Patterns for which `is_alternation_literal()` must be true.
    let accepted = [
        r"a",
        r"ab",
        r"abc",
        r"(?m)abc",
        r"a|b",
        r"a|b|c",
        r"foo|bar",
        r"foo|bar|baz",
    ];
    for &pattern in accepted.iter() {
        assert!(
            t(pattern).is_alternation_literal(),
            "expected is_alternation_literal for {:?}",
            pattern
        );
    }

    // Patterns for which `is_alternation_literal()` must be false.
    let rejected = [
        r"",
        r"^",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
        r"[a]|b",
        r"a|[b]",
        r"(a)|b",
        r"a|(b)",
    ];
    for &pattern in rejected.iter() {
        assert!(
            !t(pattern).is_alternation_literal(),
            "expected !is_alternation_literal for {:?}",
            pattern
        );
    }
}
3207 | } |
3208 | |