1/*!
2This module provides a regular expression printer for `Ast`.
3*/
4
5use core::fmt;
6
7use crate::ast::{
8 self,
9 visitor::{self, Visitor},
10 Ast,
11};
12
/// Builder used to construct a `Printer`.
///
/// The printer currently exposes no configuration options, which is why this
/// type is kept private to the module instead of being exported.
#[derive(Clone, Debug)]
struct PrinterBuilder {
    // Zero-sized placeholder field; the builder carries no state.
    _priv: (),
}
21
22impl Default for PrinterBuilder {
23 fn default() -> PrinterBuilder {
24 PrinterBuilder::new()
25 }
26}
27
28impl PrinterBuilder {
29 fn new() -> PrinterBuilder {
30 PrinterBuilder { _priv: () }
31 }
32
33 fn build(&self) -> Printer {
34 Printer { _priv: () }
35 }
36}
37
/// Prints a regular expression abstract syntax tree (AST) back out as a
/// pattern string.
///
/// The printer uses constant stack space, and heap space proportional to the
/// size of the AST.
///
/// The original formatting of the pattern is not necessarily preserved. For
/// example, all whitespace and comments are ignored.
#[derive(Debug)]
pub struct Printer {
    // Zero-sized placeholder field; the printer carries no state.
    _priv: (),
}
51
52impl Printer {
53 /// Create a new printer.
54 pub fn new() -> Printer {
55 PrinterBuilder::new().build()
56 }
57
58 /// Print the given `Ast` to the given writer. The writer must implement
59 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
60 /// here are a `fmt::Formatter` (which is available in `fmt::Display`
61 /// implementations) or a `&mut String`.
62 pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
63 visitor::visit(ast, visitor:Writer { wtr })
64 }
65}
66
/// Visitor state used while printing: it owns the destination that the
/// pattern text is written to.
#[derive(Debug)]
struct Writer<W> {
    /// Destination for the printed pattern.
    wtr: W,
}
71
72impl<W: fmt::Write> Visitor for Writer<W> {
73 type Output = ();
74 type Err = fmt::Error;
75
76 fn finish(self) -> fmt::Result {
77 Ok(())
78 }
79
80 fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
81 match *ast {
82 Ast::Group(ref x) => self.fmt_group_pre(x),
83 Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_pre(x),
84 _ => Ok(()),
85 }
86 }
87
88 fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
89 match *ast {
90 Ast::Empty(_) => Ok(()),
91 Ast::Flags(ref x) => self.fmt_set_flags(x),
92 Ast::Literal(ref x) => self.fmt_literal(x),
93 Ast::Dot(_) => self.wtr.write_str("."),
94 Ast::Assertion(ref x) => self.fmt_assertion(x),
95 Ast::ClassPerl(ref x) => self.fmt_class_perl(x),
96 Ast::ClassUnicode(ref x) => self.fmt_class_unicode(x),
97 Ast::ClassBracketed(ref x) => self.fmt_class_bracketed_post(x),
98 Ast::Repetition(ref x) => self.fmt_repetition(x),
99 Ast::Group(ref x) => self.fmt_group_post(x),
100 Ast::Alternation(_) => Ok(()),
101 Ast::Concat(_) => Ok(()),
102 }
103 }
104
105 fn visit_alternation_in(&mut self) -> fmt::Result {
106 self.wtr.write_str("|")
107 }
108
109 fn visit_class_set_item_pre(
110 &mut self,
111 ast: &ast::ClassSetItem,
112 ) -> Result<(), Self::Err> {
113 match *ast {
114 ast::ClassSetItem::Bracketed(ref x) => {
115 self.fmt_class_bracketed_pre(x)
116 }
117 _ => Ok(()),
118 }
119 }
120
121 fn visit_class_set_item_post(
122 &mut self,
123 ast: &ast::ClassSetItem,
124 ) -> Result<(), Self::Err> {
125 use crate::ast::ClassSetItem::*;
126
127 match *ast {
128 Empty(_) => Ok(()),
129 Literal(ref x) => self.fmt_literal(x),
130 Range(ref x) => {
131 self.fmt_literal(&x.start)?;
132 self.wtr.write_str("-")?;
133 self.fmt_literal(&x.end)?;
134 Ok(())
135 }
136 Ascii(ref x) => self.fmt_class_ascii(x),
137 Unicode(ref x) => self.fmt_class_unicode(x),
138 Perl(ref x) => self.fmt_class_perl(x),
139 Bracketed(ref x) => self.fmt_class_bracketed_post(x),
140 Union(_) => Ok(()),
141 }
142 }
143
144 fn visit_class_set_binary_op_in(
145 &mut self,
146 ast: &ast::ClassSetBinaryOp,
147 ) -> Result<(), Self::Err> {
148 self.fmt_class_set_binary_op_kind(&ast.kind)
149 }
150}
151
152impl<W: fmt::Write> Writer<W> {
153 fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
154 use crate::ast::GroupKind::*;
155 match ast.kind {
156 CaptureIndex(_) => self.wtr.write_str("("),
157 CaptureName { ref name, starts_with_p } => {
158 let start = if starts_with_p { "(?P<" } else { "(?<" };
159 self.wtr.write_str(start)?;
160 self.wtr.write_str(&name.name)?;
161 self.wtr.write_str(">")?;
162 Ok(())
163 }
164 NonCapturing(ref flags) => {
165 self.wtr.write_str("(?")?;
166 self.fmt_flags(flags)?;
167 self.wtr.write_str(":")?;
168 Ok(())
169 }
170 }
171 }
172
173 fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
174 self.wtr.write_str(")")
175 }
176
177 fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
178 use crate::ast::RepetitionKind::*;
179 match ast.op.kind {
180 ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
181 ZeroOrOne => self.wtr.write_str("??"),
182 ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
183 ZeroOrMore => self.wtr.write_str("*?"),
184 OneOrMore if ast.greedy => self.wtr.write_str("+"),
185 OneOrMore => self.wtr.write_str("+?"),
186 Range(ref x) => {
187 self.fmt_repetition_range(x)?;
188 if !ast.greedy {
189 self.wtr.write_str("?")?;
190 }
191 Ok(())
192 }
193 }
194 }
195
196 fn fmt_repetition_range(
197 &mut self,
198 ast: &ast::RepetitionRange,
199 ) -> fmt::Result {
200 use crate::ast::RepetitionRange::*;
201 match *ast {
202 Exactly(x) => write!(self.wtr, "{{{}}}", x),
203 AtLeast(x) => write!(self.wtr, "{{{},}}", x),
204 Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
205 }
206 }
207
208 fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
209 use crate::ast::LiteralKind::*;
210
211 match ast.kind {
212 Verbatim => self.wtr.write_char(ast.c),
213 Meta | Superfluous => write!(self.wtr, r"\{}", ast.c),
214 Octal => write!(self.wtr, r"\{:o}", u32::from(ast.c)),
215 HexFixed(ast::HexLiteralKind::X) => {
216 write!(self.wtr, r"\x{:02X}", u32::from(ast.c))
217 }
218 HexFixed(ast::HexLiteralKind::UnicodeShort) => {
219 write!(self.wtr, r"\u{:04X}", u32::from(ast.c))
220 }
221 HexFixed(ast::HexLiteralKind::UnicodeLong) => {
222 write!(self.wtr, r"\U{:08X}", u32::from(ast.c))
223 }
224 HexBrace(ast::HexLiteralKind::X) => {
225 write!(self.wtr, r"\x{{{:X}}}", u32::from(ast.c))
226 }
227 HexBrace(ast::HexLiteralKind::UnicodeShort) => {
228 write!(self.wtr, r"\u{{{:X}}}", u32::from(ast.c))
229 }
230 HexBrace(ast::HexLiteralKind::UnicodeLong) => {
231 write!(self.wtr, r"\U{{{:X}}}", u32::from(ast.c))
232 }
233 Special(ast::SpecialLiteralKind::Bell) => {
234 self.wtr.write_str(r"\a")
235 }
236 Special(ast::SpecialLiteralKind::FormFeed) => {
237 self.wtr.write_str(r"\f")
238 }
239 Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
240 Special(ast::SpecialLiteralKind::LineFeed) => {
241 self.wtr.write_str(r"\n")
242 }
243 Special(ast::SpecialLiteralKind::CarriageReturn) => {
244 self.wtr.write_str(r"\r")
245 }
246 Special(ast::SpecialLiteralKind::VerticalTab) => {
247 self.wtr.write_str(r"\v")
248 }
249 Special(ast::SpecialLiteralKind::Space) => {
250 self.wtr.write_str(r"\ ")
251 }
252 }
253 }
254
255 fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
256 use crate::ast::AssertionKind::*;
257 match ast.kind {
258 StartLine => self.wtr.write_str("^"),
259 EndLine => self.wtr.write_str("$"),
260 StartText => self.wtr.write_str(r"\A"),
261 EndText => self.wtr.write_str(r"\z"),
262 WordBoundary => self.wtr.write_str(r"\b"),
263 NotWordBoundary => self.wtr.write_str(r"\B"),
264 WordBoundaryStart => self.wtr.write_str(r"\b{start}"),
265 WordBoundaryEnd => self.wtr.write_str(r"\b{end}"),
266 WordBoundaryStartAngle => self.wtr.write_str(r"\<"),
267 WordBoundaryEndAngle => self.wtr.write_str(r"\>"),
268 WordBoundaryStartHalf => self.wtr.write_str(r"\b{start-half}"),
269 WordBoundaryEndHalf => self.wtr.write_str(r"\b{end-half}"),
270 }
271 }
272
273 fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
274 self.wtr.write_str("(?")?;
275 self.fmt_flags(&ast.flags)?;
276 self.wtr.write_str(")")?;
277 Ok(())
278 }
279
280 fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
281 use crate::ast::{Flag, FlagsItemKind};
282
283 for item in &ast.items {
284 match item.kind {
285 FlagsItemKind::Negation => self.wtr.write_str("-"),
286 FlagsItemKind::Flag(ref flag) => match *flag {
287 Flag::CaseInsensitive => self.wtr.write_str("i"),
288 Flag::MultiLine => self.wtr.write_str("m"),
289 Flag::DotMatchesNewLine => self.wtr.write_str("s"),
290 Flag::SwapGreed => self.wtr.write_str("U"),
291 Flag::Unicode => self.wtr.write_str("u"),
292 Flag::CRLF => self.wtr.write_str("R"),
293 Flag::IgnoreWhitespace => self.wtr.write_str("x"),
294 },
295 }?;
296 }
297 Ok(())
298 }
299
300 fn fmt_class_bracketed_pre(
301 &mut self,
302 ast: &ast::ClassBracketed,
303 ) -> fmt::Result {
304 if ast.negated {
305 self.wtr.write_str("[^")
306 } else {
307 self.wtr.write_str("[")
308 }
309 }
310
311 fn fmt_class_bracketed_post(
312 &mut self,
313 _ast: &ast::ClassBracketed,
314 ) -> fmt::Result {
315 self.wtr.write_str("]")
316 }
317
318 fn fmt_class_set_binary_op_kind(
319 &mut self,
320 ast: &ast::ClassSetBinaryOpKind,
321 ) -> fmt::Result {
322 use crate::ast::ClassSetBinaryOpKind::*;
323 match *ast {
324 Intersection => self.wtr.write_str("&&"),
325 Difference => self.wtr.write_str("--"),
326 SymmetricDifference => self.wtr.write_str("~~"),
327 }
328 }
329
330 fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
331 use crate::ast::ClassPerlKind::*;
332 match ast.kind {
333 Digit if ast.negated => self.wtr.write_str(r"\D"),
334 Digit => self.wtr.write_str(r"\d"),
335 Space if ast.negated => self.wtr.write_str(r"\S"),
336 Space => self.wtr.write_str(r"\s"),
337 Word if ast.negated => self.wtr.write_str(r"\W"),
338 Word => self.wtr.write_str(r"\w"),
339 }
340 }
341
342 fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
343 use crate::ast::ClassAsciiKind::*;
344 match ast.kind {
345 Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
346 Alnum => self.wtr.write_str("[:alnum:]"),
347 Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
348 Alpha => self.wtr.write_str("[:alpha:]"),
349 Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
350 Ascii => self.wtr.write_str("[:ascii:]"),
351 Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
352 Blank => self.wtr.write_str("[:blank:]"),
353 Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
354 Cntrl => self.wtr.write_str("[:cntrl:]"),
355 Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
356 Digit => self.wtr.write_str("[:digit:]"),
357 Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
358 Graph => self.wtr.write_str("[:graph:]"),
359 Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
360 Lower => self.wtr.write_str("[:lower:]"),
361 Print if ast.negated => self.wtr.write_str("[:^print:]"),
362 Print => self.wtr.write_str("[:print:]"),
363 Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
364 Punct => self.wtr.write_str("[:punct:]"),
365 Space if ast.negated => self.wtr.write_str("[:^space:]"),
366 Space => self.wtr.write_str("[:space:]"),
367 Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
368 Upper => self.wtr.write_str("[:upper:]"),
369 Word if ast.negated => self.wtr.write_str("[:^word:]"),
370 Word => self.wtr.write_str("[:word:]"),
371 Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
372 Xdigit => self.wtr.write_str("[:xdigit:]"),
373 }
374 }
375
376 fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
377 use crate::ast::ClassUnicodeKind::*;
378 use crate::ast::ClassUnicodeOpKind::*;
379
380 if ast.negated {
381 self.wtr.write_str(r"\P")?;
382 } else {
383 self.wtr.write_str(r"\p")?;
384 }
385 match ast.kind {
386 OneLetter(c) => self.wtr.write_char(c),
387 Named(ref x) => write!(self.wtr, "{{{}}}", x),
388 NamedValue { op: Equal, ref name, ref value } => {
389 write!(self.wtr, "{{{}={}}}", name, value)
390 }
391 NamedValue { op: Colon, ref name, ref value } => {
392 write!(self.wtr, "{{{}:{}}}", name, value)
393 }
394 NamedValue { op: NotEqual, ref name, ref value } => {
395 write!(self.wtr, "{{{}!={}}}", name, value)
396 }
397 }
398 }
399}
400
#[cfg(test)]
mod tests {
    use alloc::string::String;

    use crate::ast::parse::ParserBuilder;

    use super::*;

    /// Parses `given` with a default parser, prints the resulting AST, and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|b| b, given);
    }

    /// Runs `roundtrip` on every pattern, in order.
    fn roundtrip_all(patterns: &[&str]) {
        for &pattern in patterns {
            roundtrip(pattern);
        }
    }

    /// Like `roundtrip`, but lets the caller configure the parser builder
    /// first.
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut printed = String::new();
        Printer::new().print(&ast, &mut printed).unwrap();
        assert_eq!(given, printed);
    }

    #[test]
    fn print_literal() {
        roundtrip_all(&["a", r"\["]);
        // Octal escapes are only accepted when enabled on the parser.
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip_all(&[
            r"\x61",
            r"\x7F",
            r"\u0061",
            r"\U00000061",
            r"\x{61}",
            r"\x{7F}",
            r"\u{61}",
            r"\U{61}",
        ]);

        roundtrip_all(&[
            r"\a",
            r"\f",
            r"\t",
            r"\n",
            r"\r",
            r"\v",
            r"(?x)\ ",
        ]);
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip_all(&["ab", "abcde", "a(bcd)ef"]);
    }

    #[test]
    fn print_alternation() {
        roundtrip_all(&[
            "a|b",
            "a|b|c|d|e",
            "|a|b|c|d|e",
            "|a|b|c|d|e|",
            "a(b|c|d)|e|f",
        ]);
    }

    #[test]
    fn print_assertion() {
        roundtrip_all(&[r"^", r"$", r"\A", r"\z", r"\b", r"\B"]);
    }

    #[test]
    fn print_repetition() {
        roundtrip_all(&[
            "a?",
            "a??",
            "a*",
            "a*?",
            "a+",
            "a+?",
            "a{5}",
            "a{5}?",
            "a{5,}",
            "a{5,}?",
            "a{5,10}",
            "a{5,10}?",
        ]);
    }

    #[test]
    fn print_flags() {
        roundtrip_all(&[
            "(?i)",
            "(?-i)",
            "(?s-i)",
            "(?-si)",
            "(?siUmux)",
        ]);
    }

    #[test]
    fn print_group() {
        roundtrip_all(&["(?i:a)", "(?P<foo>a)", "(?<foo>a)", "(a)"]);
    }

    #[test]
    fn print_class() {
        // Bracketed classes, ranges, set operations and nesting.
        roundtrip_all(&[
            r"[abc]",
            r"[a-z]",
            r"[^a-z]",
            r"[a-z0-9]",
            r"[-a-z0-9]",
            r"[-a-z0-9]",
            r"[a-z0-9---]",
            r"[a-z&&m-n]",
            r"[[a-z&&m-n]]",
            r"[a-z--m-n]",
            r"[a-z~~m-n]",
            r"[a-z[0-9]]",
            r"[a-z[^0-9]]",
        ]);

        // Perl classes.
        roundtrip_all(&[r"\d", r"\D", r"\s", r"\S", r"\w", r"\W"]);

        // ASCII classes, plain and negated.
        roundtrip_all(&[
            r"[[:alnum:]]",
            r"[[:^alnum:]]",
            r"[[:alpha:]]",
            r"[[:^alpha:]]",
            r"[[:ascii:]]",
            r"[[:^ascii:]]",
            r"[[:blank:]]",
            r"[[:^blank:]]",
            r"[[:cntrl:]]",
            r"[[:^cntrl:]]",
            r"[[:digit:]]",
            r"[[:^digit:]]",
            r"[[:graph:]]",
            r"[[:^graph:]]",
            r"[[:lower:]]",
            r"[[:^lower:]]",
            r"[[:print:]]",
            r"[[:^print:]]",
            r"[[:punct:]]",
            r"[[:^punct:]]",
            r"[[:space:]]",
            r"[[:^space:]]",
            r"[[:upper:]]",
            r"[[:^upper:]]",
            r"[[:word:]]",
            r"[[:^word:]]",
            r"[[:xdigit:]]",
            r"[[:^xdigit:]]",
        ]);

        // Unicode classes.
        roundtrip_all(&[
            r"\pL",
            r"\PL",
            r"\p{L}",
            r"\P{L}",
            r"\p{X=Y}",
            r"\P{X=Y}",
            r"\p{X:Y}",
            r"\P{X:Y}",
            r"\p{X!=Y}",
            r"\P{X!=Y}",
        ]);
    }
}
578