1/*!
2This module provides a regular expression printer for `Ast`.
3*/
4
5use std::fmt;
6
7use crate::ast::visitor::{self, Visitor};
8use crate::ast::{self, Ast};
9
/// Builder for a `Printer`.
///
/// The printer has no configuration options at present, which is why this
/// type is kept private instead of being exported.
#[derive(Debug, Clone)]
struct PrinterBuilder {
    /// Zero-sized placeholder; the builder carries no data.
    _priv: (),
}
18
19impl Default for PrinterBuilder {
20 fn default() -> PrinterBuilder {
21 PrinterBuilder::new()
22 }
23}
24
25impl PrinterBuilder {
26 fn new() -> PrinterBuilder {
27 PrinterBuilder { _priv: () }
28 }
29
30 fn build(&self) -> Printer {
31 Printer { _priv: () }
32 }
33}
34
35/// A printer for a regular expression abstract syntax tree.
36///
37/// A printer converts an abstract syntax tree (AST) to a regular expression
38/// pattern string. This particular printer uses constant stack space and heap
39/// space proportional to the size of the AST.
40///
41/// This printer will not necessarily preserve the original formatting of the
42/// regular expression pattern string. For example, all whitespace and comments
43/// are ignored.
44#[derive(Debug)]
45pub struct Printer {
46 _priv: (),
47}
48
49impl Printer {
50 /// Create a new printer.
51 pub fn new() -> Printer {
52 PrinterBuilder::new().build()
53 }
54
55 /// Print the given `Ast` to the given writer. The writer must implement
56 /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57 /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58 /// implementations) or a `&mut String`.
59 pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60 visitor::visit(ast, Writer { wtr })
61 }
62}
63
/// Visitor state used while printing: wraps the destination that all pattern
/// text is written to.
#[derive(Debug)]
struct Writer<W> {
    /// Destination for the printed pattern text.
    wtr: W,
}
68
69impl<W: fmt::Write> Visitor for Writer<W> {
70 type Output = ();
71 type Err = fmt::Error;
72
73 fn finish(self) -> fmt::Result {
74 Ok(())
75 }
76
77 fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
78 match *ast {
79 Ast::Group(ref x) => self.fmt_group_pre(x),
80 Ast::Class(ast::Class::Bracketed(ref x)) => {
81 self.fmt_class_bracketed_pre(x)
82 }
83 _ => Ok(()),
84 }
85 }
86
87 fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
88 use crate::ast::Class;
89
90 match *ast {
91 Ast::Empty(_) => Ok(()),
92 Ast::Flags(ref x) => self.fmt_set_flags(x),
93 Ast::Literal(ref x) => self.fmt_literal(x),
94 Ast::Dot(_) => self.wtr.write_str("."),
95 Ast::Assertion(ref x) => self.fmt_assertion(x),
96 Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
97 Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
98 Ast::Class(Class::Bracketed(ref x)) => {
99 self.fmt_class_bracketed_post(x)
100 }
101 Ast::Repetition(ref x) => self.fmt_repetition(x),
102 Ast::Group(ref x) => self.fmt_group_post(x),
103 Ast::Alternation(_) => Ok(()),
104 Ast::Concat(_) => Ok(()),
105 }
106 }
107
108 fn visit_alternation_in(&mut self) -> fmt::Result {
109 self.wtr.write_str("|")
110 }
111
112 fn visit_class_set_item_pre(
113 &mut self,
114 ast: &ast::ClassSetItem,
115 ) -> Result<(), Self::Err> {
116 match *ast {
117 ast::ClassSetItem::Bracketed(ref x) => {
118 self.fmt_class_bracketed_pre(x)
119 }
120 _ => Ok(()),
121 }
122 }
123
124 fn visit_class_set_item_post(
125 &mut self,
126 ast: &ast::ClassSetItem,
127 ) -> Result<(), Self::Err> {
128 use crate::ast::ClassSetItem::*;
129
130 match *ast {
131 Empty(_) => Ok(()),
132 Literal(ref x) => self.fmt_literal(x),
133 Range(ref x) => {
134 self.fmt_literal(&x.start)?;
135 self.wtr.write_str("-")?;
136 self.fmt_literal(&x.end)?;
137 Ok(())
138 }
139 Ascii(ref x) => self.fmt_class_ascii(x),
140 Unicode(ref x) => self.fmt_class_unicode(x),
141 Perl(ref x) => self.fmt_class_perl(x),
142 Bracketed(ref x) => self.fmt_class_bracketed_post(x),
143 Union(_) => Ok(()),
144 }
145 }
146
147 fn visit_class_set_binary_op_in(
148 &mut self,
149 ast: &ast::ClassSetBinaryOp,
150 ) -> Result<(), Self::Err> {
151 self.fmt_class_set_binary_op_kind(&ast.kind)
152 }
153}
154
155impl<W: fmt::Write> Writer<W> {
156 fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
157 use crate::ast::GroupKind::*;
158 match ast.kind {
159 CaptureIndex(_) => self.wtr.write_str("("),
160 CaptureName(ref x) => {
161 self.wtr.write_str("(?P<")?;
162 self.wtr.write_str(&x.name)?;
163 self.wtr.write_str(">")?;
164 Ok(())
165 }
166 NonCapturing(ref flags) => {
167 self.wtr.write_str("(?")?;
168 self.fmt_flags(flags)?;
169 self.wtr.write_str(":")?;
170 Ok(())
171 }
172 }
173 }
174
175 fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
176 self.wtr.write_str(")")
177 }
178
179 fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
180 use crate::ast::RepetitionKind::*;
181 match ast.op.kind {
182 ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
183 ZeroOrOne => self.wtr.write_str("??"),
184 ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
185 ZeroOrMore => self.wtr.write_str("*?"),
186 OneOrMore if ast.greedy => self.wtr.write_str("+"),
187 OneOrMore => self.wtr.write_str("+?"),
188 Range(ref x) => {
189 self.fmt_repetition_range(x)?;
190 if !ast.greedy {
191 self.wtr.write_str("?")?;
192 }
193 Ok(())
194 }
195 }
196 }
197
198 fn fmt_repetition_range(
199 &mut self,
200 ast: &ast::RepetitionRange,
201 ) -> fmt::Result {
202 use crate::ast::RepetitionRange::*;
203 match *ast {
204 Exactly(x) => write!(self.wtr, "{{{}}}", x),
205 AtLeast(x) => write!(self.wtr, "{{{},}}", x),
206 Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
207 }
208 }
209
210 fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
211 use crate::ast::LiteralKind::*;
212
213 match ast.kind {
214 Verbatim => self.wtr.write_char(ast.c),
215 Punctuation => write!(self.wtr, r"\{}", ast.c),
216 Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
217 HexFixed(ast::HexLiteralKind::X) => {
218 write!(self.wtr, r"\x{:02X}", ast.c as u32)
219 }
220 HexFixed(ast::HexLiteralKind::UnicodeShort) => {
221 write!(self.wtr, r"\u{:04X}", ast.c as u32)
222 }
223 HexFixed(ast::HexLiteralKind::UnicodeLong) => {
224 write!(self.wtr, r"\U{:08X}", ast.c as u32)
225 }
226 HexBrace(ast::HexLiteralKind::X) => {
227 write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
228 }
229 HexBrace(ast::HexLiteralKind::UnicodeShort) => {
230 write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
231 }
232 HexBrace(ast::HexLiteralKind::UnicodeLong) => {
233 write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
234 }
235 Special(ast::SpecialLiteralKind::Bell) => {
236 self.wtr.write_str(r"\a")
237 }
238 Special(ast::SpecialLiteralKind::FormFeed) => {
239 self.wtr.write_str(r"\f")
240 }
241 Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
242 Special(ast::SpecialLiteralKind::LineFeed) => {
243 self.wtr.write_str(r"\n")
244 }
245 Special(ast::SpecialLiteralKind::CarriageReturn) => {
246 self.wtr.write_str(r"\r")
247 }
248 Special(ast::SpecialLiteralKind::VerticalTab) => {
249 self.wtr.write_str(r"\v")
250 }
251 Special(ast::SpecialLiteralKind::Space) => {
252 self.wtr.write_str(r"\ ")
253 }
254 }
255 }
256
257 fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
258 use crate::ast::AssertionKind::*;
259 match ast.kind {
260 StartLine => self.wtr.write_str("^"),
261 EndLine => self.wtr.write_str("$"),
262 StartText => self.wtr.write_str(r"\A"),
263 EndText => self.wtr.write_str(r"\z"),
264 WordBoundary => self.wtr.write_str(r"\b"),
265 NotWordBoundary => self.wtr.write_str(r"\B"),
266 }
267 }
268
269 fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
270 self.wtr.write_str("(?")?;
271 self.fmt_flags(&ast.flags)?;
272 self.wtr.write_str(")")?;
273 Ok(())
274 }
275
276 fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
277 use crate::ast::{Flag, FlagsItemKind};
278
279 for item in &ast.items {
280 match item.kind {
281 FlagsItemKind::Negation => self.wtr.write_str("-"),
282 FlagsItemKind::Flag(ref flag) => match *flag {
283 Flag::CaseInsensitive => self.wtr.write_str("i"),
284 Flag::MultiLine => self.wtr.write_str("m"),
285 Flag::DotMatchesNewLine => self.wtr.write_str("s"),
286 Flag::SwapGreed => self.wtr.write_str("U"),
287 Flag::Unicode => self.wtr.write_str("u"),
288 Flag::IgnoreWhitespace => self.wtr.write_str("x"),
289 },
290 }?;
291 }
292 Ok(())
293 }
294
295 fn fmt_class_bracketed_pre(
296 &mut self,
297 ast: &ast::ClassBracketed,
298 ) -> fmt::Result {
299 if ast.negated {
300 self.wtr.write_str("[^")
301 } else {
302 self.wtr.write_str("[")
303 }
304 }
305
306 fn fmt_class_bracketed_post(
307 &mut self,
308 _ast: &ast::ClassBracketed,
309 ) -> fmt::Result {
310 self.wtr.write_str("]")
311 }
312
313 fn fmt_class_set_binary_op_kind(
314 &mut self,
315 ast: &ast::ClassSetBinaryOpKind,
316 ) -> fmt::Result {
317 use crate::ast::ClassSetBinaryOpKind::*;
318 match *ast {
319 Intersection => self.wtr.write_str("&&"),
320 Difference => self.wtr.write_str("--"),
321 SymmetricDifference => self.wtr.write_str("~~"),
322 }
323 }
324
325 fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
326 use crate::ast::ClassPerlKind::*;
327 match ast.kind {
328 Digit if ast.negated => self.wtr.write_str(r"\D"),
329 Digit => self.wtr.write_str(r"\d"),
330 Space if ast.negated => self.wtr.write_str(r"\S"),
331 Space => self.wtr.write_str(r"\s"),
332 Word if ast.negated => self.wtr.write_str(r"\W"),
333 Word => self.wtr.write_str(r"\w"),
334 }
335 }
336
337 fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
338 use crate::ast::ClassAsciiKind::*;
339 match ast.kind {
340 Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
341 Alnum => self.wtr.write_str("[:alnum:]"),
342 Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
343 Alpha => self.wtr.write_str("[:alpha:]"),
344 Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
345 Ascii => self.wtr.write_str("[:ascii:]"),
346 Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
347 Blank => self.wtr.write_str("[:blank:]"),
348 Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
349 Cntrl => self.wtr.write_str("[:cntrl:]"),
350 Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
351 Digit => self.wtr.write_str("[:digit:]"),
352 Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
353 Graph => self.wtr.write_str("[:graph:]"),
354 Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
355 Lower => self.wtr.write_str("[:lower:]"),
356 Print if ast.negated => self.wtr.write_str("[:^print:]"),
357 Print => self.wtr.write_str("[:print:]"),
358 Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
359 Punct => self.wtr.write_str("[:punct:]"),
360 Space if ast.negated => self.wtr.write_str("[:^space:]"),
361 Space => self.wtr.write_str("[:space:]"),
362 Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
363 Upper => self.wtr.write_str("[:upper:]"),
364 Word if ast.negated => self.wtr.write_str("[:^word:]"),
365 Word => self.wtr.write_str("[:word:]"),
366 Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
367 Xdigit => self.wtr.write_str("[:xdigit:]"),
368 }
369 }
370
371 fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
372 use crate::ast::ClassUnicodeKind::*;
373 use crate::ast::ClassUnicodeOpKind::*;
374
375 if ast.negated {
376 self.wtr.write_str(r"\P")?;
377 } else {
378 self.wtr.write_str(r"\p")?;
379 }
380 match ast.kind {
381 OneLetter(c) => self.wtr.write_char(c),
382 Named(ref x) => write!(self.wtr, "{{{}}}", x),
383 NamedValue { op: Equal, ref name, ref value } => {
384 write!(self.wtr, "{{{}={}}}", name, value)
385 }
386 NamedValue { op: Colon, ref name, ref value } => {
387 write!(self.wtr, "{{{}:{}}}", name, value)
388 }
389 NamedValue { op: NotEqual, ref name, ref value } => {
390 write!(self.wtr, "{{{}!={}}}", name, value)
391 }
392 }
393 }
394}
395
#[cfg(test)]
mod tests {
    use super::Printer;
    use crate::ast::parse::ParserBuilder;

    /// Parses `given` with a default parser, prints the resulting AST and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|builder| builder, given);
    }

    /// Runs `roundtrip` on every pattern, in order.
    fn roundtrip_all(patterns: &[&str]) {
        for pattern in patterns {
            roundtrip(pattern);
        }
    }

    /// Like `roundtrip`, but lets `f` configure the parser builder before
    /// the pattern is parsed.
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let parsed = builder.build().parse(given).unwrap();

        let mut printed = String::new();
        Printer::new().print(&parsed, &mut printed).unwrap();
        assert_eq!(given, printed);
    }

    #[test]
    fn print_literal() {
        roundtrip_all(&["a", r"\["]);
        // Octal escapes are only parsed when the builder enables them.
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip_all(&[
            r"\x61",
            r"\x7F",
            r"\u0061",
            r"\U00000061",
            r"\x{61}",
            r"\x{7F}",
            r"\u{61}",
            r"\U{61}",
        ]);

        roundtrip_all(&[
            r"\a",
            r"\f",
            r"\t",
            r"\n",
            r"\r",
            r"\v",
            r"(?x)\ ",
        ]);
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip_all(&["ab", "abcde", "a(bcd)ef"]);
    }

    #[test]
    fn print_alternation() {
        roundtrip_all(&[
            "a|b",
            "a|b|c|d|e",
            "|a|b|c|d|e",
            "|a|b|c|d|e|",
            "a(b|c|d)|e|f",
        ]);
    }

    #[test]
    fn print_assertion() {
        roundtrip_all(&[r"^", r"$", r"\A", r"\z", r"\b", r"\B"]);
    }

    #[test]
    fn print_repetition() {
        roundtrip_all(&[
            "a?",
            "a??",
            "a*",
            "a*?",
            "a+",
            "a+?",
            "a{5}",
            "a{5}?",
            "a{5,}",
            "a{5,}?",
            "a{5,10}",
            "a{5,10}?",
        ]);
    }

    #[test]
    fn print_flags() {
        roundtrip_all(&[
            "(?i)",
            "(?-i)",
            "(?s-i)",
            "(?-si)",
            "(?siUmux)",
        ]);
    }

    #[test]
    fn print_group() {
        roundtrip_all(&["(?i:a)", "(?P<foo>a)", "(a)"]);
    }

    #[test]
    fn print_class() {
        // Bracketed classes, including set operations and nesting.
        roundtrip_all(&[
            r"[abc]",
            r"[a-z]",
            r"[^a-z]",
            r"[a-z0-9]",
            r"[-a-z0-9]",
            r"[-a-z0-9]",
            r"[a-z0-9---]",
            r"[a-z&&m-n]",
            r"[[a-z&&m-n]]",
            r"[a-z--m-n]",
            r"[a-z~~m-n]",
            r"[a-z[0-9]]",
            r"[a-z[^0-9]]",
        ]);

        // Perl classes.
        roundtrip_all(&[r"\d", r"\D", r"\s", r"\S", r"\w", r"\W"]);

        // ASCII classes, plain and negated.
        roundtrip_all(&[
            r"[[:alnum:]]",
            r"[[:^alnum:]]",
            r"[[:alpha:]]",
            r"[[:^alpha:]]",
            r"[[:ascii:]]",
            r"[[:^ascii:]]",
            r"[[:blank:]]",
            r"[[:^blank:]]",
            r"[[:cntrl:]]",
            r"[[:^cntrl:]]",
            r"[[:digit:]]",
            r"[[:^digit:]]",
            r"[[:graph:]]",
            r"[[:^graph:]]",
            r"[[:lower:]]",
            r"[[:^lower:]]",
            r"[[:print:]]",
            r"[[:^print:]]",
            r"[[:punct:]]",
            r"[[:^punct:]]",
            r"[[:space:]]",
            r"[[:^space:]]",
            r"[[:upper:]]",
            r"[[:^upper:]]",
            r"[[:word:]]",
            r"[[:^word:]]",
            r"[[:xdigit:]]",
            r"[[:^xdigit:]]",
        ]);

        // Unicode classes.
        roundtrip_all(&[
            r"\pL",
            r"\PL",
            r"\p{L}",
            r"\P{L}",
            r"\p{X=Y}",
            r"\P{X=Y}",
            r"\p{X:Y}",
            r"\P{X:Y}",
            r"\p{X!=Y}",
            r"\P{X!=Y}",
        ]);
    }
}
569