1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Types for the pest's abstract syntax tree.
11
12/// A grammar rule
13#[derive(Clone, Debug, Eq, PartialEq)]
14pub struct Rule {
15 /// The name of the rule
16 pub name: String,
17 /// The rule's type (silent, atomic, ...)
18 pub ty: RuleType,
19 /// The rule's expression
20 pub expr: Expr,
21}
22
23/// All possible rule types
24#[derive(Clone, Copy, Debug, Eq, PartialEq)]
25pub enum RuleType {
26 /// The normal rule type
27 Normal,
28 /// Silent rules are just like normal rules
29 /// — when run, they function the same way —
30 /// except they do not produce pairs or tokens.
31 /// If a rule is silent, it will never appear in a parse result.
32 /// (their syntax is `_{ ... }`)
33 Silent,
34 /// atomic rule prevent implicit whitespace: inside an atomic rule,
35 /// the tilde ~ means "immediately followed by",
36 /// and repetition operators (asterisk * and plus sign +)
37 /// have no implicit separation. In addition, all other rules
38 /// called from an atomic rule are also treated as atomic.
39 /// In an atomic rule, interior matching rules are silent.
40 /// (their syntax is `@{ ... }`)
41 Atomic,
42 /// Compound atomic rules are similar to atomic rules,
43 /// but they produce inner tokens as normal.
44 /// (their syntax is `${ ... }`)
45 CompoundAtomic,
46 /// Non-atomic rules cancel the effect of atomic rules.
47 /// (their syntax is `!{ ... }`)
48 NonAtomic,
49}
50
51/// All possible rule expressions
52///
53/// # Warning: Semantic Versioning
54/// There may be non-breaking changes to the meta-grammar
55/// between minor versions. Those non-breaking changes, however,
56/// may translate into semver-breaking changes due to the additional variants
57/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
58/// future (e.g. by increasing MSRV and non_exhaustive annotations).
59#[derive(Clone, Debug, Eq, PartialEq)]
60pub enum Expr {
61 /// Matches an exact string, e.g. `"a"`
62 Str(String),
63 /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
64 Insens(String),
65 /// Matches one character in the range, e.g. `'a'..'z'`
66 Range(String, String),
67 /// Matches the rule with the given name, e.g. `a`
68 Ident(String),
69 /// Matches a custom part of the stack, e.g. `PEEK[..]`
70 PeekSlice(i32, Option<i32>),
71 /// Positive lookahead; matches expression without making progress, e.g. `&e`
72 PosPred(Box<Expr>),
73 /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
74 NegPred(Box<Expr>),
75 /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
76 Seq(Box<Expr>, Box<Expr>),
77 /// Matches either of two expressions, e.g. `e1 | e2`
78 Choice(Box<Expr>, Box<Expr>),
79 /// Optionally matches an expression, e.g. `e?`
80 Opt(Box<Expr>),
81 /// Matches an expression zero or more times, e.g. `e*`
82 Rep(Box<Expr>),
83 /// Matches an expression one or more times, e.g. `e+`
84 RepOnce(Box<Expr>),
85 /// Matches an expression an exact number of times, e.g. `e{n}`
86 RepExact(Box<Expr>, u32),
87 /// Matches an expression at least a number of times, e.g. `e{n,}`
88 RepMin(Box<Expr>, u32),
89 /// Matches an expression at most a number of times, e.g. `e{,n}`
90 RepMax(Box<Expr>, u32),
91 /// Matches an expression a number of times within a range, e.g. `e{m, n}`
92 RepMinMax(Box<Expr>, u32, u32),
93 /// Continues to match expressions until one of the strings in the `Vec` is found
94 Skip(Vec<String>),
95 /// Matches an expression and pushes it to the stack, e.g. `push(e)`
96 Push(Box<Expr>),
97 /// Matches an expression and assigns a label to it, e.g. #label = exp
98 #[cfg(feature = "grammar-extras")]
99 NodeTag(Box<Expr>, String),
100}
101
102impl Expr {
103 /// Returns the iterator that steps the expression from top to bottom.
104 pub fn iter_top_down(&self) -> ExprTopDownIterator {
105 ExprTopDownIterator::new(self)
106 }
107
108 /// Applies `f` to the expression and all its children (top to bottom).
109 pub fn map_top_down<F>(self, mut f: F) -> Expr
110 where
111 F: FnMut(Expr) -> Expr,
112 {
113 fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
114 where
115 F: FnMut(Expr) -> Expr,
116 {
117 let expr = f(expr);
118
119 match expr {
120 Expr::PosPred(expr) => {
121 let mapped = Box::new(map_internal(*expr, f));
122 Expr::PosPred(mapped)
123 }
124 Expr::NegPred(expr) => {
125 let mapped = Box::new(map_internal(*expr, f));
126 Expr::NegPred(mapped)
127 }
128 Expr::Seq(lhs, rhs) => {
129 let mapped_lhs = Box::new(map_internal(*lhs, f));
130 let mapped_rhs = Box::new(map_internal(*rhs, f));
131 Expr::Seq(mapped_lhs, mapped_rhs)
132 }
133 Expr::Choice(lhs, rhs) => {
134 let mapped_lhs = Box::new(map_internal(*lhs, f));
135 let mapped_rhs = Box::new(map_internal(*rhs, f));
136 Expr::Choice(mapped_lhs, mapped_rhs)
137 }
138 Expr::Rep(expr) => {
139 let mapped = Box::new(map_internal(*expr, f));
140 Expr::Rep(mapped)
141 }
142 Expr::RepOnce(expr) => {
143 let mapped = Box::new(map_internal(*expr, f));
144 Expr::RepOnce(mapped)
145 }
146 Expr::RepExact(expr, max) => {
147 let mapped = Box::new(map_internal(*expr, f));
148 Expr::RepExact(mapped, max)
149 }
150 Expr::RepMin(expr, num) => {
151 let mapped = Box::new(map_internal(*expr, f));
152 Expr::RepMin(mapped, num)
153 }
154 Expr::RepMax(expr, num) => {
155 let mapped = Box::new(map_internal(*expr, f));
156 Expr::RepMax(mapped, num)
157 }
158 Expr::RepMinMax(expr, min, max) => {
159 let mapped = Box::new(map_internal(*expr, f));
160 Expr::RepMinMax(mapped, min, max)
161 }
162 Expr::Opt(expr) => {
163 let mapped = Box::new(map_internal(*expr, f));
164 Expr::Opt(mapped)
165 }
166 Expr::Push(expr) => {
167 let mapped = Box::new(map_internal(*expr, f));
168 Expr::Push(mapped)
169 }
170 #[cfg(feature = "grammar-extras")]
171 Expr::NodeTag(expr, tag) => {
172 let mapped = Box::new(map_internal(*expr, f));
173 Expr::NodeTag(mapped, tag)
174 }
175 expr => expr,
176 }
177 }
178
179 map_internal(self, &mut f)
180 }
181
182 /// Applies `f` to the expression and all its children (bottom up).
183 pub fn map_bottom_up<F>(self, mut f: F) -> Expr
184 where
185 F: FnMut(Expr) -> Expr,
186 {
187 fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
188 where
189 F: FnMut(Expr) -> Expr,
190 {
191 let mapped = match expr {
192 Expr::PosPred(expr) => {
193 let mapped = Box::new(map_internal(*expr, f));
194 Expr::PosPred(mapped)
195 }
196 Expr::NegPred(expr) => {
197 let mapped = Box::new(map_internal(*expr, f));
198 Expr::NegPred(mapped)
199 }
200 Expr::Seq(lhs, rhs) => {
201 let mapped_lhs = Box::new(map_internal(*lhs, f));
202 let mapped_rhs = Box::new(map_internal(*rhs, f));
203 Expr::Seq(mapped_lhs, mapped_rhs)
204 }
205 Expr::Choice(lhs, rhs) => {
206 let mapped_lhs = Box::new(map_internal(*lhs, f));
207 let mapped_rhs = Box::new(map_internal(*rhs, f));
208 Expr::Choice(mapped_lhs, mapped_rhs)
209 }
210 Expr::Rep(expr) => {
211 let mapped = Box::new(map_internal(*expr, f));
212 Expr::Rep(mapped)
213 }
214 Expr::RepOnce(expr) => {
215 let mapped = Box::new(map_internal(*expr, f));
216 Expr::RepOnce(mapped)
217 }
218 Expr::RepExact(expr, num) => {
219 let mapped = Box::new(map_internal(*expr, f));
220 Expr::RepExact(mapped, num)
221 }
222 Expr::RepMin(expr, max) => {
223 let mapped = Box::new(map_internal(*expr, f));
224 Expr::RepMin(mapped, max)
225 }
226 Expr::RepMax(expr, max) => {
227 let mapped = Box::new(map_internal(*expr, f));
228 Expr::RepMax(mapped, max)
229 }
230 Expr::RepMinMax(expr, min, max) => {
231 let mapped = Box::new(map_internal(*expr, f));
232 Expr::RepMinMax(mapped, min, max)
233 }
234 Expr::Opt(expr) => {
235 let mapped = Box::new(map_internal(*expr, f));
236 Expr::Opt(mapped)
237 }
238 Expr::Push(expr) => {
239 let mapped = Box::new(map_internal(*expr, f));
240 Expr::Push(mapped)
241 }
242 #[cfg(feature = "grammar-extras")]
243 Expr::NodeTag(expr, tag) => {
244 let mapped = Box::new(map_internal(*expr, f));
245 Expr::NodeTag(mapped, tag)
246 }
247 expr => expr,
248 };
249
250 f(mapped)
251 }
252
253 map_internal(self, &mut f)
254 }
255}
256
257impl core::fmt::Display for Expr {
258 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
259 match self {
260 Expr::Str(s) => write!(f, "{:?}", s),
261 Expr::Insens(s) => write!(f, "^{:?}", s),
262 Expr::Range(start, end) => {
263 let start = start.chars().next().expect("Empty range start.");
264 let end = end.chars().next().expect("Empty range end.");
265 write!(f, "({:?}..{:?})", start, end)
266 }
267 Expr::Ident(id) => write!(f, "{}", id),
268 Expr::PeekSlice(start, end) => match end {
269 Some(end) => write!(f, "PEEK[{}..{}]", start, end),
270 None => write!(f, "PEEK[{}..]", start),
271 },
272 Expr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
273 Expr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
274 Expr::Seq(lhs, rhs) => {
275 let mut nodes = Vec::new();
276 nodes.push(lhs);
277 let mut current = rhs;
278 while let Expr::Seq(lhs, rhs) = current.as_ref() {
279 nodes.push(lhs);
280 current = rhs;
281 }
282 nodes.push(current);
283 let sequence = nodes
284 .iter()
285 .map(|node| format!("{}", node))
286 .collect::<Vec<_>>()
287 .join(" ~ ");
288 write!(f, "({})", sequence)
289 }
290 Expr::Choice(lhs, rhs) => {
291 let mut nodes = Vec::new();
292 nodes.push(lhs);
293 let mut current = rhs;
294 while let Expr::Choice(lhs, rhs) = current.as_ref() {
295 nodes.push(lhs);
296 current = rhs;
297 }
298 nodes.push(current);
299 let sequence = nodes
300 .iter()
301 .map(|node| format!("{}", node))
302 .collect::<Vec<_>>()
303 .join(" | ");
304 write!(f, "({})", sequence)
305 }
306 Expr::Opt(expr) => write!(f, "{}?", expr),
307 Expr::Rep(expr) => write!(f, "{}*", expr),
308 Expr::RepOnce(expr) => write!(f, "{}+", expr),
309 Expr::RepExact(expr, n) => write!(f, "{}{{{}}}", expr, n),
310 Expr::RepMin(expr, min) => write!(f, "{}{{{},}}", expr, min),
311 Expr::RepMax(expr, max) => write!(f, "{}{{,{}}}", expr, max),
312 Expr::RepMinMax(expr, min, max) => write!(f, "{}{{{}, {}}}", expr, min, max),
313 Expr::Skip(strings) => {
314 let strings = strings
315 .iter()
316 .map(|s| format!("{:?}", s))
317 .collect::<Vec<_>>()
318 .join(" | ");
319 write!(f, "(!({}) ~ ANY)*", strings)
320 }
321 Expr::Push(expr) => write!(f, "PUSH({})", expr),
322 #[cfg(feature = "grammar-extras")]
323 Expr::NodeTag(expr, tag) => {
324 write!(f, "(#{} = {})", tag, expr)
325 }
326 }
327 }
328}
329
330/// The top down iterator for an expression.
331pub struct ExprTopDownIterator {
332 current: Option<Expr>,
333 next: Option<Expr>,
334 right_branches: Vec<Expr>,
335}
336
337impl ExprTopDownIterator {
338 /// Constructs a top-down iterator from the expression.
339 pub fn new(expr: &Expr) -> Self {
340 let mut iter = ExprTopDownIterator {
341 current: None,
342 next: None,
343 right_branches: vec![],
344 };
345 iter.iterate_expr(expr.clone());
346 iter
347 }
348
349 fn iterate_expr(&mut self, expr: Expr) {
350 self.current = Some(expr.clone());
351 match expr {
352 Expr::Seq(lhs, rhs) => {
353 self.right_branches.push(*rhs);
354 self.next = Some(*lhs);
355 }
356 Expr::Choice(lhs, rhs) => {
357 self.right_branches.push(*rhs);
358 self.next = Some(*lhs);
359 }
360 Expr::PosPred(expr)
361 | Expr::NegPred(expr)
362 | Expr::Rep(expr)
363 | Expr::RepOnce(expr)
364 | Expr::RepExact(expr, _)
365 | Expr::RepMin(expr, _)
366 | Expr::RepMax(expr, _)
367 | Expr::RepMinMax(expr, ..)
368 | Expr::Opt(expr)
369 | Expr::Push(expr) => {
370 self.next = Some(*expr);
371 }
372 #[cfg(feature = "grammar-extras")]
373 Expr::NodeTag(expr, _) => {
374 self.next = Some(*expr);
375 }
376 _ => {
377 self.next = None;
378 }
379 }
380 }
381}
382
383impl Iterator for ExprTopDownIterator {
384 type Item = Expr;
385
386 fn next(&mut self) -> Option<Self::Item> {
387 let result: Option = self.current.take();
388
389 if let Some(expr: Expr) = self.next.take() {
390 self.iterate_expr(expr);
391 } else if let Some(expr: Expr) = self.right_branches.pop() {
392 self.iterate_expr(expr);
393 }
394
395 result
396 }
397}
398
399#[cfg(test)]
400mod tests {
401 use super::*;
402
403 #[test]
404 fn top_down_iterator() {
405 let expr = Expr::Choice(
406 Box::new(Expr::Str(String::from("a"))),
407 Box::new(Expr::Str(String::from("b"))),
408 );
409 let mut top_down = expr.iter_top_down();
410 assert_eq!(top_down.next(), Some(expr));
411 assert_eq!(top_down.next(), Some(Expr::Str(String::from("a"))));
412 assert_eq!(top_down.next(), Some(Expr::Str(String::from("b"))));
413 assert_eq!(top_down.next(), None);
414 }
415
416 #[test]
417 fn identity() {
418 let expr = Expr::Choice(
419 Box::new(Expr::Seq(
420 Box::new(Expr::Ident("a".to_owned())),
421 Box::new(Expr::Str("b".to_owned())),
422 )),
423 Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep(
424 Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice(
425 Box::new(Expr::Insens("c".to_owned())),
426 Box::new(Expr::Push(Box::new(Expr::Range(
427 "'d'".to_owned(),
428 "'e'".to_owned(),
429 )))),
430 )))))),
431 )))))),
432 );
433
434 assert_eq!(
435 expr.clone()
436 .map_bottom_up(|expr| expr)
437 .map_top_down(|expr| expr),
438 expr,
439 );
440 }
441
442 mod display {
443 use super::super::*;
444
445 #[test]
446 fn string() {
447 assert_eq!(Expr::Str("a".to_owned()).to_string(), r#""a""#);
448 }
449
450 #[test]
451 fn insens() {
452 assert_eq!(Expr::Insens("a".to_owned()).to_string(), r#"^"a""#);
453 }
454
455 #[test]
456 fn range() {
457 assert_eq!(
458 Expr::Range("a".to_owned(), "z".to_owned()).to_string(),
459 r#"('a'..'z')"#,
460 );
461 }
462
463 #[test]
464 fn ident() {
465 assert_eq!(Expr::Ident("a".to_owned()).to_string(), "a");
466 }
467
468 #[test]
469 fn peek_slice() {
470 assert_eq!(Expr::PeekSlice(0, None).to_string(), "PEEK[0..]");
471 assert_eq!(Expr::PeekSlice(0, Some(-1)).to_string(), "PEEK[0..-1]");
472 }
473
474 #[test]
475 fn pos_pred() {
476 assert_eq!(
477 Expr::PosPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
478 "&e",
479 );
480 }
481
482 #[test]
483 fn neg_pred() {
484 assert_eq!(
485 Expr::NegPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
486 "!e",
487 );
488 }
489
490 #[test]
491 fn seq() {
492 assert_eq!(
493 Expr::Seq(
494 Box::new(Expr::Ident("e1".to_owned())),
495 Box::new(Expr::Ident("e2".to_owned())),
496 )
497 .to_string(),
498 "(e1 ~ e2)",
499 );
500 assert_eq!(
501 Expr::Seq(
502 Box::new(Expr::Ident("e1".to_owned())),
503 Box::new(Expr::Seq(
504 Box::new(Expr::Ident("e2".to_owned())),
505 Box::new(Expr::Ident("e3".to_owned())),
506 )),
507 )
508 .to_string(),
509 "(e1 ~ e2 ~ e3)",
510 );
511 assert_eq!(
512 Expr::Seq(
513 Box::new(Expr::Ident("e1".to_owned())),
514 Box::new(Expr::Seq(
515 Box::new(Expr::Ident("e2".to_owned())),
516 Box::new(Expr::Seq(
517 Box::new(Expr::Ident("e3".to_owned())),
518 Box::new(Expr::Ident("e4".to_owned())),
519 )),
520 )),
521 )
522 .to_string(),
523 "(e1 ~ e2 ~ e3 ~ e4)",
524 );
525 assert_eq!(
526 Expr::Seq(
527 Box::new(Expr::Ident("e1".to_owned())),
528 Box::new(Expr::Choice(
529 Box::new(Expr::Ident("e2".to_owned())),
530 Box::new(Expr::Seq(
531 Box::new(Expr::Ident("e3".to_owned())),
532 Box::new(Expr::Ident("e4".to_owned())),
533 )),
534 )),
535 )
536 .to_string(),
537 "(e1 ~ (e2 | (e3 ~ e4)))",
538 );
539 assert_eq!(
540 Expr::Seq(
541 Box::new(Expr::Ident("e1".to_owned())),
542 Box::new(Expr::Seq(
543 Box::new(Expr::Ident("e2".to_owned())),
544 Box::new(Expr::Choice(
545 Box::new(Expr::Ident("e3".to_owned())),
546 Box::new(Expr::Ident("e4".to_owned())),
547 )),
548 )),
549 )
550 .to_string(),
551 "(e1 ~ e2 ~ (e3 | e4))",
552 );
553 }
554
555 #[test]
556 fn choice() {
557 assert_eq!(
558 Expr::Choice(
559 Box::new(Expr::Ident("e1".to_owned())),
560 Box::new(Expr::Ident("e2".to_owned())),
561 )
562 .to_string(),
563 "(e1 | e2)",
564 );
565 assert_eq!(
566 Expr::Choice(
567 Box::new(Expr::Ident("e1".to_owned())),
568 Box::new(Expr::Choice(
569 Box::new(Expr::Ident("e2".to_owned())),
570 Box::new(Expr::Ident("e3".to_owned())),
571 )),
572 )
573 .to_string(),
574 "(e1 | e2 | e3)",
575 );
576 assert_eq!(
577 Expr::Choice(
578 Box::new(Expr::Ident("e1".to_owned())),
579 Box::new(Expr::Choice(
580 Box::new(Expr::Ident("e2".to_owned())),
581 Box::new(Expr::Choice(
582 Box::new(Expr::Ident("e3".to_owned())),
583 Box::new(Expr::Ident("e4".to_owned())),
584 )),
585 )),
586 )
587 .to_string(),
588 "(e1 | e2 | e3 | e4)",
589 );
590 assert_eq!(
591 Expr::Choice(
592 Box::new(Expr::Ident("e1".to_owned())),
593 Box::new(Expr::Seq(
594 Box::new(Expr::Ident("e2".to_owned())),
595 Box::new(Expr::Choice(
596 Box::new(Expr::Ident("e3".to_owned())),
597 Box::new(Expr::Ident("e4".to_owned())),
598 )),
599 )),
600 )
601 .to_string(),
602 "(e1 | (e2 ~ (e3 | e4)))",
603 );
604 }
605
606 #[test]
607 fn opt() {
608 assert_eq!(
609 Expr::Opt(Box::new(Expr::Ident("e".to_owned()))).to_string(),
610 "e?",
611 );
612 }
613
614 #[test]
615 fn rep() {
616 assert_eq!(
617 Expr::Rep(Box::new(Expr::Ident("e".to_owned()))).to_string(),
618 "e*",
619 );
620 }
621
622 #[test]
623 fn rep_once() {
624 assert_eq!(
625 Expr::RepOnce(Box::new(Expr::Ident("e".to_owned()))).to_string(),
626 "e+",
627 );
628 }
629
630 #[test]
631 fn rep_exact() {
632 assert_eq!(
633 Expr::RepExact(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
634 "e{1}",
635 );
636 }
637
638 #[test]
639 fn rep_min() {
640 assert_eq!(
641 Expr::RepMin(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
642 "e{1,}",
643 );
644 }
645
646 #[test]
647 fn rep_max() {
648 assert_eq!(
649 Expr::RepMax(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
650 "e{,1}",
651 );
652 }
653
654 #[test]
655 fn rep_min_max() {
656 assert_eq!(
657 Expr::RepMinMax(Box::new(Expr::Ident("e".to_owned())), 1, 2).to_string(),
658 "e{1, 2}",
659 );
660 }
661
662 #[test]
663 fn skip() {
664 assert_eq!(
665 Expr::Skip(
666 ["a", "bc"]
667 .into_iter()
668 .map(|s| s.to_owned())
669 .collect::<Vec<_>>(),
670 )
671 .to_string(),
672 r#"(!("a" | "bc") ~ ANY)*"#,
673 );
674 }
675
676 #[test]
677 fn push() {
678 assert_eq!(
679 Expr::Push(Box::new(Expr::Ident("e".to_owned()))).to_string(),
680 "PUSH(e)",
681 );
682 }
683
684 #[test]
685 #[cfg(feature = "grammar-extras")]
686 fn node_tag() {
687 assert_eq!(
688 Expr::NodeTag(Box::new(Expr::Ident("expr".to_owned())), "label".to_owned())
689 .to_string(),
690 "(#label = expr)",
691 );
692 }
693 }
694}
695