1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Different optimizations for pest's ASTs.
11
12use crate::ast::*;
13use std::collections::HashMap;
14
15#[cfg(test)]
16macro_rules! box_tree {
17 ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => (
18 $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ )
19 );
20 ($expr:expr) => ($expr);
21}
22
23mod concatenator;
24mod factorizer;
25mod lister;
26mod restorer;
27mod rotater;
28mod skipper;
29mod unroller;
30
31/// Takes pest's ASTs and optimizes them
32pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33 let optimized: Vec<OptimizedRule> = rulesimpl Iterator
34 .into_iter()
35 .map(rotater::rotate)
36 .map(skipper::skip)
37 .map(unroller::unroll)
38 .map(concatenator::concatenate)
39 .map(factorizer::factor)
40 .map(lister::list)
41 .map(rule_to_optimized_rule)
42 .collect();
43
44 let rules: HashMap = to_hash_map(&optimized);
45 optimizedimpl Iterator
46 .into_iter()
47 .map(|rule: OptimizedRule| restorer::restore_on_err(rule, &rules))
48 .collect()
49}
50
51fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52 fn to_optimized(expr: Expr) -> OptimizedExpr {
53 match expr {
54 Expr::Str(string) => OptimizedExpr::Str(string),
55 Expr::Insens(string) => OptimizedExpr::Insens(string),
56 Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57 Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58 Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59 Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60 Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61 Expr::Seq(lhs, rhs) => {
62 OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63 }
64 Expr::Choice(lhs, rhs) => {
65 OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66 }
67 Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68 Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69 Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70 Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71 #[cfg(feature = "grammar-extras")]
72 Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
73 Expr::RepOnce(_)
74 | Expr::RepExact(..)
75 | Expr::RepMin(..)
76 | Expr::RepMax(..)
77 | Expr::RepMinMax(..) => unreachable!("No valid transformation to OptimizedRule"),
78 }
79 }
80
81 OptimizedRule {
82 name: rule.name,
83 ty: rule.ty,
84 expr: to_optimized(rule.expr),
85 }
86}
87
88fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
89 rulesimpl Iterator
90 .iter()
91 .map(|r: &OptimizedRule| (r.name.clone(), r.expr.clone()))
92 .collect()
93}
94
95/// The optimized version of the pest AST's `Rule`.
96#[derive(Clone, Debug, Eq, PartialEq)]
97pub struct OptimizedRule {
98 /// The name of the rule.
99 pub name: String,
100 /// The type of the rule.
101 pub ty: RuleType,
102 /// The optimized expression of the rule.
103 pub expr: OptimizedExpr,
104}
105
106/// The optimized version of the pest AST's `Expr`.
107///
108/// # Warning: Semantic Versioning
109/// There may be non-breaking changes to the meta-grammar
110/// between minor versions. Those non-breaking changes, however,
111/// may translate into semver-breaking changes due to the additional variants
112/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
113/// future (e.g. by increasing MSRV and non_exhaustive annotations).
114#[derive(Clone, Debug, Eq, PartialEq)]
115pub enum OptimizedExpr {
116 /// Matches an exact string, e.g. `"a"`
117 Str(String),
118 /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
119 Insens(String),
120 /// Matches one character in the range, e.g. `'a'..'z'`
121 Range(String, String),
122 /// Matches the rule with the given name, e.g. `a`
123 Ident(String),
124 /// Matches a custom part of the stack, e.g. `PEEK[..]`
125 PeekSlice(i32, Option<i32>),
126 /// Positive lookahead; matches expression without making progress, e.g. `&e`
127 PosPred(Box<OptimizedExpr>),
128 /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
129 NegPred(Box<OptimizedExpr>),
130 /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
131 Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
132 /// Matches either of two expressions, e.g. `e1 | e2`
133 Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
134 /// Optionally matches an expression, e.g. `e?`
135 Opt(Box<OptimizedExpr>),
136 /// Matches an expression zero or more times, e.g. `e*`
137 Rep(Box<OptimizedExpr>),
138 /// Continues to match expressions until one of the strings in the `Vec` is found
139 Skip(Vec<String>),
140 /// Matches an expression and pushes it to the stack, e.g. `push(e)`
141 Push(Box<OptimizedExpr>),
142 /// Matches an expression and assigns a label to it, e.g. #label = exp
143 #[cfg(feature = "grammar-extras")]
144 NodeTag(Box<OptimizedExpr>, String),
145 /// Restores an expression's checkpoint
146 RestoreOnErr(Box<OptimizedExpr>),
147}
148
149impl OptimizedExpr {
150 /// Returns a top-down iterator over the `OptimizedExpr`.
151 pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
152 OptimizedExprTopDownIterator::new(self)
153 }
154
155 /// Applies `f` to the `OptimizedExpr` top-down.
156 pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
157 where
158 F: FnMut(OptimizedExpr) -> OptimizedExpr,
159 {
160 fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
161 where
162 F: FnMut(OptimizedExpr) -> OptimizedExpr,
163 {
164 let expr = f(expr);
165
166 match expr {
167 OptimizedExpr::PosPred(expr) => {
168 let mapped = Box::new(map_internal(*expr, f));
169 OptimizedExpr::PosPred(mapped)
170 }
171 OptimizedExpr::NegPred(expr) => {
172 let mapped = Box::new(map_internal(*expr, f));
173 OptimizedExpr::NegPred(mapped)
174 }
175 OptimizedExpr::Seq(lhs, rhs) => {
176 let mapped_lhs = Box::new(map_internal(*lhs, f));
177 let mapped_rhs = Box::new(map_internal(*rhs, f));
178 OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
179 }
180 OptimizedExpr::Choice(lhs, rhs) => {
181 let mapped_lhs = Box::new(map_internal(*lhs, f));
182 let mapped_rhs = Box::new(map_internal(*rhs, f));
183 OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
184 }
185 OptimizedExpr::Rep(expr) => {
186 let mapped = Box::new(map_internal(*expr, f));
187 OptimizedExpr::Rep(mapped)
188 }
189 OptimizedExpr::Opt(expr) => {
190 let mapped = Box::new(map_internal(*expr, f));
191 OptimizedExpr::Opt(mapped)
192 }
193 OptimizedExpr::Push(expr) => {
194 let mapped = Box::new(map_internal(*expr, f));
195 OptimizedExpr::Push(mapped)
196 }
197 expr => expr,
198 }
199 }
200
201 map_internal(self, &mut f)
202 }
203
204 /// Applies `f` to the `OptimizedExpr` bottom-up.
205 pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
206 where
207 F: FnMut(OptimizedExpr) -> OptimizedExpr,
208 {
209 fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
210 where
211 F: FnMut(OptimizedExpr) -> OptimizedExpr,
212 {
213 let mapped = match expr {
214 OptimizedExpr::PosPred(expr) => {
215 let mapped = Box::new(map_internal(*expr, f));
216 OptimizedExpr::PosPred(mapped)
217 }
218 OptimizedExpr::NegPred(expr) => {
219 let mapped = Box::new(map_internal(*expr, f));
220 OptimizedExpr::NegPred(mapped)
221 }
222 OptimizedExpr::Seq(lhs, rhs) => {
223 let mapped_lhs = Box::new(map_internal(*lhs, f));
224 let mapped_rhs = Box::new(map_internal(*rhs, f));
225 OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
226 }
227 OptimizedExpr::Choice(lhs, rhs) => {
228 let mapped_lhs = Box::new(map_internal(*lhs, f));
229 let mapped_rhs = Box::new(map_internal(*rhs, f));
230 OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
231 }
232 OptimizedExpr::Rep(expr) => {
233 let mapped = Box::new(map_internal(*expr, f));
234 OptimizedExpr::Rep(mapped)
235 }
236 OptimizedExpr::Opt(expr) => {
237 let mapped = Box::new(map_internal(*expr, f));
238 OptimizedExpr::Opt(mapped)
239 }
240 OptimizedExpr::Push(expr) => {
241 let mapped = Box::new(map_internal(*expr, f));
242 OptimizedExpr::Push(mapped)
243 }
244 expr => expr,
245 };
246
247 f(mapped)
248 }
249
250 map_internal(self, &mut f)
251 }
252}
253
254/// A top-down iterator over an `OptimizedExpr`.
255pub struct OptimizedExprTopDownIterator {
256 current: Option<OptimizedExpr>,
257 next: Option<OptimizedExpr>,
258 right_branches: Vec<OptimizedExpr>,
259}
260
261impl OptimizedExprTopDownIterator {
262 /// Creates a new top down iterator from an `OptimizedExpr`.
263 pub fn new(expr: &OptimizedExpr) -> Self {
264 let mut iter = OptimizedExprTopDownIterator {
265 current: None,
266 next: None,
267 right_branches: vec![],
268 };
269 iter.iterate_expr(expr.clone());
270 iter
271 }
272
273 fn iterate_expr(&mut self, expr: OptimizedExpr) {
274 self.current = Some(expr.clone());
275 match expr {
276 OptimizedExpr::Seq(lhs, rhs) => {
277 self.right_branches.push(*rhs);
278 self.next = Some(*lhs);
279 }
280 OptimizedExpr::Choice(lhs, rhs) => {
281 self.right_branches.push(*rhs);
282 self.next = Some(*lhs);
283 }
284 OptimizedExpr::PosPred(expr)
285 | OptimizedExpr::NegPred(expr)
286 | OptimizedExpr::Rep(expr)
287 | OptimizedExpr::Opt(expr)
288 | OptimizedExpr::Push(expr) => {
289 self.next = Some(*expr);
290 }
291 _ => {
292 self.next = None;
293 }
294 }
295 }
296}
297
298impl Iterator for OptimizedExprTopDownIterator {
299 type Item = OptimizedExpr;
300
301 fn next(&mut self) -> Option<Self::Item> {
302 let result: Option = self.current.take();
303
304 if let Some(expr: OptimizedExpr) = self.next.take() {
305 self.iterate_expr(expr);
306 } else if let Some(expr: OptimizedExpr) = self.right_branches.pop() {
307 self.iterate_expr(expr);
308 }
309
310 result
311 }
312}
313
314#[cfg(test)]
315mod tests {
316 use super::*;
317
318 #[test]
319 fn rotate() {
320 let rules = {
321 use crate::ast::Expr::*;
322 vec![Rule {
323 name: "rule".to_owned(),
324 ty: RuleType::Normal,
325 expr: box_tree!(Choice(
326 Choice(
327 Choice(Str(String::from("a")), Str(String::from("b"))),
328 Str(String::from("c"))
329 ),
330 Str(String::from("d"))
331 )),
332 }]
333 };
334 let rotated = {
335 use crate::optimizer::OptimizedExpr::*;
336 vec![OptimizedRule {
337 name: "rule".to_owned(),
338 ty: RuleType::Normal,
339 expr: box_tree!(Choice(
340 Str(String::from("a")),
341 Choice(
342 Str(String::from("b")),
343 Choice(Str(String::from("c")), Str(String::from("d")))
344 )
345 )),
346 }]
347 };
348
349 assert_eq!(optimize(rules), rotated);
350 }
351
352 #[test]
353 fn skip() {
354 let rules = {
355 use crate::ast::Expr::*;
356 vec![Rule {
357 name: "rule".to_owned(),
358 ty: RuleType::Atomic,
359 expr: box_tree!(Rep(Seq(
360 NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
361 Ident(String::from("ANY"))
362 ))),
363 }]
364 };
365 let skipped = vec![OptimizedRule {
366 name: "rule".to_owned(),
367 ty: RuleType::Atomic,
368 expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
369 }];
370
371 assert_eq!(optimize(rules), skipped);
372 }
373
374 #[test]
375 fn concat_strings() {
376 let rules = {
377 use crate::ast::Expr::*;
378 vec![Rule {
379 name: "rule".to_owned(),
380 ty: RuleType::Atomic,
381 expr: box_tree!(Seq(
382 Seq(Str(String::from("a")), Str(String::from("b"))),
383 Seq(Str(String::from("c")), Str(String::from("d")))
384 )),
385 }]
386 };
387 let concatenated = vec![OptimizedRule {
388 name: "rule".to_owned(),
389 ty: RuleType::Atomic,
390 expr: OptimizedExpr::Str(String::from("abcd")),
391 }];
392
393 assert_eq!(optimize(rules), concatenated);
394 }
395
396 #[test]
397 fn unroll_loop_exact() {
398 let rules = vec![Rule {
399 name: "rule".to_owned(),
400 ty: RuleType::Atomic,
401 expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
402 }];
403 let unrolled = {
404 use crate::optimizer::OptimizedExpr::*;
405 vec![OptimizedRule {
406 name: "rule".to_owned(),
407 ty: RuleType::Atomic,
408 expr: box_tree!(Seq(
409 Ident(String::from("a")),
410 Seq(Ident(String::from("a")), Ident(String::from("a")))
411 )),
412 }]
413 };
414
415 assert_eq!(optimize(rules), unrolled);
416 }
417
418 #[test]
419 fn unroll_loop_max() {
420 let rules = vec![Rule {
421 name: "rule".to_owned(),
422 ty: RuleType::Atomic,
423 expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
424 }];
425 let unrolled = {
426 use crate::optimizer::OptimizedExpr::*;
427 vec![OptimizedRule {
428 name: "rule".to_owned(),
429 ty: RuleType::Atomic,
430 expr: box_tree!(Seq(
431 Opt(Str(String::from("a"))),
432 Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
433 )),
434 }]
435 };
436
437 assert_eq!(optimize(rules), unrolled);
438 }
439
440 #[test]
441 fn unroll_loop_min() {
442 let rules = vec![Rule {
443 name: "rule".to_owned(),
444 ty: RuleType::Atomic,
445 expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
446 }];
447 let unrolled = {
448 use crate::optimizer::OptimizedExpr::*;
449 vec![OptimizedRule {
450 name: "rule".to_owned(),
451 ty: RuleType::Atomic,
452 expr: box_tree!(Seq(
453 Str(String::from("a")),
454 Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
455 )),
456 }]
457 };
458
459 assert_eq!(optimize(rules), unrolled);
460 }
461
462 #[test]
463 fn unroll_loop_min_max() {
464 let rules = vec![Rule {
465 name: "rule".to_owned(),
466 ty: RuleType::Atomic,
467 expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
468 }];
469 let unrolled = {
470 use crate::optimizer::OptimizedExpr::*;
471 vec![OptimizedRule {
472 name: "rule".to_owned(),
473 ty: RuleType::Atomic,
474 /* TODO possible room for improvement here:
475 * if the sequences were rolled out in the opposite
476 * order, we could further optimize the strings
477 * in cases like this.
478 Str(String::from(("aa")),
479 Opt(Str(String::from("a")))
480 */
481 expr: box_tree!(Seq(
482 Str(String::from("a")),
483 Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
484 )),
485 }]
486 };
487
488 assert_eq!(optimize(rules), unrolled);
489 }
490
491 #[test]
492 fn concat_insensitive_strings() {
493 let rules = {
494 use crate::ast::Expr::*;
495 vec![Rule {
496 name: "rule".to_owned(),
497 ty: RuleType::Atomic,
498 expr: box_tree!(Seq(
499 Seq(Insens(String::from("a")), Insens(String::from("b"))),
500 Seq(Insens(String::from("c")), Insens(String::from("d")))
501 )),
502 }]
503 };
504 let concatenated = vec![OptimizedRule {
505 name: "rule".to_owned(),
506 ty: RuleType::Atomic,
507 expr: OptimizedExpr::Insens(String::from("abcd")),
508 }];
509
510 assert_eq!(optimize(rules), concatenated);
511 }
512
513 #[test]
514 fn long_common_sequence() {
515 let rules = {
516 use crate::ast::Expr::*;
517 vec![Rule {
518 name: "rule".to_owned(),
519 ty: RuleType::Silent,
520 expr: box_tree!(Choice(
521 Seq(
522 Ident(String::from("a")),
523 Seq(Ident(String::from("b")), Ident(String::from("c")))
524 ),
525 Seq(
526 Seq(Ident(String::from("a")), Ident(String::from("b"))),
527 Ident(String::from("d"))
528 )
529 )),
530 }]
531 };
532 let optimized = {
533 use crate::optimizer::OptimizedExpr::*;
534 vec![OptimizedRule {
535 name: "rule".to_owned(),
536 ty: RuleType::Silent,
537 expr: box_tree!(Seq(
538 Ident(String::from("a")),
539 Seq(
540 Ident(String::from("b")),
541 Choice(Ident(String::from("c")), Ident(String::from("d")))
542 )
543 )),
544 }]
545 };
546
547 assert_eq!(optimize(rules), optimized);
548 }
549
550 #[test]
551 fn short_common_sequence() {
552 let rules = {
553 use crate::ast::Expr::*;
554 vec![Rule {
555 name: "rule".to_owned(),
556 ty: RuleType::Atomic,
557 expr: box_tree!(Choice(
558 Seq(Ident(String::from("a")), Ident(String::from("b"))),
559 Ident(String::from("a"))
560 )),
561 }]
562 };
563 let optimized = {
564 use crate::optimizer::OptimizedExpr::*;
565 vec![OptimizedRule {
566 name: "rule".to_owned(),
567 ty: RuleType::Atomic,
568 expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
569 }]
570 };
571
572 assert_eq!(optimize(rules), optimized);
573 }
574
575 #[test]
576 fn impossible_common_sequence() {
577 let rules = {
578 use crate::ast::Expr::*;
579 vec![Rule {
580 name: "rule".to_owned(),
581 ty: RuleType::Silent,
582 expr: box_tree!(Choice(
583 Ident(String::from("a")),
584 Seq(Ident(String::from("a")), Ident(String::from("b")))
585 )),
586 }]
587 };
588 let optimized = {
589 use crate::optimizer::OptimizedExpr::*;
590 vec![OptimizedRule {
591 name: "rule".to_owned(),
592 ty: RuleType::Silent,
593 expr: box_tree!(Ident(String::from("a"))),
594 }]
595 };
596
597 assert_eq!(optimize(rules), optimized);
598 }
599
600 #[test]
601 fn lister() {
602 let rules = {
603 use crate::ast::Expr::*;
604 vec![Rule {
605 name: "rule".to_owned(),
606 ty: RuleType::Silent,
607 expr: box_tree!(Seq(
608 Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
609 Ident(String::from("a"))
610 )),
611 }]
612 };
613 let optimized = {
614 use crate::optimizer::OptimizedExpr::*;
615 vec![OptimizedRule {
616 name: "rule".to_owned(),
617 ty: RuleType::Silent,
618 expr: box_tree!(Seq(
619 Ident(String::from("a")),
620 Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
621 )),
622 }]
623 };
624
625 assert_eq!(optimize(rules), optimized);
626 }
627}
628