1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 DragoČ™ Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | //! Different optimizations for pest's ASTs. |
11 | |
12 | use crate::ast::*; |
13 | use std::collections::HashMap; |
14 | |
15 | #[cfg (test)] |
16 | macro_rules! box_tree { |
17 | ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => ( |
18 | $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ ) |
19 | ); |
20 | ($expr:expr) => ($expr); |
21 | } |
22 | |
23 | mod concatenator; |
24 | mod factorizer; |
25 | mod lister; |
26 | mod restorer; |
27 | mod rotater; |
28 | mod skipper; |
29 | mod unroller; |
30 | |
31 | /// Takes pest's ASTs and optimizes them |
32 | pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> { |
33 | let map: HashMap = to_hash_map(&rules); |
34 | let optimized: Vec<OptimizedRule> = rulesimpl Iterator |
35 | .into_iter() |
36 | .map(rotater::rotate) |
37 | .map(|rule: Rule| skipper::skip(rule, &map)) |
38 | .map(unroller::unroll) |
39 | .map(concatenator::concatenate) |
40 | .map(factorizer::factor) |
41 | .map(lister::list) |
42 | .map(rule_to_optimized_rule) |
43 | .collect(); |
44 | |
45 | let optimized_map: HashMap = to_optimized_hash_map(&optimized); |
46 | optimizedimpl Iterator |
47 | .into_iter() |
48 | .map(|rule: OptimizedRule| restorer::restore_on_err(rule, &optimized_map)) |
49 | .collect() |
50 | } |
51 | |
52 | fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule { |
53 | fn to_optimized(expr: Expr) -> OptimizedExpr { |
54 | match expr { |
55 | Expr::Str(string) => OptimizedExpr::Str(string), |
56 | Expr::Insens(string) => OptimizedExpr::Insens(string), |
57 | Expr::Range(start, end) => OptimizedExpr::Range(start, end), |
58 | Expr::Ident(ident) => OptimizedExpr::Ident(ident), |
59 | Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end), |
60 | Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))), |
61 | Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))), |
62 | Expr::Seq(lhs, rhs) => { |
63 | OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) |
64 | } |
65 | Expr::Choice(lhs, rhs) => { |
66 | OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) |
67 | } |
68 | Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))), |
69 | Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))), |
70 | Expr::Skip(strings) => OptimizedExpr::Skip(strings), |
71 | Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))), |
72 | #[cfg (feature = "grammar-extras" )] |
73 | Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag), |
74 | #[cfg (feature = "grammar-extras" )] |
75 | Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))), |
76 | #[cfg (not(feature = "grammar-extras" ))] |
77 | Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule" ), |
78 | Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => { |
79 | unreachable!("No valid transformation to OptimizedRule" ) |
80 | } |
81 | } |
82 | } |
83 | |
84 | OptimizedRule { |
85 | name: rule.name, |
86 | ty: rule.ty, |
87 | expr: to_optimized(rule.expr), |
88 | } |
89 | } |
90 | |
91 | macro_rules! to_hash_map { |
92 | ($func_name:ident, $rule:ty, $expr:ty) => { |
93 | fn $func_name(rules: &[$rule]) -> HashMap<String, $expr> { |
94 | rules |
95 | .iter() |
96 | .map(|r| (r.name.clone(), r.expr.clone())) |
97 | .collect() |
98 | } |
99 | }; |
100 | } |
101 | to_hash_map!(to_hash_map, Rule, Expr); |
102 | to_hash_map!(to_optimized_hash_map, OptimizedRule, OptimizedExpr); |
103 | |
104 | /// The optimized version of the pest AST's `Rule`. |
105 | #[derive (Clone, Debug, Eq, PartialEq)] |
106 | pub struct OptimizedRule { |
107 | /// The name of the rule. |
108 | pub name: String, |
109 | /// The type of the rule. |
110 | pub ty: RuleType, |
111 | /// The optimized expression of the rule. |
112 | pub expr: OptimizedExpr, |
113 | } |
114 | |
115 | /// The optimized version of the pest AST's `Expr`. |
116 | /// |
117 | /// # Warning: Semantic Versioning |
118 | /// There may be non-breaking changes to the meta-grammar |
119 | /// between minor versions. Those non-breaking changes, however, |
120 | /// may translate into semver-breaking changes due to the additional variants |
121 | /// propaged from the `Rule` enum. This is a known issue and will be fixed in the |
122 | /// future (e.g. by increasing MSRV and non_exhaustive annotations). |
123 | #[derive (Clone, Debug, Eq, PartialEq)] |
124 | pub enum OptimizedExpr { |
125 | /// Matches an exact string, e.g. `"a"` |
126 | Str(String), |
127 | /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` |
128 | Insens(String), |
129 | /// Matches one character in the range, e.g. `'a'..'z'` |
130 | Range(String, String), |
131 | /// Matches the rule with the given name, e.g. `a` |
132 | Ident(String), |
133 | /// Matches a custom part of the stack, e.g. `PEEK[..]` |
134 | PeekSlice(i32, Option<i32>), |
135 | /// Positive lookahead; matches expression without making progress, e.g. `&e` |
136 | PosPred(Box<OptimizedExpr>), |
137 | /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e` |
138 | NegPred(Box<OptimizedExpr>), |
139 | /// Matches a sequence of two expressions, e.g. `e1 ~ e2` |
140 | Seq(Box<OptimizedExpr>, Box<OptimizedExpr>), |
141 | /// Matches either of two expressions, e.g. `e1 | e2` |
142 | Choice(Box<OptimizedExpr>, Box<OptimizedExpr>), |
143 | /// Optionally matches an expression, e.g. `e?` |
144 | Opt(Box<OptimizedExpr>), |
145 | /// Matches an expression zero or more times, e.g. `e*` |
146 | Rep(Box<OptimizedExpr>), |
147 | /// Matches an expression one or more times, e.g. `e+` |
148 | #[cfg (feature = "grammar-extras" )] |
149 | RepOnce(Box<OptimizedExpr>), |
150 | /// Continues to match expressions until one of the strings in the `Vec` is found |
151 | Skip(Vec<String>), |
152 | /// Matches an expression and pushes it to the stack, e.g. `push(e)` |
153 | Push(Box<OptimizedExpr>), |
154 | /// Matches an expression and assigns a label to it, e.g. #label = exp |
155 | #[cfg (feature = "grammar-extras" )] |
156 | NodeTag(Box<OptimizedExpr>, String), |
157 | /// Restores an expression's checkpoint |
158 | RestoreOnErr(Box<OptimizedExpr>), |
159 | } |
160 | |
161 | impl OptimizedExpr { |
162 | /// Returns a top-down iterator over the `OptimizedExpr`. |
163 | pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { |
164 | OptimizedExprTopDownIterator::new(self) |
165 | } |
166 | |
167 | /// Applies `f` to the `OptimizedExpr` top-down. |
168 | pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr |
169 | where |
170 | F: FnMut(OptimizedExpr) -> OptimizedExpr, |
171 | { |
172 | fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr |
173 | where |
174 | F: FnMut(OptimizedExpr) -> OptimizedExpr, |
175 | { |
176 | let expr = f(expr); |
177 | |
178 | match expr { |
179 | OptimizedExpr::PosPred(expr) => { |
180 | let mapped = Box::new(map_internal(*expr, f)); |
181 | OptimizedExpr::PosPred(mapped) |
182 | } |
183 | OptimizedExpr::NegPred(expr) => { |
184 | let mapped = Box::new(map_internal(*expr, f)); |
185 | OptimizedExpr::NegPred(mapped) |
186 | } |
187 | OptimizedExpr::Seq(lhs, rhs) => { |
188 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
189 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
190 | OptimizedExpr::Seq(mapped_lhs, mapped_rhs) |
191 | } |
192 | OptimizedExpr::Choice(lhs, rhs) => { |
193 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
194 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
195 | OptimizedExpr::Choice(mapped_lhs, mapped_rhs) |
196 | } |
197 | OptimizedExpr::Rep(expr) => { |
198 | let mapped = Box::new(map_internal(*expr, f)); |
199 | OptimizedExpr::Rep(mapped) |
200 | } |
201 | OptimizedExpr::Opt(expr) => { |
202 | let mapped = Box::new(map_internal(*expr, f)); |
203 | OptimizedExpr::Opt(mapped) |
204 | } |
205 | OptimizedExpr::Push(expr) => { |
206 | let mapped = Box::new(map_internal(*expr, f)); |
207 | OptimizedExpr::Push(mapped) |
208 | } |
209 | expr => expr, |
210 | } |
211 | } |
212 | |
213 | map_internal(self, &mut f) |
214 | } |
215 | |
216 | /// Applies `f` to the `OptimizedExpr` bottom-up. |
217 | pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr |
218 | where |
219 | F: FnMut(OptimizedExpr) -> OptimizedExpr, |
220 | { |
221 | fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr |
222 | where |
223 | F: FnMut(OptimizedExpr) -> OptimizedExpr, |
224 | { |
225 | let mapped = match expr { |
226 | OptimizedExpr::PosPred(expr) => { |
227 | let mapped = Box::new(map_internal(*expr, f)); |
228 | OptimizedExpr::PosPred(mapped) |
229 | } |
230 | OptimizedExpr::NegPred(expr) => { |
231 | let mapped = Box::new(map_internal(*expr, f)); |
232 | OptimizedExpr::NegPred(mapped) |
233 | } |
234 | OptimizedExpr::Seq(lhs, rhs) => { |
235 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
236 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
237 | OptimizedExpr::Seq(mapped_lhs, mapped_rhs) |
238 | } |
239 | OptimizedExpr::Choice(lhs, rhs) => { |
240 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
241 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
242 | OptimizedExpr::Choice(mapped_lhs, mapped_rhs) |
243 | } |
244 | OptimizedExpr::Rep(expr) => { |
245 | let mapped = Box::new(map_internal(*expr, f)); |
246 | OptimizedExpr::Rep(mapped) |
247 | } |
248 | OptimizedExpr::Opt(expr) => { |
249 | let mapped = Box::new(map_internal(*expr, f)); |
250 | OptimizedExpr::Opt(mapped) |
251 | } |
252 | OptimizedExpr::Push(expr) => { |
253 | let mapped = Box::new(map_internal(*expr, f)); |
254 | OptimizedExpr::Push(mapped) |
255 | } |
256 | expr => expr, |
257 | }; |
258 | |
259 | f(mapped) |
260 | } |
261 | |
262 | map_internal(self, &mut f) |
263 | } |
264 | } |
265 | |
266 | impl core::fmt::Display for OptimizedExpr { |
267 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
268 | match self { |
269 | OptimizedExpr::Str(s) => write!(f, " {:?}" , s), |
270 | OptimizedExpr::Insens(s) => write!(f, "^ {:?}" , s), |
271 | OptimizedExpr::Range(start, end) => { |
272 | let start = start.chars().next().expect("Empty range start." ); |
273 | let end = end.chars().next().expect("Empty range end." ); |
274 | write!(f, "( {:?}.. {:?})" , start, end) |
275 | } |
276 | OptimizedExpr::Ident(id) => write!(f, " {}" , id), |
277 | OptimizedExpr::PeekSlice(start, end) => match end { |
278 | Some(end) => write!(f, "PEEK[ {}.. {}]" , start, end), |
279 | None => write!(f, "PEEK[ {}..]" , start), |
280 | }, |
281 | OptimizedExpr::PosPred(expr) => write!(f, "& {}" , expr.as_ref()), |
282 | OptimizedExpr::NegPred(expr) => write!(f, "! {}" , expr.as_ref()), |
283 | OptimizedExpr::Seq(lhs, rhs) => { |
284 | let mut nodes = Vec::new(); |
285 | nodes.push(lhs); |
286 | let mut current = rhs; |
287 | while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() { |
288 | nodes.push(lhs); |
289 | current = rhs; |
290 | } |
291 | nodes.push(current); |
292 | let sequence = nodes |
293 | .iter() |
294 | .map(|node| format!(" {}" , node)) |
295 | .collect::<Vec<_>>() |
296 | .join(" ~ " ); |
297 | write!(f, "( {})" , sequence) |
298 | } |
299 | OptimizedExpr::Choice(lhs, rhs) => { |
300 | let mut nodes = Vec::new(); |
301 | nodes.push(lhs); |
302 | let mut current = rhs; |
303 | while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() { |
304 | nodes.push(lhs); |
305 | current = rhs; |
306 | } |
307 | nodes.push(current); |
308 | let sequence = nodes |
309 | .iter() |
310 | .map(|node| format!(" {}" , node)) |
311 | .collect::<Vec<_>>() |
312 | .join(" | " ); |
313 | write!(f, "( {})" , sequence) |
314 | } |
315 | OptimizedExpr::Opt(expr) => write!(f, " {}?" , expr), |
316 | OptimizedExpr::Rep(expr) => write!(f, " {}*" , expr), |
317 | #[cfg (feature = "grammar-extras" )] |
318 | OptimizedExpr::RepOnce(expr) => write!(f, "{}+" , expr), |
319 | OptimizedExpr::Skip(strings) => { |
320 | let strings = strings |
321 | .iter() |
322 | .map(|s| format!(" {:?}" , s)) |
323 | .collect::<Vec<_>>() |
324 | .join(" | " ); |
325 | write!(f, "(!( {}) ~ ANY)*" , strings) |
326 | } |
327 | OptimizedExpr::Push(expr) => write!(f, "PUSH( {})" , expr), |
328 | #[cfg (feature = "grammar-extras" )] |
329 | OptimizedExpr::NodeTag(expr, tag) => { |
330 | write!(f, "(#{} = {})" , tag, expr) |
331 | } |
332 | OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f), |
333 | } |
334 | } |
335 | } |
336 | |
337 | /// A top-down iterator over an `OptimizedExpr`. |
338 | pub struct OptimizedExprTopDownIterator { |
339 | current: Option<OptimizedExpr>, |
340 | next: Option<OptimizedExpr>, |
341 | right_branches: Vec<OptimizedExpr>, |
342 | } |
343 | |
344 | impl OptimizedExprTopDownIterator { |
345 | /// Creates a new top down iterator from an `OptimizedExpr`. |
346 | pub fn new(expr: &OptimizedExpr) -> Self { |
347 | let mut iter = OptimizedExprTopDownIterator { |
348 | current: None, |
349 | next: None, |
350 | right_branches: vec![], |
351 | }; |
352 | iter.iterate_expr(expr.clone()); |
353 | iter |
354 | } |
355 | |
356 | fn iterate_expr(&mut self, expr: OptimizedExpr) { |
357 | self.current = Some(expr.clone()); |
358 | match expr { |
359 | OptimizedExpr::Seq(lhs, rhs) => { |
360 | self.right_branches.push(*rhs); |
361 | self.next = Some(*lhs); |
362 | } |
363 | OptimizedExpr::Choice(lhs, rhs) => { |
364 | self.right_branches.push(*rhs); |
365 | self.next = Some(*lhs); |
366 | } |
367 | OptimizedExpr::PosPred(expr) |
368 | | OptimizedExpr::NegPred(expr) |
369 | | OptimizedExpr::Rep(expr) |
370 | | OptimizedExpr::Opt(expr) |
371 | | OptimizedExpr::Push(expr) => { |
372 | self.next = Some(*expr); |
373 | } |
374 | _ => { |
375 | self.next = None; |
376 | } |
377 | } |
378 | } |
379 | } |
380 | |
381 | impl Iterator for OptimizedExprTopDownIterator { |
382 | type Item = OptimizedExpr; |
383 | |
384 | fn next(&mut self) -> Option<Self::Item> { |
385 | let result: Option = self.current.take(); |
386 | |
387 | if let Some(expr: OptimizedExpr) = self.next.take() { |
388 | self.iterate_expr(expr); |
389 | } else if let Some(expr: OptimizedExpr) = self.right_branches.pop() { |
390 | self.iterate_expr(expr); |
391 | } |
392 | |
393 | result |
394 | } |
395 | } |
396 | |
397 | #[cfg (test)] |
398 | mod tests { |
399 | use super::*; |
400 | |
401 | #[test ] |
402 | fn rotate() { |
403 | let rules = { |
404 | use crate::ast::Expr::*; |
405 | vec![Rule { |
406 | name: "rule" .to_owned(), |
407 | ty: RuleType::Normal, |
408 | expr: box_tree!(Choice( |
409 | Choice( |
410 | Choice(Str(String::from("a" )), Str(String::from("b" ))), |
411 | Str(String::from("c" )) |
412 | ), |
413 | Str(String::from("d" )) |
414 | )), |
415 | }] |
416 | }; |
417 | let rotated = { |
418 | use crate::optimizer::OptimizedExpr::*; |
419 | vec![OptimizedRule { |
420 | name: "rule" .to_owned(), |
421 | ty: RuleType::Normal, |
422 | expr: box_tree!(Choice( |
423 | Str(String::from("a" )), |
424 | Choice( |
425 | Str(String::from("b" )), |
426 | Choice(Str(String::from("c" )), Str(String::from("d" ))) |
427 | ) |
428 | )), |
429 | }] |
430 | }; |
431 | |
432 | assert_eq!(optimize(rules), rotated); |
433 | } |
434 | |
435 | #[test ] |
436 | fn skip() { |
437 | let rules = { |
438 | use crate::ast::Expr::*; |
439 | vec![Rule { |
440 | name: "rule" .to_owned(), |
441 | ty: RuleType::Atomic, |
442 | expr: box_tree!(Rep(Seq( |
443 | NegPred(Choice(Str(String::from("a" )), Str(String::from("b" )))), |
444 | Ident(String::from("ANY" )) |
445 | ))), |
446 | }] |
447 | }; |
448 | let skipped = vec![OptimizedRule { |
449 | name: "rule" .to_owned(), |
450 | ty: RuleType::Atomic, |
451 | expr: OptimizedExpr::Skip(vec![String::from("a" ), String::from("b" )]), |
452 | }]; |
453 | |
454 | assert_eq!(optimize(rules), skipped); |
455 | } |
456 | |
457 | #[test ] |
458 | fn concat_strings() { |
459 | let rules = { |
460 | use crate::ast::Expr::*; |
461 | vec![Rule { |
462 | name: "rule" .to_owned(), |
463 | ty: RuleType::Atomic, |
464 | expr: box_tree!(Seq( |
465 | Seq(Str(String::from("a" )), Str(String::from("b" ))), |
466 | Seq(Str(String::from("c" )), Str(String::from("d" ))) |
467 | )), |
468 | }] |
469 | }; |
470 | let concatenated = vec![OptimizedRule { |
471 | name: "rule" .to_owned(), |
472 | ty: RuleType::Atomic, |
473 | expr: OptimizedExpr::Str(String::from("abcd" )), |
474 | }]; |
475 | |
476 | assert_eq!(optimize(rules), concatenated); |
477 | } |
478 | |
479 | #[test ] |
480 | fn unroll_loop_exact() { |
481 | let rules = vec![Rule { |
482 | name: "rule" .to_owned(), |
483 | ty: RuleType::Atomic, |
484 | expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a" ))), 3), |
485 | }]; |
486 | let unrolled = { |
487 | use crate::optimizer::OptimizedExpr::*; |
488 | vec![OptimizedRule { |
489 | name: "rule" .to_owned(), |
490 | ty: RuleType::Atomic, |
491 | expr: box_tree!(Seq( |
492 | Ident(String::from("a" )), |
493 | Seq(Ident(String::from("a" )), Ident(String::from("a" ))) |
494 | )), |
495 | }] |
496 | }; |
497 | |
498 | assert_eq!(optimize(rules), unrolled); |
499 | } |
500 | |
501 | #[test ] |
502 | fn unroll_loop_max() { |
503 | let rules = vec![Rule { |
504 | name: "rule" .to_owned(), |
505 | ty: RuleType::Atomic, |
506 | expr: Expr::RepMax(Box::new(Expr::Str("a" .to_owned())), 3), |
507 | }]; |
508 | let unrolled = { |
509 | use crate::optimizer::OptimizedExpr::*; |
510 | vec![OptimizedRule { |
511 | name: "rule" .to_owned(), |
512 | ty: RuleType::Atomic, |
513 | expr: box_tree!(Seq( |
514 | Opt(Str(String::from("a" ))), |
515 | Seq(Opt(Str(String::from("a" ))), Opt(Str(String::from("a" )))) |
516 | )), |
517 | }] |
518 | }; |
519 | |
520 | assert_eq!(optimize(rules), unrolled); |
521 | } |
522 | |
523 | #[test ] |
524 | fn unroll_loop_min() { |
525 | let rules = vec![Rule { |
526 | name: "rule" .to_owned(), |
527 | ty: RuleType::Atomic, |
528 | expr: Expr::RepMin(Box::new(Expr::Str("a" .to_owned())), 2), |
529 | }]; |
530 | let unrolled = { |
531 | use crate::optimizer::OptimizedExpr::*; |
532 | vec![OptimizedRule { |
533 | name: "rule" .to_owned(), |
534 | ty: RuleType::Atomic, |
535 | expr: box_tree!(Seq( |
536 | Str(String::from("a" )), |
537 | Seq(Str(String::from("a" )), Rep(Str(String::from("a" )))) |
538 | )), |
539 | }] |
540 | }; |
541 | |
542 | assert_eq!(optimize(rules), unrolled); |
543 | } |
544 | |
545 | #[test ] |
546 | fn unroll_loop_min_max() { |
547 | let rules = vec![Rule { |
548 | name: "rule" .to_owned(), |
549 | ty: RuleType::Atomic, |
550 | expr: Expr::RepMinMax(Box::new(Expr::Str("a" .to_owned())), 2, 3), |
551 | }]; |
552 | let unrolled = { |
553 | use crate::optimizer::OptimizedExpr::*; |
554 | vec![OptimizedRule { |
555 | name: "rule" .to_owned(), |
556 | ty: RuleType::Atomic, |
557 | /* TODO possible room for improvement here: |
558 | * if the sequences were rolled out in the opposite |
559 | * order, we could further optimize the strings |
560 | * in cases like this. |
561 | Str(String::from(("aa")), |
562 | Opt(Str(String::from("a"))) |
563 | */ |
564 | expr: box_tree!(Seq( |
565 | Str(String::from("a" )), |
566 | Seq(Str(String::from("a" )), Opt(Str(String::from("a" )))) |
567 | )), |
568 | }] |
569 | }; |
570 | |
571 | assert_eq!(optimize(rules), unrolled); |
572 | } |
573 | |
574 | #[test ] |
575 | fn concat_insensitive_strings() { |
576 | let rules = { |
577 | use crate::ast::Expr::*; |
578 | vec![Rule { |
579 | name: "rule" .to_owned(), |
580 | ty: RuleType::Atomic, |
581 | expr: box_tree!(Seq( |
582 | Seq(Insens(String::from("a" )), Insens(String::from("b" ))), |
583 | Seq(Insens(String::from("c" )), Insens(String::from("d" ))) |
584 | )), |
585 | }] |
586 | }; |
587 | let concatenated = vec![OptimizedRule { |
588 | name: "rule" .to_owned(), |
589 | ty: RuleType::Atomic, |
590 | expr: OptimizedExpr::Insens(String::from("abcd" )), |
591 | }]; |
592 | |
593 | assert_eq!(optimize(rules), concatenated); |
594 | } |
595 | |
596 | #[test ] |
597 | fn long_common_sequence() { |
598 | let rules = { |
599 | use crate::ast::Expr::*; |
600 | vec![Rule { |
601 | name: "rule" .to_owned(), |
602 | ty: RuleType::Silent, |
603 | expr: box_tree!(Choice( |
604 | Seq( |
605 | Ident(String::from("a" )), |
606 | Seq(Ident(String::from("b" )), Ident(String::from("c" ))) |
607 | ), |
608 | Seq( |
609 | Seq(Ident(String::from("a" )), Ident(String::from("b" ))), |
610 | Ident(String::from("d" )) |
611 | ) |
612 | )), |
613 | }] |
614 | }; |
615 | let optimized = { |
616 | use crate::optimizer::OptimizedExpr::*; |
617 | vec![OptimizedRule { |
618 | name: "rule" .to_owned(), |
619 | ty: RuleType::Silent, |
620 | expr: box_tree!(Seq( |
621 | Ident(String::from("a" )), |
622 | Seq( |
623 | Ident(String::from("b" )), |
624 | Choice(Ident(String::from("c" )), Ident(String::from("d" ))) |
625 | ) |
626 | )), |
627 | }] |
628 | }; |
629 | |
630 | assert_eq!(optimize(rules), optimized); |
631 | } |
632 | |
633 | #[test ] |
634 | fn short_common_sequence() { |
635 | let rules = { |
636 | use crate::ast::Expr::*; |
637 | vec![Rule { |
638 | name: "rule" .to_owned(), |
639 | ty: RuleType::Atomic, |
640 | expr: box_tree!(Choice( |
641 | Seq(Ident(String::from("a" )), Ident(String::from("b" ))), |
642 | Ident(String::from("a" )) |
643 | )), |
644 | }] |
645 | }; |
646 | let optimized = { |
647 | use crate::optimizer::OptimizedExpr::*; |
648 | vec![OptimizedRule { |
649 | name: "rule" .to_owned(), |
650 | ty: RuleType::Atomic, |
651 | expr: box_tree!(Seq(Ident(String::from("a" )), Opt(Ident(String::from("b" ))))), |
652 | }] |
653 | }; |
654 | |
655 | assert_eq!(optimize(rules), optimized); |
656 | } |
657 | |
658 | #[test ] |
659 | fn impossible_common_sequence() { |
660 | let rules = { |
661 | use crate::ast::Expr::*; |
662 | vec![Rule { |
663 | name: "rule" .to_owned(), |
664 | ty: RuleType::Silent, |
665 | expr: box_tree!(Choice( |
666 | Ident(String::from("a" )), |
667 | Seq(Ident(String::from("a" )), Ident(String::from("b" ))) |
668 | )), |
669 | }] |
670 | }; |
671 | let optimized = { |
672 | use crate::optimizer::OptimizedExpr::*; |
673 | vec![OptimizedRule { |
674 | name: "rule" .to_owned(), |
675 | ty: RuleType::Silent, |
676 | expr: box_tree!(Ident(String::from("a" ))), |
677 | }] |
678 | }; |
679 | |
680 | assert_eq!(optimize(rules), optimized); |
681 | } |
682 | |
683 | #[test ] |
684 | fn lister() { |
685 | let rules = { |
686 | use crate::ast::Expr::*; |
687 | vec![Rule { |
688 | name: "rule" .to_owned(), |
689 | ty: RuleType::Silent, |
690 | expr: box_tree!(Seq( |
691 | Rep(Seq(Ident(String::from("a" )), Ident(String::from("b" )))), |
692 | Ident(String::from("a" )) |
693 | )), |
694 | }] |
695 | }; |
696 | let optimized = { |
697 | use crate::optimizer::OptimizedExpr::*; |
698 | vec![OptimizedRule { |
699 | name: "rule" .to_owned(), |
700 | ty: RuleType::Silent, |
701 | expr: box_tree!(Seq( |
702 | Ident(String::from("a" )), |
703 | Rep(Seq(Ident(String::from("b" )), Ident(String::from("a" )))) |
704 | )), |
705 | }] |
706 | }; |
707 | |
708 | assert_eq!(optimize(rules), optimized); |
709 | } |
710 | |
711 | mod display { |
712 | use super::super::*; |
713 | /// In previous implementation of Display for OptimizedExpr |
714 | /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me), |
715 | /// Str("\n") will be displayed as |
716 | /// " |
717 | /// " |
718 | /// |
719 | /// It will not break the compilation in normal use. |
720 | /// |
721 | /// But when I use it in automatically generating documents, |
722 | /// it will quite confusing and we'll be unable to distinguish \n and \r. |
723 | /// |
724 | /// And `cargo expand` will emit codes that can't be compiled, |
725 | /// for it expand `#[doc("...")]` to `/// ...`, |
726 | /// and when the document comment breaks the line, |
727 | /// it will be expanded into wrong codes. |
728 | #[test ] |
729 | fn control_character() { |
730 | assert_eq!(OptimizedExpr::Str(" \n" .to_owned()).to_string(), " \"\\n \"" ); |
731 | assert_eq!( |
732 | OptimizedExpr::Insens(" \n" .to_owned()).to_string(), |
733 | "^ \"\\n \"" , |
734 | ); |
735 | assert_eq!( |
736 | OptimizedExpr::Range(" \n" .to_owned(), " \r" .to_owned()).to_string(), |
737 | "(' \\n'..' \\r')" , |
738 | ); |
739 | assert_eq!( |
740 | OptimizedExpr::Skip(vec![ |
741 | " \n" .to_owned(), |
742 | " \r" .to_owned(), |
743 | " \n\r" .to_owned(), |
744 | " \0" .to_owned(), |
745 | ]) |
746 | .to_string(), |
747 | r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"# , |
748 | ); |
749 | |
750 | assert_ne!(OptimizedExpr::Str(" \n" .to_owned()).to_string(), " \"\n\"" ); |
751 | } |
752 | |
753 | #[test ] |
754 | fn str() { |
755 | assert_eq!(OptimizedExpr::Str("a" .to_owned()).to_string(), r#""a""# ); |
756 | } |
757 | |
758 | #[test ] |
759 | fn insens() { |
760 | assert_eq!(OptimizedExpr::Insens("a" .to_owned()).to_string(), r#"^"a""# ); |
761 | } |
762 | |
763 | #[test ] |
764 | fn range() { |
765 | assert_eq!( |
766 | OptimizedExpr::Range("a" .to_owned(), "z" .to_owned()).to_string(), |
767 | r#"('a'..'z')"# , |
768 | ); |
769 | } |
770 | |
771 | #[test ] |
772 | fn ident() { |
773 | assert_eq!(OptimizedExpr::Ident("a" .to_owned()).to_string(), r#"a"# ); |
774 | } |
775 | |
776 | #[test ] |
777 | fn peek_slice() { |
778 | assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]" ); |
779 | assert_eq!( |
780 | OptimizedExpr::PeekSlice(0, Some(-1)).to_string(), |
781 | "PEEK[0..-1]" , |
782 | ); |
783 | assert_eq!( |
784 | OptimizedExpr::PeekSlice(2, Some(3)).to_string(), |
785 | "PEEK[2..3]" , |
786 | ); |
787 | assert_eq!( |
788 | OptimizedExpr::PeekSlice(2, Some(-1)).to_string(), |
789 | "PEEK[2..-1]" , |
790 | ); |
791 | assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]" ); |
792 | } |
793 | |
794 | #[test ] |
795 | fn pos_pred() { |
796 | assert_eq!( |
797 | OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new( |
798 | OptimizedExpr::Ident("a" .to_owned()), |
799 | )))) |
800 | .to_string(), |
801 | "&!a" , |
802 | ); |
803 | assert_eq!( |
804 | OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice( |
805 | Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident( |
806 | "a" .to_owned(), |
807 | )))), |
808 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
809 | ))) |
810 | .to_string(), |
811 | r#"&(a* | "a")"# , |
812 | ); |
813 | assert_eq!( |
814 | OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new( |
815 | OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a" .to_owned()))), |
816 | )))) |
817 | .to_string(), |
818 | "&!a" , |
819 | ); |
820 | } |
821 | |
822 | #[test ] |
823 | fn neg_pred() { |
824 | assert_eq!( |
825 | OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e" .to_owned()))).to_string(), |
826 | r#"!e"# , |
827 | ); |
828 | assert_eq!( |
829 | OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice( |
830 | Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident( |
831 | "a" .to_owned(), |
832 | )))), |
833 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
834 | ))) |
835 | .to_string(), |
836 | r#"!(PUSH(a) | "a")"# , |
837 | ); |
838 | } |
839 | |
840 | #[test ] |
841 | fn seq() { |
842 | assert_eq!( |
843 | OptimizedExpr::Seq( |
844 | Box::new(OptimizedExpr::Ident("e1" .to_owned())), |
845 | Box::new(OptimizedExpr::Ident("e2" .to_owned())), |
846 | ) |
847 | .to_string(), |
848 | r#"(e1 ~ e2)"# , |
849 | ); |
850 | assert_eq!( |
851 | Expr::Seq( |
852 | Box::new(Expr::Ident("e1" .to_owned())), |
853 | Box::new(Expr::Seq( |
854 | Box::new(Expr::Ident("e2" .to_owned())), |
855 | Box::new(Expr::Ident("e3" .to_owned())), |
856 | )), |
857 | ) |
858 | .to_string(), |
859 | "(e1 ~ e2 ~ e3)" , |
860 | ); |
861 | assert_eq!( |
862 | Expr::Seq( |
863 | Box::new(Expr::Ident("e1" .to_owned())), |
864 | Box::new(Expr::Seq( |
865 | Box::new(Expr::Ident("e2" .to_owned())), |
866 | Box::new(Expr::Seq( |
867 | Box::new(Expr::Ident("e3" .to_owned())), |
868 | Box::new(Expr::Ident("e4" .to_owned())), |
869 | )), |
870 | )), |
871 | ) |
872 | .to_string(), |
873 | "(e1 ~ e2 ~ e3 ~ e4)" , |
874 | ); |
875 | assert_eq!( |
876 | Expr::Seq( |
877 | Box::new(Expr::Ident("e1" .to_owned())), |
878 | Box::new(Expr::Choice( |
879 | Box::new(Expr::Ident("e2" .to_owned())), |
880 | Box::new(Expr::Seq( |
881 | Box::new(Expr::Ident("e3" .to_owned())), |
882 | Box::new(Expr::Ident("e4" .to_owned())), |
883 | )), |
884 | )), |
885 | ) |
886 | .to_string(), |
887 | "(e1 ~ (e2 | (e3 ~ e4)))" , |
888 | ); |
889 | assert_eq!( |
890 | Expr::Seq( |
891 | Box::new(Expr::Ident("e1" .to_owned())), |
892 | Box::new(Expr::Seq( |
893 | Box::new(Expr::Ident("e2" .to_owned())), |
894 | Box::new(Expr::Choice( |
895 | Box::new(Expr::Ident("e3" .to_owned())), |
896 | Box::new(Expr::Ident("e4" .to_owned())), |
897 | )), |
898 | )), |
899 | ) |
900 | .to_string(), |
901 | "(e1 ~ e2 ~ (e3 | e4))" , |
902 | ); |
903 | assert_eq!( |
904 | OptimizedExpr::Seq( |
905 | Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str( |
906 | "a" .to_owned(), |
907 | )))), |
908 | Box::new(OptimizedExpr::Seq( |
909 | Box::new(OptimizedExpr::Ident("b" .to_owned())), |
910 | Box::new(OptimizedExpr::Insens("c" .to_owned())), |
911 | )), |
912 | ) |
913 | .to_string(), |
914 | r#"("a"* ~ b ~ ^"c")"# , |
915 | ); |
916 | assert_eq!( |
917 | OptimizedExpr::Seq( |
918 | Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range( |
919 | "a" .to_owned(), |
920 | "z" .to_owned(), |
921 | )))), |
922 | Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt( |
923 | Box::new(OptimizedExpr::Range("A" .to_owned(), "Z" .to_owned())), |
924 | )))), |
925 | ) |
926 | .to_string(), |
927 | "(&('a'..'z') ~ !('A'..'Z')?)" , |
928 | ); |
929 | } |
930 | |
931 | #[test ] |
932 | fn choice() { |
933 | assert_eq!( |
934 | OptimizedExpr::Choice( |
935 | Box::new(OptimizedExpr::Ident("e1" .to_owned())), |
936 | Box::new(OptimizedExpr::Ident("e2" .to_owned())), |
937 | ) |
938 | .to_string(), |
939 | r#"(e1 | e2)"# , |
940 | ); |
941 | assert_eq!( |
942 | Expr::Choice( |
943 | Box::new(Expr::Ident("e1" .to_owned())), |
944 | Box::new(Expr::Choice( |
945 | Box::new(Expr::Ident("e2" .to_owned())), |
946 | Box::new(Expr::Ident("e3" .to_owned())), |
947 | )), |
948 | ) |
949 | .to_string(), |
950 | "(e1 | e2 | e3)" , |
951 | ); |
952 | assert_eq!( |
953 | Expr::Choice( |
954 | Box::new(Expr::Ident("e1" .to_owned())), |
955 | Box::new(Expr::Choice( |
956 | Box::new(Expr::Ident("e2" .to_owned())), |
957 | Box::new(Expr::Choice( |
958 | Box::new(Expr::Ident("e3" .to_owned())), |
959 | Box::new(Expr::Ident("e4" .to_owned())), |
960 | )), |
961 | )), |
962 | ) |
963 | .to_string(), |
964 | "(e1 | e2 | e3 | e4)" , |
965 | ); |
966 | assert_eq!( |
967 | Expr::Choice( |
968 | Box::new(Expr::Ident("e1" .to_owned())), |
969 | Box::new(Expr::Seq( |
970 | Box::new(Expr::Ident("e2" .to_owned())), |
971 | Box::new(Expr::Choice( |
972 | Box::new(Expr::Ident("e3" .to_owned())), |
973 | Box::new(Expr::Ident("e4" .to_owned())), |
974 | )), |
975 | )), |
976 | ) |
977 | .to_string(), |
978 | "(e1 | (e2 ~ (e3 | e4)))" , |
979 | ); |
980 | assert_eq!( |
981 | OptimizedExpr::Choice( |
982 | Box::new(OptimizedExpr::Str("a" .to_owned())), |
983 | Box::new(OptimizedExpr::Choice( |
984 | Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident( |
985 | "b" .to_owned(), |
986 | )))), |
987 | Box::new(OptimizedExpr::Insens("c" .to_owned())), |
988 | )), |
989 | ) |
990 | .to_string(), |
991 | r#"("a" | PUSH(b) | ^"c")"# , |
992 | ); |
993 | } |
994 | |
995 | #[test ] |
996 | fn opt() { |
997 | assert_eq!( |
998 | OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e" .to_owned()))).to_string(), |
999 | "e?" , |
1000 | ); |
1001 | } |
1002 | |
1003 | #[test ] |
1004 | fn rep() { |
1005 | assert_eq!( |
1006 | OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x" .to_owned()))).to_string(), |
1007 | "x*" , |
1008 | ); |
1009 | assert_eq!( |
1010 | OptimizedExpr::Rep(Box::new(OptimizedExpr::Range( |
1011 | "0" .to_owned(), |
1012 | "9" .to_owned(), |
1013 | ))) |
1014 | .to_string(), |
1015 | "('0'..'9')*" , |
1016 | ); |
1017 | } |
1018 | |
1019 | #[test ] |
1020 | #[cfg (feature = "grammar-extras" )] |
1021 | fn rep_once() { |
1022 | assert_eq!( |
1023 | OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e" .to_owned()))).to_string(), |
1024 | "e+" , |
1025 | ); |
1026 | assert_eq!( |
1027 | OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range( |
1028 | "0" .to_owned(), |
1029 | "9" .to_owned(), |
1030 | ))) |
1031 | .to_string(), |
1032 | "('0'..'9')+" , |
1033 | ); |
1034 | } |
1035 | |
1036 | #[test ] |
1037 | fn skip() { |
1038 | assert_eq!( |
1039 | OptimizedExpr::Skip( |
1040 | ["a" , "bc" ] |
1041 | .into_iter() |
1042 | .map(|s| s.to_owned()) |
1043 | .collect::<Vec<_>>(), |
1044 | ) |
1045 | .to_string(), |
1046 | r#"(!("a" | "bc") ~ ANY)*"# , |
1047 | ); |
1048 | } |
1049 | |
1050 | #[test ] |
1051 | fn inline_skip() { |
1052 | use crate::ast::Expr::*; |
1053 | let rules = vec![ |
1054 | Rule { |
1055 | name: "inline" .to_owned(), |
1056 | ty: RuleType::Atomic, |
1057 | expr: Str("a" .to_owned()), |
1058 | }, |
1059 | Rule { |
1060 | name: "skip" .to_owned(), |
1061 | ty: RuleType::Atomic, |
1062 | expr: box_tree!(Rep(Seq( |
1063 | NegPred(Choice( |
1064 | Ident(String::from("inline" )), |
1065 | Str(String::from("b" )) |
1066 | )), |
1067 | Ident("ANY" .to_owned()) |
1068 | ))), |
1069 | }, |
1070 | ]; |
1071 | let map = to_hash_map(&rules); |
1072 | let rule = skipper::skip(rules[1].clone(), &map); |
1073 | assert!(matches!(rule, Rule { expr: Skip(..), .. })); |
1074 | let choices = match rule.expr { |
1075 | Skip(choices) => choices, |
1076 | _ => unreachable!(), |
1077 | }; |
1078 | assert_eq!(choices, vec!["a" .to_owned(), "b" .to_owned()]); |
1079 | } |
1080 | |
1081 | #[test ] |
1082 | fn push() { |
1083 | assert_eq!( |
1084 | OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e" .to_owned()))).to_string(), |
1085 | "PUSH(e)" , |
1086 | ); |
1087 | } |
1088 | |
1089 | #[test ] |
1090 | #[cfg (feature = "grammar-extras" )] |
1091 | fn node_tag() { |
1092 | assert_eq!( |
1093 | OptimizedExpr::NodeTag( |
1094 | Box::new(OptimizedExpr::Ident("expr" .to_owned())), |
1095 | "label" .to_owned(), |
1096 | ) |
1097 | .to_string(), |
1098 | r#"(#label = expr)"# , |
1099 | ); |
1100 | assert_eq!( |
1101 | OptimizedExpr::NodeTag( |
1102 | Box::new(OptimizedExpr::Ident("x" .to_owned())), |
1103 | "X" .to_owned(), |
1104 | ) |
1105 | .to_string(), |
1106 | r#"(#X = x)"# , |
1107 | ); |
1108 | assert_eq!( |
1109 | OptimizedExpr::NodeTag( |
1110 | Box::new(OptimizedExpr::Seq( |
1111 | Box::new(OptimizedExpr::Ident("x" .to_owned())), |
1112 | Box::new(OptimizedExpr::Str("y" .to_owned())), |
1113 | )), |
1114 | "X" .to_owned(), |
1115 | ) |
1116 | .to_string(), |
1117 | r#"(#X = (x ~ "y"))"# , |
1118 | ); |
1119 | } |
1120 | |
1121 | #[test ] |
1122 | fn restore_on_err() { |
1123 | assert_eq!( |
1124 | OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e" .to_owned()))) |
1125 | .to_string(), |
1126 | "e" , |
1127 | ); |
1128 | } |
1129 | } |
1130 | } |
1131 | |