1 | // pest. The Elegant Parser |
2 | // Copyright (c) 2018 Dragoș Tiselice |
3 | // |
4 | // Licensed under the Apache License, Version 2.0 |
5 | // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
6 | // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
7 | // option. All files in the project carrying such notice may not be copied, |
8 | // modified, or distributed except according to those terms. |
9 | |
10 | //! Types for the pest's abstract syntax tree. |
11 | |
12 | /// A grammar rule |
13 | #[derive (Clone, Debug, Eq, PartialEq)] |
14 | pub struct Rule { |
15 | /// The name of the rule |
16 | pub name: String, |
17 | /// The rule's type (silent, atomic, ...) |
18 | pub ty: RuleType, |
19 | /// The rule's expression |
20 | pub expr: Expr, |
21 | } |
22 | |
23 | /// All possible rule types |
24 | #[derive (Clone, Copy, Debug, Eq, PartialEq)] |
25 | pub enum RuleType { |
26 | /// The normal rule type |
27 | Normal, |
28 | /// Silent rules are just like normal rules |
29 | /// — when run, they function the same way — |
30 | /// except they do not produce pairs or tokens. |
31 | /// If a rule is silent, it will never appear in a parse result. |
32 | /// (their syntax is `_{ ... }`) |
33 | Silent, |
34 | /// atomic rule prevent implicit whitespace: inside an atomic rule, |
35 | /// the tilde ~ means "immediately followed by", |
36 | /// and repetition operators (asterisk * and plus sign +) |
37 | /// have no implicit separation. In addition, all other rules |
38 | /// called from an atomic rule are also treated as atomic. |
39 | /// In an atomic rule, interior matching rules are silent. |
40 | /// (their syntax is `@{ ... }`) |
41 | Atomic, |
42 | /// Compound atomic rules are similar to atomic rules, |
43 | /// but they produce inner tokens as normal. |
44 | /// (their syntax is `${ ... }`) |
45 | CompoundAtomic, |
46 | /// Non-atomic rules cancel the effect of atomic rules. |
47 | /// (their syntax is `!{ ... }`) |
48 | NonAtomic, |
49 | } |
50 | |
51 | /// All possible rule expressions |
52 | /// |
53 | /// # Warning: Semantic Versioning |
54 | /// There may be non-breaking changes to the meta-grammar |
55 | /// between minor versions. Those non-breaking changes, however, |
56 | /// may translate into semver-breaking changes due to the additional variants |
57 | /// propaged from the `Rule` enum. This is a known issue and will be fixed in the |
58 | /// future (e.g. by increasing MSRV and non_exhaustive annotations). |
59 | #[derive (Clone, Debug, Eq, PartialEq)] |
60 | pub enum Expr { |
61 | /// Matches an exact string, e.g. `"a"` |
62 | Str(String), |
63 | /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` |
64 | Insens(String), |
65 | /// Matches one character in the range, e.g. `'a'..'z'` |
66 | Range(String, String), |
67 | /// Matches the rule with the given name, e.g. `a` |
68 | Ident(String), |
69 | /// Matches a custom part of the stack, e.g. `PEEK[..]` |
70 | PeekSlice(i32, Option<i32>), |
71 | /// Positive lookahead; matches expression without making progress, e.g. `&e` |
72 | PosPred(Box<Expr>), |
73 | /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e` |
74 | NegPred(Box<Expr>), |
75 | /// Matches a sequence of two expressions, e.g. `e1 ~ e2` |
76 | Seq(Box<Expr>, Box<Expr>), |
77 | /// Matches either of two expressions, e.g. `e1 | e2` |
78 | Choice(Box<Expr>, Box<Expr>), |
79 | /// Optionally matches an expression, e.g. `e?` |
80 | Opt(Box<Expr>), |
81 | /// Matches an expression zero or more times, e.g. `e*` |
82 | Rep(Box<Expr>), |
83 | /// Matches an expression one or more times, e.g. `e+` |
84 | RepOnce(Box<Expr>), |
85 | /// Matches an expression an exact number of times, e.g. `e{n}` |
86 | RepExact(Box<Expr>, u32), |
87 | /// Matches an expression at least a number of times, e.g. `e{n,}` |
88 | RepMin(Box<Expr>, u32), |
89 | /// Matches an expression at most a number of times, e.g. `e{,n}` |
90 | RepMax(Box<Expr>, u32), |
91 | /// Matches an expression a number of times within a range, e.g. `e{m, n}` |
92 | RepMinMax(Box<Expr>, u32, u32), |
93 | /// Continues to match expressions until one of the strings in the `Vec` is found |
94 | Skip(Vec<String>), |
95 | /// Matches an expression and pushes it to the stack, e.g. `push(e)` |
96 | Push(Box<Expr>), |
97 | /// Matches an expression and assigns a label to it, e.g. #label = exp |
98 | #[cfg (feature = "grammar-extras" )] |
99 | NodeTag(Box<Expr>, String), |
100 | } |
101 | |
102 | impl Expr { |
103 | /// Returns the iterator that steps the expression from top to bottom. |
104 | pub fn iter_top_down(&self) -> ExprTopDownIterator { |
105 | ExprTopDownIterator::new(self) |
106 | } |
107 | |
108 | /// Applies `f` to the expression and all its children (top to bottom). |
109 | pub fn map_top_down<F>(self, mut f: F) -> Expr |
110 | where |
111 | F: FnMut(Expr) -> Expr, |
112 | { |
113 | fn map_internal<F>(expr: Expr, f: &mut F) -> Expr |
114 | where |
115 | F: FnMut(Expr) -> Expr, |
116 | { |
117 | let expr = f(expr); |
118 | |
119 | match expr { |
120 | Expr::PosPred(expr) => { |
121 | let mapped = Box::new(map_internal(*expr, f)); |
122 | Expr::PosPred(mapped) |
123 | } |
124 | Expr::NegPred(expr) => { |
125 | let mapped = Box::new(map_internal(*expr, f)); |
126 | Expr::NegPred(mapped) |
127 | } |
128 | Expr::Seq(lhs, rhs) => { |
129 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
130 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
131 | Expr::Seq(mapped_lhs, mapped_rhs) |
132 | } |
133 | Expr::Choice(lhs, rhs) => { |
134 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
135 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
136 | Expr::Choice(mapped_lhs, mapped_rhs) |
137 | } |
138 | Expr::Rep(expr) => { |
139 | let mapped = Box::new(map_internal(*expr, f)); |
140 | Expr::Rep(mapped) |
141 | } |
142 | Expr::RepOnce(expr) => { |
143 | let mapped = Box::new(map_internal(*expr, f)); |
144 | Expr::RepOnce(mapped) |
145 | } |
146 | Expr::RepExact(expr, max) => { |
147 | let mapped = Box::new(map_internal(*expr, f)); |
148 | Expr::RepExact(mapped, max) |
149 | } |
150 | Expr::RepMin(expr, num) => { |
151 | let mapped = Box::new(map_internal(*expr, f)); |
152 | Expr::RepMin(mapped, num) |
153 | } |
154 | Expr::RepMax(expr, num) => { |
155 | let mapped = Box::new(map_internal(*expr, f)); |
156 | Expr::RepMax(mapped, num) |
157 | } |
158 | Expr::RepMinMax(expr, min, max) => { |
159 | let mapped = Box::new(map_internal(*expr, f)); |
160 | Expr::RepMinMax(mapped, min, max) |
161 | } |
162 | Expr::Opt(expr) => { |
163 | let mapped = Box::new(map_internal(*expr, f)); |
164 | Expr::Opt(mapped) |
165 | } |
166 | Expr::Push(expr) => { |
167 | let mapped = Box::new(map_internal(*expr, f)); |
168 | Expr::Push(mapped) |
169 | } |
170 | #[cfg (feature = "grammar-extras" )] |
171 | Expr::NodeTag(expr, tag) => { |
172 | let mapped = Box::new(map_internal(*expr, f)); |
173 | Expr::NodeTag(mapped, tag) |
174 | } |
175 | expr => expr, |
176 | } |
177 | } |
178 | |
179 | map_internal(self, &mut f) |
180 | } |
181 | |
182 | /// Applies `f` to the expression and all its children (bottom up). |
183 | pub fn map_bottom_up<F>(self, mut f: F) -> Expr |
184 | where |
185 | F: FnMut(Expr) -> Expr, |
186 | { |
187 | fn map_internal<F>(expr: Expr, f: &mut F) -> Expr |
188 | where |
189 | F: FnMut(Expr) -> Expr, |
190 | { |
191 | let mapped = match expr { |
192 | Expr::PosPred(expr) => { |
193 | let mapped = Box::new(map_internal(*expr, f)); |
194 | Expr::PosPred(mapped) |
195 | } |
196 | Expr::NegPred(expr) => { |
197 | let mapped = Box::new(map_internal(*expr, f)); |
198 | Expr::NegPred(mapped) |
199 | } |
200 | Expr::Seq(lhs, rhs) => { |
201 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
202 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
203 | Expr::Seq(mapped_lhs, mapped_rhs) |
204 | } |
205 | Expr::Choice(lhs, rhs) => { |
206 | let mapped_lhs = Box::new(map_internal(*lhs, f)); |
207 | let mapped_rhs = Box::new(map_internal(*rhs, f)); |
208 | Expr::Choice(mapped_lhs, mapped_rhs) |
209 | } |
210 | Expr::Rep(expr) => { |
211 | let mapped = Box::new(map_internal(*expr, f)); |
212 | Expr::Rep(mapped) |
213 | } |
214 | Expr::RepOnce(expr) => { |
215 | let mapped = Box::new(map_internal(*expr, f)); |
216 | Expr::RepOnce(mapped) |
217 | } |
218 | Expr::RepExact(expr, num) => { |
219 | let mapped = Box::new(map_internal(*expr, f)); |
220 | Expr::RepExact(mapped, num) |
221 | } |
222 | Expr::RepMin(expr, max) => { |
223 | let mapped = Box::new(map_internal(*expr, f)); |
224 | Expr::RepMin(mapped, max) |
225 | } |
226 | Expr::RepMax(expr, max) => { |
227 | let mapped = Box::new(map_internal(*expr, f)); |
228 | Expr::RepMax(mapped, max) |
229 | } |
230 | Expr::RepMinMax(expr, min, max) => { |
231 | let mapped = Box::new(map_internal(*expr, f)); |
232 | Expr::RepMinMax(mapped, min, max) |
233 | } |
234 | Expr::Opt(expr) => { |
235 | let mapped = Box::new(map_internal(*expr, f)); |
236 | Expr::Opt(mapped) |
237 | } |
238 | Expr::Push(expr) => { |
239 | let mapped = Box::new(map_internal(*expr, f)); |
240 | Expr::Push(mapped) |
241 | } |
242 | #[cfg (feature = "grammar-extras" )] |
243 | Expr::NodeTag(expr, tag) => { |
244 | let mapped = Box::new(map_internal(*expr, f)); |
245 | Expr::NodeTag(mapped, tag) |
246 | } |
247 | expr => expr, |
248 | }; |
249 | |
250 | f(mapped) |
251 | } |
252 | |
253 | map_internal(self, &mut f) |
254 | } |
255 | } |
256 | |
257 | /// The top down iterator for an expression. |
258 | pub struct ExprTopDownIterator { |
259 | current: Option<Expr>, |
260 | next: Option<Expr>, |
261 | right_branches: Vec<Expr>, |
262 | } |
263 | |
264 | impl ExprTopDownIterator { |
265 | /// Constructs a top-down iterator from the expression. |
266 | pub fn new(expr: &Expr) -> Self { |
267 | let mut iter = ExprTopDownIterator { |
268 | current: None, |
269 | next: None, |
270 | right_branches: vec![], |
271 | }; |
272 | iter.iterate_expr(expr.clone()); |
273 | iter |
274 | } |
275 | |
276 | fn iterate_expr(&mut self, expr: Expr) { |
277 | self.current = Some(expr.clone()); |
278 | match expr { |
279 | Expr::Seq(lhs, rhs) => { |
280 | self.right_branches.push(*rhs); |
281 | self.next = Some(*lhs); |
282 | } |
283 | Expr::Choice(lhs, rhs) => { |
284 | self.right_branches.push(*rhs); |
285 | self.next = Some(*lhs); |
286 | } |
287 | Expr::PosPred(expr) |
288 | | Expr::NegPred(expr) |
289 | | Expr::Rep(expr) |
290 | | Expr::RepOnce(expr) |
291 | | Expr::RepExact(expr, _) |
292 | | Expr::RepMin(expr, _) |
293 | | Expr::RepMax(expr, _) |
294 | | Expr::RepMinMax(expr, ..) |
295 | | Expr::Opt(expr) |
296 | | Expr::Push(expr) => { |
297 | self.next = Some(*expr); |
298 | } |
299 | #[cfg (feature = "grammar-extras" )] |
300 | Expr::NodeTag(expr, _) => { |
301 | self.next = Some(*expr); |
302 | } |
303 | _ => { |
304 | self.next = None; |
305 | } |
306 | } |
307 | } |
308 | } |
309 | |
310 | impl Iterator for ExprTopDownIterator { |
311 | type Item = Expr; |
312 | |
313 | fn next(&mut self) -> Option<Self::Item> { |
314 | let result: Option = self.current.take(); |
315 | |
316 | if let Some(expr: Expr) = self.next.take() { |
317 | self.iterate_expr(expr); |
318 | } else if let Some(expr: Expr) = self.right_branches.pop() { |
319 | self.iterate_expr(expr); |
320 | } |
321 | |
322 | result |
323 | } |
324 | } |
325 | |
326 | #[cfg (test)] |
327 | mod tests { |
328 | use super::*; |
329 | |
330 | #[test ] |
331 | fn top_down_iterator() { |
332 | let expr = Expr::Choice( |
333 | Box::new(Expr::Str(String::from("a" ))), |
334 | Box::new(Expr::Str(String::from("b" ))), |
335 | ); |
336 | let mut top_down = expr.iter_top_down(); |
337 | assert_eq!(top_down.next(), Some(expr)); |
338 | assert_eq!(top_down.next(), Some(Expr::Str(String::from("a" )))); |
339 | assert_eq!(top_down.next(), Some(Expr::Str(String::from("b" )))); |
340 | assert_eq!(top_down.next(), None); |
341 | } |
342 | |
343 | #[test ] |
344 | fn identity() { |
345 | let expr = Expr::Choice( |
346 | Box::new(Expr::Seq( |
347 | Box::new(Expr::Ident("a" .to_owned())), |
348 | Box::new(Expr::Str("b" .to_owned())), |
349 | )), |
350 | Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep( |
351 | Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice( |
352 | Box::new(Expr::Insens("c" .to_owned())), |
353 | Box::new(Expr::Push(Box::new(Expr::Range( |
354 | "'d'" .to_owned(), |
355 | "'e'" .to_owned(), |
356 | )))), |
357 | )))))), |
358 | )))))), |
359 | ); |
360 | |
361 | assert_eq!( |
362 | expr.clone() |
363 | .map_bottom_up(|expr| expr) |
364 | .map_top_down(|expr| expr), |
365 | expr |
366 | ); |
367 | } |
368 | } |
369 | |