1 | use std::fmt; |
2 | |
3 | use crate::ast::{self, Ast}; |
4 | |
5 | /// A trait for visiting an abstract syntax tree (AST) in depth first order. |
6 | /// |
7 | /// The principle aim of this trait is to enable callers to perform case |
8 | /// analysis on an abstract syntax tree without necessarily using recursion. |
9 | /// In particular, this permits callers to do case analysis with constant stack |
10 | /// usage, which can be important since the size of an abstract syntax tree |
11 | /// may be proportional to end user input. |
12 | /// |
13 | /// Typical usage of this trait involves providing an implementation and then |
14 | /// running it using the [`visit`](fn.visit.html) function. |
15 | /// |
16 | /// Note that the abstract syntax tree for a regular expression is quite |
17 | /// complex. Unless you specifically need it, you might be able to use the |
18 | /// much simpler |
19 | /// [high-level intermediate representation](../hir/struct.Hir.html) |
20 | /// and its |
21 | /// [corresponding `Visitor` trait](../hir/trait.Visitor.html) |
22 | /// instead. |
23 | pub trait Visitor { |
24 | /// The result of visiting an AST. |
25 | type Output; |
26 | /// An error that visiting an AST might return. |
27 | type Err; |
28 | |
29 | /// All implementors of `Visitor` must provide a `finish` method, which |
30 | /// yields the result of visiting the AST or an error. |
31 | fn finish(self) -> Result<Self::Output, Self::Err>; |
32 | |
33 | /// This method is called before beginning traversal of the AST. |
34 | fn start(&mut self) {} |
35 | |
36 | /// This method is called on an `Ast` before descending into child `Ast` |
37 | /// nodes. |
38 | fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
39 | Ok(()) |
40 | } |
41 | |
42 | /// This method is called on an `Ast` after descending all of its child |
43 | /// `Ast` nodes. |
44 | fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
45 | Ok(()) |
46 | } |
47 | |
48 | /// This method is called between child nodes of an |
49 | /// [`Alternation`](struct.Alternation.html). |
50 | fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { |
51 | Ok(()) |
52 | } |
53 | |
54 | /// This method is called on every |
55 | /// [`ClassSetItem`](enum.ClassSetItem.html) |
56 | /// before descending into child nodes. |
57 | fn visit_class_set_item_pre( |
58 | &mut self, |
59 | _ast: &ast::ClassSetItem, |
60 | ) -> Result<(), Self::Err> { |
61 | Ok(()) |
62 | } |
63 | |
64 | /// This method is called on every |
65 | /// [`ClassSetItem`](enum.ClassSetItem.html) |
66 | /// after descending into child nodes. |
67 | fn visit_class_set_item_post( |
68 | &mut self, |
69 | _ast: &ast::ClassSetItem, |
70 | ) -> Result<(), Self::Err> { |
71 | Ok(()) |
72 | } |
73 | |
74 | /// This method is called on every |
75 | /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) |
76 | /// before descending into child nodes. |
77 | fn visit_class_set_binary_op_pre( |
78 | &mut self, |
79 | _ast: &ast::ClassSetBinaryOp, |
80 | ) -> Result<(), Self::Err> { |
81 | Ok(()) |
82 | } |
83 | |
84 | /// This method is called on every |
85 | /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html) |
86 | /// after descending into child nodes. |
87 | fn visit_class_set_binary_op_post( |
88 | &mut self, |
89 | _ast: &ast::ClassSetBinaryOp, |
90 | ) -> Result<(), Self::Err> { |
91 | Ok(()) |
92 | } |
93 | |
94 | /// This method is called between the left hand and right hand child nodes |
95 | /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html). |
96 | fn visit_class_set_binary_op_in( |
97 | &mut self, |
98 | _ast: &ast::ClassSetBinaryOp, |
99 | ) -> Result<(), Self::Err> { |
100 | Ok(()) |
101 | } |
102 | } |
103 | |
104 | /// Executes an implementation of `Visitor` in constant stack space. |
105 | /// |
106 | /// This function will visit every node in the given `Ast` while calling the |
107 | /// appropriate methods provided by the |
108 | /// [`Visitor`](trait.Visitor.html) trait. |
109 | /// |
110 | /// The primary use case for this method is when one wants to perform case |
111 | /// analysis over an `Ast` without using a stack size proportional to the depth |
112 | /// of the `Ast`. Namely, this method will instead use constant stack size, but |
113 | /// will use heap space proportional to the size of the `Ast`. This may be |
114 | /// desirable in cases where the size of `Ast` is proportional to end user |
115 | /// input. |
116 | /// |
117 | /// If the visitor returns an error at any point, then visiting is stopped and |
118 | /// the error is returned. |
119 | pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> { |
120 | HeapVisitor::new().visit(ast, visitor) |
121 | } |
122 | |
123 | /// HeapVisitor visits every item in an `Ast` recursively using constant stack |
124 | /// size and a heap size proportional to the size of the `Ast`. |
125 | struct HeapVisitor<'a> { |
126 | /// A stack of `Ast` nodes. This is roughly analogous to the call stack |
127 | /// used in a typical recursive visitor. |
128 | stack: Vec<(&'a Ast, Frame<'a>)>, |
129 | /// Similar to the `Ast` stack above, but is used only for character |
130 | /// classes. In particular, character classes embed their own mini |
131 | /// recursive syntax. |
132 | stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>, |
133 | } |
134 | |
135 | /// Represents a single stack frame while performing structural induction over |
136 | /// an `Ast`. |
137 | enum Frame<'a> { |
138 | /// A stack frame allocated just before descending into a repetition |
139 | /// operator's child node. |
140 | Repetition(&'a ast::Repetition), |
141 | /// A stack frame allocated just before descending into a group's child |
142 | /// node. |
143 | Group(&'a ast::Group), |
144 | /// The stack frame used while visiting every child node of a concatenation |
145 | /// of expressions. |
146 | Concat { |
147 | /// The child node we are currently visiting. |
148 | head: &'a Ast, |
149 | /// The remaining child nodes to visit (which may be empty). |
150 | tail: &'a [Ast], |
151 | }, |
152 | /// The stack frame used while visiting every child node of an alternation |
153 | /// of expressions. |
154 | Alternation { |
155 | /// The child node we are currently visiting. |
156 | head: &'a Ast, |
157 | /// The remaining child nodes to visit (which may be empty). |
158 | tail: &'a [Ast], |
159 | }, |
160 | } |
161 | |
162 | /// Represents a single stack frame while performing structural induction over |
163 | /// a character class. |
164 | enum ClassFrame<'a> { |
165 | /// The stack frame used while visiting every child node of a union of |
166 | /// character class items. |
167 | Union { |
168 | /// The child node we are currently visiting. |
169 | head: &'a ast::ClassSetItem, |
170 | /// The remaining child nodes to visit (which may be empty). |
171 | tail: &'a [ast::ClassSetItem], |
172 | }, |
173 | /// The stack frame used while a binary class operation. |
174 | Binary { op: &'a ast::ClassSetBinaryOp }, |
175 | /// A stack frame allocated just before descending into a binary operator's |
176 | /// left hand child node. |
177 | BinaryLHS { |
178 | op: &'a ast::ClassSetBinaryOp, |
179 | lhs: &'a ast::ClassSet, |
180 | rhs: &'a ast::ClassSet, |
181 | }, |
182 | /// A stack frame allocated just before descending into a binary operator's |
183 | /// right hand child node. |
184 | BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet }, |
185 | } |
186 | |
187 | /// A representation of the inductive step when performing structural induction |
188 | /// over a character class. |
189 | /// |
190 | /// Note that there is no analogous explicit type for the inductive step for |
191 | /// `Ast` nodes because the inductive step is just an `Ast`. For character |
192 | /// classes, the inductive step can produce one of two possible child nodes: |
193 | /// an item or a binary operation. (An item cannot be a binary operation |
194 | /// because that would imply binary operations can be unioned in the concrete |
195 | /// syntax, which is not possible.) |
196 | enum ClassInduct<'a> { |
197 | Item(&'a ast::ClassSetItem), |
198 | BinaryOp(&'a ast::ClassSetBinaryOp), |
199 | } |
200 | |
201 | impl<'a> HeapVisitor<'a> { |
202 | fn new() -> HeapVisitor<'a> { |
203 | HeapVisitor { stack: vec![], stack_class: vec![] } |
204 | } |
205 | |
206 | fn visit<V: Visitor>( |
207 | &mut self, |
208 | mut ast: &'a Ast, |
209 | mut visitor: V, |
210 | ) -> Result<V::Output, V::Err> { |
211 | self.stack.clear(); |
212 | self.stack_class.clear(); |
213 | |
214 | visitor.start(); |
215 | loop { |
216 | visitor.visit_pre(ast)?; |
217 | if let Some(x) = self.induct(ast, &mut visitor)? { |
218 | let child = x.child(); |
219 | self.stack.push((ast, x)); |
220 | ast = child; |
221 | continue; |
222 | } |
223 | // No induction means we have a base case, so we can post visit |
224 | // it now. |
225 | visitor.visit_post(ast)?; |
226 | |
227 | // At this point, we now try to pop our call stack until it is |
228 | // either empty or we hit another inductive case. |
229 | loop { |
230 | let (post_ast, frame) = match self.stack.pop() { |
231 | None => return visitor.finish(), |
232 | Some((post_ast, frame)) => (post_ast, frame), |
233 | }; |
234 | // If this is a concat/alternate, then we might have additional |
235 | // inductive steps to process. |
236 | if let Some(x) = self.pop(frame) { |
237 | if let Frame::Alternation { .. } = x { |
238 | visitor.visit_alternation_in()?; |
239 | } |
240 | ast = x.child(); |
241 | self.stack.push((post_ast, x)); |
242 | break; |
243 | } |
244 | // Otherwise, we've finished visiting all the child nodes for |
245 | // this AST, so we can post visit it now. |
246 | visitor.visit_post(post_ast)?; |
247 | } |
248 | } |
249 | } |
250 | |
251 | /// Build a stack frame for the given AST if one is needed (which occurs if |
252 | /// and only if there are child nodes in the AST). Otherwise, return None. |
253 | /// |
254 | /// If this visits a class, then the underlying visitor implementation may |
255 | /// return an error which will be passed on here. |
256 | fn induct<V: Visitor>( |
257 | &mut self, |
258 | ast: &'a Ast, |
259 | visitor: &mut V, |
260 | ) -> Result<Option<Frame<'a>>, V::Err> { |
261 | Ok(match *ast { |
262 | Ast::Class(ast::Class::Bracketed(ref x)) => { |
263 | self.visit_class(x, visitor)?; |
264 | None |
265 | } |
266 | Ast::Repetition(ref x) => Some(Frame::Repetition(x)), |
267 | Ast::Group(ref x) => Some(Frame::Group(x)), |
268 | Ast::Concat(ref x) if x.asts.is_empty() => None, |
269 | Ast::Concat(ref x) => { |
270 | Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] }) |
271 | } |
272 | Ast::Alternation(ref x) if x.asts.is_empty() => None, |
273 | Ast::Alternation(ref x) => Some(Frame::Alternation { |
274 | head: &x.asts[0], |
275 | tail: &x.asts[1..], |
276 | }), |
277 | _ => None, |
278 | }) |
279 | } |
280 | |
281 | /// Pops the given frame. If the frame has an additional inductive step, |
282 | /// then return it, otherwise return `None`. |
283 | fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { |
284 | match induct { |
285 | Frame::Repetition(_) => None, |
286 | Frame::Group(_) => None, |
287 | Frame::Concat { tail, .. } => { |
288 | if tail.is_empty() { |
289 | None |
290 | } else { |
291 | Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) |
292 | } |
293 | } |
294 | Frame::Alternation { tail, .. } => { |
295 | if tail.is_empty() { |
296 | None |
297 | } else { |
298 | Some(Frame::Alternation { |
299 | head: &tail[0], |
300 | tail: &tail[1..], |
301 | }) |
302 | } |
303 | } |
304 | } |
305 | } |
306 | |
307 | fn visit_class<V: Visitor>( |
308 | &mut self, |
309 | ast: &'a ast::ClassBracketed, |
310 | visitor: &mut V, |
311 | ) -> Result<(), V::Err> { |
312 | let mut ast = ClassInduct::from_bracketed(ast); |
313 | loop { |
314 | self.visit_class_pre(&ast, visitor)?; |
315 | if let Some(x) = self.induct_class(&ast) { |
316 | let child = x.child(); |
317 | self.stack_class.push((ast, x)); |
318 | ast = child; |
319 | continue; |
320 | } |
321 | self.visit_class_post(&ast, visitor)?; |
322 | |
323 | // At this point, we now try to pop our call stack until it is |
324 | // either empty or we hit another inductive case. |
325 | loop { |
326 | let (post_ast, frame) = match self.stack_class.pop() { |
327 | None => return Ok(()), |
328 | Some((post_ast, frame)) => (post_ast, frame), |
329 | }; |
330 | // If this is a union or a binary op, then we might have |
331 | // additional inductive steps to process. |
332 | if let Some(x) = self.pop_class(frame) { |
333 | if let ClassFrame::BinaryRHS { ref op, .. } = x { |
334 | visitor.visit_class_set_binary_op_in(op)?; |
335 | } |
336 | ast = x.child(); |
337 | self.stack_class.push((post_ast, x)); |
338 | break; |
339 | } |
340 | // Otherwise, we've finished visiting all the child nodes for |
341 | // this class node, so we can post visit it now. |
342 | self.visit_class_post(&post_ast, visitor)?; |
343 | } |
344 | } |
345 | } |
346 | |
347 | /// Call the appropriate `Visitor` methods given an inductive step. |
348 | fn visit_class_pre<V: Visitor>( |
349 | &self, |
350 | ast: &ClassInduct<'a>, |
351 | visitor: &mut V, |
352 | ) -> Result<(), V::Err> { |
353 | match *ast { |
354 | ClassInduct::Item(item) => { |
355 | visitor.visit_class_set_item_pre(item)?; |
356 | } |
357 | ClassInduct::BinaryOp(op) => { |
358 | visitor.visit_class_set_binary_op_pre(op)?; |
359 | } |
360 | } |
361 | Ok(()) |
362 | } |
363 | |
364 | /// Call the appropriate `Visitor` methods given an inductive step. |
365 | fn visit_class_post<V: Visitor>( |
366 | &self, |
367 | ast: &ClassInduct<'a>, |
368 | visitor: &mut V, |
369 | ) -> Result<(), V::Err> { |
370 | match *ast { |
371 | ClassInduct::Item(item) => { |
372 | visitor.visit_class_set_item_post(item)?; |
373 | } |
374 | ClassInduct::BinaryOp(op) => { |
375 | visitor.visit_class_set_binary_op_post(op)?; |
376 | } |
377 | } |
378 | Ok(()) |
379 | } |
380 | |
381 | /// Build a stack frame for the given class node if one is needed (which |
382 | /// occurs if and only if there are child nodes). Otherwise, return None. |
383 | fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> { |
384 | match *ast { |
385 | ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => { |
386 | match x.kind { |
387 | ast::ClassSet::Item(ref item) => { |
388 | Some(ClassFrame::Union { head: item, tail: &[] }) |
389 | } |
390 | ast::ClassSet::BinaryOp(ref op) => { |
391 | Some(ClassFrame::Binary { op }) |
392 | } |
393 | } |
394 | } |
395 | ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => { |
396 | if x.items.is_empty() { |
397 | None |
398 | } else { |
399 | Some(ClassFrame::Union { |
400 | head: &x.items[0], |
401 | tail: &x.items[1..], |
402 | }) |
403 | } |
404 | } |
405 | ClassInduct::BinaryOp(op) => { |
406 | Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) |
407 | } |
408 | _ => None, |
409 | } |
410 | } |
411 | |
412 | /// Pops the given frame. If the frame has an additional inductive step, |
413 | /// then return it, otherwise return `None`. |
414 | fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> { |
415 | match induct { |
416 | ClassFrame::Union { tail, .. } => { |
417 | if tail.is_empty() { |
418 | None |
419 | } else { |
420 | Some(ClassFrame::Union { |
421 | head: &tail[0], |
422 | tail: &tail[1..], |
423 | }) |
424 | } |
425 | } |
426 | ClassFrame::Binary { .. } => None, |
427 | ClassFrame::BinaryLHS { op, rhs, .. } => { |
428 | Some(ClassFrame::BinaryRHS { op, rhs }) |
429 | } |
430 | ClassFrame::BinaryRHS { .. } => None, |
431 | } |
432 | } |
433 | } |
434 | |
435 | impl<'a> Frame<'a> { |
436 | /// Perform the next inductive step on this frame and return the next |
437 | /// child AST node to visit. |
438 | fn child(&self) -> &'a Ast { |
439 | match *self { |
440 | Frame::Repetition(rep) => &rep.ast, |
441 | Frame::Group(group) => &group.ast, |
442 | Frame::Concat { head, .. } => head, |
443 | Frame::Alternation { head, .. } => head, |
444 | } |
445 | } |
446 | } |
447 | |
448 | impl<'a> ClassFrame<'a> { |
449 | /// Perform the next inductive step on this frame and return the next |
450 | /// child class node to visit. |
451 | fn child(&self) -> ClassInduct<'a> { |
452 | match *self { |
453 | ClassFrame::Union { head, .. } => ClassInduct::Item(head), |
454 | ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op), |
455 | ClassFrame::BinaryLHS { ref lhs, .. } => { |
456 | ClassInduct::from_set(lhs) |
457 | } |
458 | ClassFrame::BinaryRHS { ref rhs, .. } => { |
459 | ClassInduct::from_set(rhs) |
460 | } |
461 | } |
462 | } |
463 | } |
464 | |
465 | impl<'a> ClassInduct<'a> { |
466 | fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> { |
467 | ClassInduct::from_set(&ast.kind) |
468 | } |
469 | |
470 | fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> { |
471 | match *ast { |
472 | ast::ClassSet::Item(ref item) => ClassInduct::Item(item), |
473 | ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op), |
474 | } |
475 | } |
476 | } |
477 | |
478 | impl<'a> fmt::Debug for ClassFrame<'a> { |
479 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
480 | let x = match *self { |
481 | ClassFrame::Union { .. } => "Union" , |
482 | ClassFrame::Binary { .. } => "Binary" , |
483 | ClassFrame::BinaryLHS { .. } => "BinaryLHS" , |
484 | ClassFrame::BinaryRHS { .. } => "BinaryRHS" , |
485 | }; |
486 | write!(f, "{}" , x) |
487 | } |
488 | } |
489 | |
490 | impl<'a> fmt::Debug for ClassInduct<'a> { |
491 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
492 | let x = match *self { |
493 | ClassInduct::Item(it) => match *it { |
494 | ast::ClassSetItem::Empty(_) => "Item(Empty)" , |
495 | ast::ClassSetItem::Literal(_) => "Item(Literal)" , |
496 | ast::ClassSetItem::Range(_) => "Item(Range)" , |
497 | ast::ClassSetItem::Ascii(_) => "Item(Ascii)" , |
498 | ast::ClassSetItem::Perl(_) => "Item(Perl)" , |
499 | ast::ClassSetItem::Unicode(_) => "Item(Unicode)" , |
500 | ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)" , |
501 | ast::ClassSetItem::Union(_) => "Item(Union)" , |
502 | }, |
503 | ClassInduct::BinaryOp(it) => match it.kind { |
504 | ast::ClassSetBinaryOpKind::Intersection => { |
505 | "BinaryOp(Intersection)" |
506 | } |
507 | ast::ClassSetBinaryOpKind::Difference => { |
508 | "BinaryOp(Difference)" |
509 | } |
510 | ast::ClassSetBinaryOpKind::SymmetricDifference => { |
511 | "BinaryOp(SymmetricDifference)" |
512 | } |
513 | }, |
514 | }; |
515 | write!(f, "{}" , x) |
516 | } |
517 | } |
518 | |