1 | use alloc::{vec, vec::Vec}; |
2 | |
3 | use crate::ast::{self, Ast}; |
4 | |
5 | /// A trait for visiting an abstract syntax tree (AST) in depth first order. |
6 | /// |
7 | /// The principle aim of this trait is to enable callers to perform case |
8 | /// analysis on an abstract syntax tree without necessarily using recursion. |
9 | /// In particular, this permits callers to do case analysis with constant stack |
10 | /// usage, which can be important since the size of an abstract syntax tree |
11 | /// may be proportional to end user input. |
12 | /// |
13 | /// Typical usage of this trait involves providing an implementation and then |
14 | /// running it using the [`visit`] function. |
15 | /// |
16 | /// Note that the abstract syntax tree for a regular expression is quite |
17 | /// complex. Unless you specifically need it, you might be able to use the much |
18 | /// simpler [high-level intermediate representation](crate::hir::Hir) and its |
19 | /// [corresponding `Visitor` trait](crate::hir::Visitor) instead. |
20 | pub trait Visitor { |
21 | /// The result of visiting an AST. |
22 | type Output; |
23 | /// An error that visiting an AST might return. |
24 | type Err; |
25 | |
26 | /// All implementors of `Visitor` must provide a `finish` method, which |
27 | /// yields the result of visiting the AST or an error. |
28 | fn finish(self) -> Result<Self::Output, Self::Err>; |
29 | |
30 | /// This method is called before beginning traversal of the AST. |
31 | fn start(&mut self) {} |
32 | |
33 | /// This method is called on an `Ast` before descending into child `Ast` |
34 | /// nodes. |
35 | fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
36 | Ok(()) |
37 | } |
38 | |
39 | /// This method is called on an `Ast` after descending all of its child |
40 | /// `Ast` nodes. |
41 | fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
42 | Ok(()) |
43 | } |
44 | |
45 | /// This method is called between child nodes of an |
46 | /// [`Alternation`](ast::Alternation). |
47 | fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { |
48 | Ok(()) |
49 | } |
50 | |
51 | /// This method is called between child nodes of a concatenation. |
52 | fn visit_concat_in(&mut self) -> Result<(), Self::Err> { |
53 | Ok(()) |
54 | } |
55 | |
56 | /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) |
57 | /// before descending into child nodes. |
58 | fn visit_class_set_item_pre( |
59 | &mut self, |
60 | _ast: &ast::ClassSetItem, |
61 | ) -> Result<(), Self::Err> { |
62 | Ok(()) |
63 | } |
64 | |
65 | /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) |
66 | /// after descending into child nodes. |
67 | fn visit_class_set_item_post( |
68 | &mut self, |
69 | _ast: &ast::ClassSetItem, |
70 | ) -> Result<(), Self::Err> { |
71 | Ok(()) |
72 | } |
73 | |
74 | /// This method is called on every |
75 | /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into |
76 | /// child nodes. |
77 | fn visit_class_set_binary_op_pre( |
78 | &mut self, |
79 | _ast: &ast::ClassSetBinaryOp, |
80 | ) -> Result<(), Self::Err> { |
81 | Ok(()) |
82 | } |
83 | |
84 | /// This method is called on every |
85 | /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child |
86 | /// nodes. |
87 | fn visit_class_set_binary_op_post( |
88 | &mut self, |
89 | _ast: &ast::ClassSetBinaryOp, |
90 | ) -> Result<(), Self::Err> { |
91 | Ok(()) |
92 | } |
93 | |
94 | /// This method is called between the left hand and right hand child nodes |
95 | /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp). |
96 | fn visit_class_set_binary_op_in( |
97 | &mut self, |
98 | _ast: &ast::ClassSetBinaryOp, |
99 | ) -> Result<(), Self::Err> { |
100 | Ok(()) |
101 | } |
102 | } |
103 | |
104 | /// Executes an implementation of `Visitor` in constant stack space. |
105 | /// |
106 | /// This function will visit every node in the given `Ast` while calling the |
107 | /// appropriate methods provided by the [`Visitor`] trait. |
108 | /// |
109 | /// The primary use case for this method is when one wants to perform case |
110 | /// analysis over an `Ast` without using a stack size proportional to the depth |
111 | /// of the `Ast`. Namely, this method will instead use constant stack size, but |
112 | /// will use heap space proportional to the size of the `Ast`. This may be |
113 | /// desirable in cases where the size of `Ast` is proportional to end user |
114 | /// input. |
115 | /// |
116 | /// If the visitor returns an error at any point, then visiting is stopped and |
117 | /// the error is returned. |
118 | pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> { |
119 | HeapVisitor::new().visit(ast, visitor) |
120 | } |
121 | |
122 | /// HeapVisitor visits every item in an `Ast` recursively using constant stack |
123 | /// size and a heap size proportional to the size of the `Ast`. |
124 | struct HeapVisitor<'a> { |
125 | /// A stack of `Ast` nodes. This is roughly analogous to the call stack |
126 | /// used in a typical recursive visitor. |
127 | stack: Vec<(&'a Ast, Frame<'a>)>, |
128 | /// Similar to the `Ast` stack above, but is used only for character |
129 | /// classes. In particular, character classes embed their own mini |
130 | /// recursive syntax. |
131 | stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>, |
132 | } |
133 | |
134 | /// Represents a single stack frame while performing structural induction over |
135 | /// an `Ast`. |
136 | enum Frame<'a> { |
137 | /// A stack frame allocated just before descending into a repetition |
138 | /// operator's child node. |
139 | Repetition(&'a ast::Repetition), |
140 | /// A stack frame allocated just before descending into a group's child |
141 | /// node. |
142 | Group(&'a ast::Group), |
143 | /// The stack frame used while visiting every child node of a concatenation |
144 | /// of expressions. |
145 | Concat { |
146 | /// The child node we are currently visiting. |
147 | head: &'a Ast, |
148 | /// The remaining child nodes to visit (which may be empty). |
149 | tail: &'a [Ast], |
150 | }, |
151 | /// The stack frame used while visiting every child node of an alternation |
152 | /// of expressions. |
153 | Alternation { |
154 | /// The child node we are currently visiting. |
155 | head: &'a Ast, |
156 | /// The remaining child nodes to visit (which may be empty). |
157 | tail: &'a [Ast], |
158 | }, |
159 | } |
160 | |
161 | /// Represents a single stack frame while performing structural induction over |
162 | /// a character class. |
163 | enum ClassFrame<'a> { |
164 | /// The stack frame used while visiting every child node of a union of |
165 | /// character class items. |
166 | Union { |
167 | /// The child node we are currently visiting. |
168 | head: &'a ast::ClassSetItem, |
169 | /// The remaining child nodes to visit (which may be empty). |
170 | tail: &'a [ast::ClassSetItem], |
171 | }, |
172 | /// The stack frame used while a binary class operation. |
173 | Binary { op: &'a ast::ClassSetBinaryOp }, |
174 | /// A stack frame allocated just before descending into a binary operator's |
175 | /// left hand child node. |
176 | BinaryLHS { |
177 | op: &'a ast::ClassSetBinaryOp, |
178 | lhs: &'a ast::ClassSet, |
179 | rhs: &'a ast::ClassSet, |
180 | }, |
181 | /// A stack frame allocated just before descending into a binary operator's |
182 | /// right hand child node. |
183 | BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet }, |
184 | } |
185 | |
186 | /// A representation of the inductive step when performing structural induction |
187 | /// over a character class. |
188 | /// |
189 | /// Note that there is no analogous explicit type for the inductive step for |
190 | /// `Ast` nodes because the inductive step is just an `Ast`. For character |
191 | /// classes, the inductive step can produce one of two possible child nodes: |
192 | /// an item or a binary operation. (An item cannot be a binary operation |
193 | /// because that would imply binary operations can be unioned in the concrete |
194 | /// syntax, which is not possible.) |
195 | enum ClassInduct<'a> { |
196 | Item(&'a ast::ClassSetItem), |
197 | BinaryOp(&'a ast::ClassSetBinaryOp), |
198 | } |
199 | |
200 | impl<'a> HeapVisitor<'a> { |
201 | fn new() -> HeapVisitor<'a> { |
202 | HeapVisitor { stack: vec![], stack_class: vec![] } |
203 | } |
204 | |
205 | fn visit<V: Visitor>( |
206 | &mut self, |
207 | mut ast: &'a Ast, |
208 | mut visitor: V, |
209 | ) -> Result<V::Output, V::Err> { |
210 | self.stack.clear(); |
211 | self.stack_class.clear(); |
212 | |
213 | visitor.start(); |
214 | loop { |
215 | visitor.visit_pre(ast)?; |
216 | if let Some(x) = self.induct(ast, &mut visitor)? { |
217 | let child = x.child(); |
218 | self.stack.push((ast, x)); |
219 | ast = child; |
220 | continue; |
221 | } |
222 | // No induction means we have a base case, so we can post visit |
223 | // it now. |
224 | visitor.visit_post(ast)?; |
225 | |
226 | // At this point, we now try to pop our call stack until it is |
227 | // either empty or we hit another inductive case. |
228 | loop { |
229 | let (post_ast, frame) = match self.stack.pop() { |
230 | None => return visitor.finish(), |
231 | Some((post_ast, frame)) => (post_ast, frame), |
232 | }; |
233 | // If this is a concat/alternate, then we might have additional |
234 | // inductive steps to process. |
235 | if let Some(x) = self.pop(frame) { |
236 | match x { |
237 | Frame::Alternation { .. } => { |
238 | visitor.visit_alternation_in()?; |
239 | } |
240 | Frame::Concat { .. } => { |
241 | visitor.visit_concat_in()?; |
242 | } |
243 | _ => {} |
244 | } |
245 | ast = x.child(); |
246 | self.stack.push((post_ast, x)); |
247 | break; |
248 | } |
249 | // Otherwise, we've finished visiting all the child nodes for |
250 | // this AST, so we can post visit it now. |
251 | visitor.visit_post(post_ast)?; |
252 | } |
253 | } |
254 | } |
255 | |
256 | /// Build a stack frame for the given AST if one is needed (which occurs if |
257 | /// and only if there are child nodes in the AST). Otherwise, return None. |
258 | /// |
259 | /// If this visits a class, then the underlying visitor implementation may |
260 | /// return an error which will be passed on here. |
261 | fn induct<V: Visitor>( |
262 | &mut self, |
263 | ast: &'a Ast, |
264 | visitor: &mut V, |
265 | ) -> Result<Option<Frame<'a>>, V::Err> { |
266 | Ok(match *ast { |
267 | Ast::ClassBracketed(ref x) => { |
268 | self.visit_class(x, visitor)?; |
269 | None |
270 | } |
271 | Ast::Repetition(ref x) => Some(Frame::Repetition(x)), |
272 | Ast::Group(ref x) => Some(Frame::Group(x)), |
273 | Ast::Concat(ref x) if x.asts.is_empty() => None, |
274 | Ast::Concat(ref x) => { |
275 | Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] }) |
276 | } |
277 | Ast::Alternation(ref x) if x.asts.is_empty() => None, |
278 | Ast::Alternation(ref x) => Some(Frame::Alternation { |
279 | head: &x.asts[0], |
280 | tail: &x.asts[1..], |
281 | }), |
282 | _ => None, |
283 | }) |
284 | } |
285 | |
286 | /// Pops the given frame. If the frame has an additional inductive step, |
287 | /// then return it, otherwise return `None`. |
288 | fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { |
289 | match induct { |
290 | Frame::Repetition(_) => None, |
291 | Frame::Group(_) => None, |
292 | Frame::Concat { tail, .. } => { |
293 | if tail.is_empty() { |
294 | None |
295 | } else { |
296 | Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) |
297 | } |
298 | } |
299 | Frame::Alternation { tail, .. } => { |
300 | if tail.is_empty() { |
301 | None |
302 | } else { |
303 | Some(Frame::Alternation { |
304 | head: &tail[0], |
305 | tail: &tail[1..], |
306 | }) |
307 | } |
308 | } |
309 | } |
310 | } |
311 | |
312 | fn visit_class<V: Visitor>( |
313 | &mut self, |
314 | ast: &'a ast::ClassBracketed, |
315 | visitor: &mut V, |
316 | ) -> Result<(), V::Err> { |
317 | let mut ast = ClassInduct::from_bracketed(ast); |
318 | loop { |
319 | self.visit_class_pre(&ast, visitor)?; |
320 | if let Some(x) = self.induct_class(&ast) { |
321 | let child = x.child(); |
322 | self.stack_class.push((ast, x)); |
323 | ast = child; |
324 | continue; |
325 | } |
326 | self.visit_class_post(&ast, visitor)?; |
327 | |
328 | // At this point, we now try to pop our call stack until it is |
329 | // either empty or we hit another inductive case. |
330 | loop { |
331 | let (post_ast, frame) = match self.stack_class.pop() { |
332 | None => return Ok(()), |
333 | Some((post_ast, frame)) => (post_ast, frame), |
334 | }; |
335 | // If this is a union or a binary op, then we might have |
336 | // additional inductive steps to process. |
337 | if let Some(x) = self.pop_class(frame) { |
338 | if let ClassFrame::BinaryRHS { ref op, .. } = x { |
339 | visitor.visit_class_set_binary_op_in(op)?; |
340 | } |
341 | ast = x.child(); |
342 | self.stack_class.push((post_ast, x)); |
343 | break; |
344 | } |
345 | // Otherwise, we've finished visiting all the child nodes for |
346 | // this class node, so we can post visit it now. |
347 | self.visit_class_post(&post_ast, visitor)?; |
348 | } |
349 | } |
350 | } |
351 | |
352 | /// Call the appropriate `Visitor` methods given an inductive step. |
353 | fn visit_class_pre<V: Visitor>( |
354 | &self, |
355 | ast: &ClassInduct<'a>, |
356 | visitor: &mut V, |
357 | ) -> Result<(), V::Err> { |
358 | match *ast { |
359 | ClassInduct::Item(item) => { |
360 | visitor.visit_class_set_item_pre(item)?; |
361 | } |
362 | ClassInduct::BinaryOp(op) => { |
363 | visitor.visit_class_set_binary_op_pre(op)?; |
364 | } |
365 | } |
366 | Ok(()) |
367 | } |
368 | |
369 | /// Call the appropriate `Visitor` methods given an inductive step. |
370 | fn visit_class_post<V: Visitor>( |
371 | &self, |
372 | ast: &ClassInduct<'a>, |
373 | visitor: &mut V, |
374 | ) -> Result<(), V::Err> { |
375 | match *ast { |
376 | ClassInduct::Item(item) => { |
377 | visitor.visit_class_set_item_post(item)?; |
378 | } |
379 | ClassInduct::BinaryOp(op) => { |
380 | visitor.visit_class_set_binary_op_post(op)?; |
381 | } |
382 | } |
383 | Ok(()) |
384 | } |
385 | |
386 | /// Build a stack frame for the given class node if one is needed (which |
387 | /// occurs if and only if there are child nodes). Otherwise, return None. |
388 | fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> { |
389 | match *ast { |
390 | ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => { |
391 | match x.kind { |
392 | ast::ClassSet::Item(ref item) => { |
393 | Some(ClassFrame::Union { head: item, tail: &[] }) |
394 | } |
395 | ast::ClassSet::BinaryOp(ref op) => { |
396 | Some(ClassFrame::Binary { op }) |
397 | } |
398 | } |
399 | } |
400 | ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => { |
401 | if x.items.is_empty() { |
402 | None |
403 | } else { |
404 | Some(ClassFrame::Union { |
405 | head: &x.items[0], |
406 | tail: &x.items[1..], |
407 | }) |
408 | } |
409 | } |
410 | ClassInduct::BinaryOp(op) => { |
411 | Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) |
412 | } |
413 | _ => None, |
414 | } |
415 | } |
416 | |
417 | /// Pops the given frame. If the frame has an additional inductive step, |
418 | /// then return it, otherwise return `None`. |
419 | fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> { |
420 | match induct { |
421 | ClassFrame::Union { tail, .. } => { |
422 | if tail.is_empty() { |
423 | None |
424 | } else { |
425 | Some(ClassFrame::Union { |
426 | head: &tail[0], |
427 | tail: &tail[1..], |
428 | }) |
429 | } |
430 | } |
431 | ClassFrame::Binary { .. } => None, |
432 | ClassFrame::BinaryLHS { op, rhs, .. } => { |
433 | Some(ClassFrame::BinaryRHS { op, rhs }) |
434 | } |
435 | ClassFrame::BinaryRHS { .. } => None, |
436 | } |
437 | } |
438 | } |
439 | |
440 | impl<'a> Frame<'a> { |
441 | /// Perform the next inductive step on this frame and return the next |
442 | /// child AST node to visit. |
443 | fn child(&self) -> &'a Ast { |
444 | match *self { |
445 | Frame::Repetition(rep) => &rep.ast, |
446 | Frame::Group(group) => &group.ast, |
447 | Frame::Concat { head, .. } => head, |
448 | Frame::Alternation { head, .. } => head, |
449 | } |
450 | } |
451 | } |
452 | |
453 | impl<'a> ClassFrame<'a> { |
454 | /// Perform the next inductive step on this frame and return the next |
455 | /// child class node to visit. |
456 | fn child(&self) -> ClassInduct<'a> { |
457 | match *self { |
458 | ClassFrame::Union { head, .. } => ClassInduct::Item(head), |
459 | ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op), |
460 | ClassFrame::BinaryLHS { ref lhs, .. } => { |
461 | ClassInduct::from_set(lhs) |
462 | } |
463 | ClassFrame::BinaryRHS { ref rhs, .. } => { |
464 | ClassInduct::from_set(rhs) |
465 | } |
466 | } |
467 | } |
468 | } |
469 | |
470 | impl<'a> ClassInduct<'a> { |
471 | fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> { |
472 | ClassInduct::from_set(&ast.kind) |
473 | } |
474 | |
475 | fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> { |
476 | match *ast { |
477 | ast::ClassSet::Item(ref item) => ClassInduct::Item(item), |
478 | ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op), |
479 | } |
480 | } |
481 | } |
482 | |
483 | impl<'a> core::fmt::Debug for ClassFrame<'a> { |
484 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
485 | let x = match *self { |
486 | ClassFrame::Union { .. } => "Union" , |
487 | ClassFrame::Binary { .. } => "Binary" , |
488 | ClassFrame::BinaryLHS { .. } => "BinaryLHS" , |
489 | ClassFrame::BinaryRHS { .. } => "BinaryRHS" , |
490 | }; |
491 | write!(f, "{}" , x) |
492 | } |
493 | } |
494 | |
495 | impl<'a> core::fmt::Debug for ClassInduct<'a> { |
496 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
497 | let x = match *self { |
498 | ClassInduct::Item(it) => match *it { |
499 | ast::ClassSetItem::Empty(_) => "Item(Empty)" , |
500 | ast::ClassSetItem::Literal(_) => "Item(Literal)" , |
501 | ast::ClassSetItem::Range(_) => "Item(Range)" , |
502 | ast::ClassSetItem::Ascii(_) => "Item(Ascii)" , |
503 | ast::ClassSetItem::Perl(_) => "Item(Perl)" , |
504 | ast::ClassSetItem::Unicode(_) => "Item(Unicode)" , |
505 | ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)" , |
506 | ast::ClassSetItem::Union(_) => "Item(Union)" , |
507 | }, |
508 | ClassInduct::BinaryOp(it) => match it.kind { |
509 | ast::ClassSetBinaryOpKind::Intersection => { |
510 | "BinaryOp(Intersection)" |
511 | } |
512 | ast::ClassSetBinaryOpKind::Difference => { |
513 | "BinaryOp(Difference)" |
514 | } |
515 | ast::ClassSetBinaryOpKind::SymmetricDifference => { |
516 | "BinaryOp(SymmetricDifference)" |
517 | } |
518 | }, |
519 | }; |
520 | write!(f, "{}" , x) |
521 | } |
522 | } |
523 | |