1 | use alloc::{vec, vec::Vec}; |
2 | |
3 | use crate::ast::{self, Ast}; |
4 | |
5 | /// A trait for visiting an abstract syntax tree (AST) in depth first order. |
6 | /// |
7 | /// The principle aim of this trait is to enable callers to perform case |
8 | /// analysis on an abstract syntax tree without necessarily using recursion. |
9 | /// In particular, this permits callers to do case analysis with constant stack |
10 | /// usage, which can be important since the size of an abstract syntax tree |
11 | /// may be proportional to end user input. |
12 | /// |
13 | /// Typical usage of this trait involves providing an implementation and then |
14 | /// running it using the [`visit`] function. |
15 | /// |
16 | /// Note that the abstract syntax tree for a regular expression is quite |
17 | /// complex. Unless you specifically need it, you might be able to use the much |
18 | /// simpler [high-level intermediate representation](crate::hir::Hir) and its |
19 | /// [corresponding `Visitor` trait](crate::hir::Visitor) instead. |
20 | pub trait Visitor { |
21 | /// The result of visiting an AST. |
22 | type Output; |
23 | /// An error that visiting an AST might return. |
24 | type Err; |
25 | |
26 | /// All implementors of `Visitor` must provide a `finish` method, which |
27 | /// yields the result of visiting the AST or an error. |
28 | fn finish(self) -> Result<Self::Output, Self::Err>; |
29 | |
30 | /// This method is called before beginning traversal of the AST. |
31 | fn start(&mut self) {} |
32 | |
33 | /// This method is called on an `Ast` before descending into child `Ast` |
34 | /// nodes. |
35 | fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
36 | Ok(()) |
37 | } |
38 | |
39 | /// This method is called on an `Ast` after descending all of its child |
40 | /// `Ast` nodes. |
41 | fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> { |
42 | Ok(()) |
43 | } |
44 | |
45 | /// This method is called between child nodes of an |
46 | /// [`Alternation`](ast::Alternation). |
47 | fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { |
48 | Ok(()) |
49 | } |
50 | |
51 | /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) |
52 | /// before descending into child nodes. |
53 | fn visit_class_set_item_pre( |
54 | &mut self, |
55 | _ast: &ast::ClassSetItem, |
56 | ) -> Result<(), Self::Err> { |
57 | Ok(()) |
58 | } |
59 | |
60 | /// This method is called on every [`ClassSetItem`](ast::ClassSetItem) |
61 | /// after descending into child nodes. |
62 | fn visit_class_set_item_post( |
63 | &mut self, |
64 | _ast: &ast::ClassSetItem, |
65 | ) -> Result<(), Self::Err> { |
66 | Ok(()) |
67 | } |
68 | |
69 | /// This method is called on every |
70 | /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into |
71 | /// child nodes. |
72 | fn visit_class_set_binary_op_pre( |
73 | &mut self, |
74 | _ast: &ast::ClassSetBinaryOp, |
75 | ) -> Result<(), Self::Err> { |
76 | Ok(()) |
77 | } |
78 | |
79 | /// This method is called on every |
80 | /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child |
81 | /// nodes. |
82 | fn visit_class_set_binary_op_post( |
83 | &mut self, |
84 | _ast: &ast::ClassSetBinaryOp, |
85 | ) -> Result<(), Self::Err> { |
86 | Ok(()) |
87 | } |
88 | |
89 | /// This method is called between the left hand and right hand child nodes |
90 | /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp). |
91 | fn visit_class_set_binary_op_in( |
92 | &mut self, |
93 | _ast: &ast::ClassSetBinaryOp, |
94 | ) -> Result<(), Self::Err> { |
95 | Ok(()) |
96 | } |
97 | } |
98 | |
99 | /// Executes an implementation of `Visitor` in constant stack space. |
100 | /// |
101 | /// This function will visit every node in the given `Ast` while calling the |
102 | /// appropriate methods provided by the [`Visitor`] trait. |
103 | /// |
104 | /// The primary use case for this method is when one wants to perform case |
105 | /// analysis over an `Ast` without using a stack size proportional to the depth |
106 | /// of the `Ast`. Namely, this method will instead use constant stack size, but |
107 | /// will use heap space proportional to the size of the `Ast`. This may be |
108 | /// desirable in cases where the size of `Ast` is proportional to end user |
109 | /// input. |
110 | /// |
111 | /// If the visitor returns an error at any point, then visiting is stopped and |
112 | /// the error is returned. |
113 | pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> { |
114 | HeapVisitor::new().visit(ast, visitor) |
115 | } |
116 | |
117 | /// HeapVisitor visits every item in an `Ast` recursively using constant stack |
118 | /// size and a heap size proportional to the size of the `Ast`. |
119 | struct HeapVisitor<'a> { |
120 | /// A stack of `Ast` nodes. This is roughly analogous to the call stack |
121 | /// used in a typical recursive visitor. |
122 | stack: Vec<(&'a Ast, Frame<'a>)>, |
123 | /// Similar to the `Ast` stack above, but is used only for character |
124 | /// classes. In particular, character classes embed their own mini |
125 | /// recursive syntax. |
126 | stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>, |
127 | } |
128 | |
129 | /// Represents a single stack frame while performing structural induction over |
130 | /// an `Ast`. |
131 | enum Frame<'a> { |
132 | /// A stack frame allocated just before descending into a repetition |
133 | /// operator's child node. |
134 | Repetition(&'a ast::Repetition), |
135 | /// A stack frame allocated just before descending into a group's child |
136 | /// node. |
137 | Group(&'a ast::Group), |
138 | /// The stack frame used while visiting every child node of a concatenation |
139 | /// of expressions. |
140 | Concat { |
141 | /// The child node we are currently visiting. |
142 | head: &'a Ast, |
143 | /// The remaining child nodes to visit (which may be empty). |
144 | tail: &'a [Ast], |
145 | }, |
146 | /// The stack frame used while visiting every child node of an alternation |
147 | /// of expressions. |
148 | Alternation { |
149 | /// The child node we are currently visiting. |
150 | head: &'a Ast, |
151 | /// The remaining child nodes to visit (which may be empty). |
152 | tail: &'a [Ast], |
153 | }, |
154 | } |
155 | |
156 | /// Represents a single stack frame while performing structural induction over |
157 | /// a character class. |
158 | enum ClassFrame<'a> { |
159 | /// The stack frame used while visiting every child node of a union of |
160 | /// character class items. |
161 | Union { |
162 | /// The child node we are currently visiting. |
163 | head: &'a ast::ClassSetItem, |
164 | /// The remaining child nodes to visit (which may be empty). |
165 | tail: &'a [ast::ClassSetItem], |
166 | }, |
167 | /// The stack frame used while a binary class operation. |
168 | Binary { op: &'a ast::ClassSetBinaryOp }, |
169 | /// A stack frame allocated just before descending into a binary operator's |
170 | /// left hand child node. |
171 | BinaryLHS { |
172 | op: &'a ast::ClassSetBinaryOp, |
173 | lhs: &'a ast::ClassSet, |
174 | rhs: &'a ast::ClassSet, |
175 | }, |
176 | /// A stack frame allocated just before descending into a binary operator's |
177 | /// right hand child node. |
178 | BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet }, |
179 | } |
180 | |
181 | /// A representation of the inductive step when performing structural induction |
182 | /// over a character class. |
183 | /// |
184 | /// Note that there is no analogous explicit type for the inductive step for |
185 | /// `Ast` nodes because the inductive step is just an `Ast`. For character |
186 | /// classes, the inductive step can produce one of two possible child nodes: |
187 | /// an item or a binary operation. (An item cannot be a binary operation |
188 | /// because that would imply binary operations can be unioned in the concrete |
189 | /// syntax, which is not possible.) |
190 | enum ClassInduct<'a> { |
191 | Item(&'a ast::ClassSetItem), |
192 | BinaryOp(&'a ast::ClassSetBinaryOp), |
193 | } |
194 | |
195 | impl<'a> HeapVisitor<'a> { |
196 | fn new() -> HeapVisitor<'a> { |
197 | HeapVisitor { stack: vec![], stack_class: vec![] } |
198 | } |
199 | |
200 | fn visit<V: Visitor>( |
201 | &mut self, |
202 | mut ast: &'a Ast, |
203 | mut visitor: V, |
204 | ) -> Result<V::Output, V::Err> { |
205 | self.stack.clear(); |
206 | self.stack_class.clear(); |
207 | |
208 | visitor.start(); |
209 | loop { |
210 | visitor.visit_pre(ast)?; |
211 | if let Some(x) = self.induct(ast, &mut visitor)? { |
212 | let child = x.child(); |
213 | self.stack.push((ast, x)); |
214 | ast = child; |
215 | continue; |
216 | } |
217 | // No induction means we have a base case, so we can post visit |
218 | // it now. |
219 | visitor.visit_post(ast)?; |
220 | |
221 | // At this point, we now try to pop our call stack until it is |
222 | // either empty or we hit another inductive case. |
223 | loop { |
224 | let (post_ast, frame) = match self.stack.pop() { |
225 | None => return visitor.finish(), |
226 | Some((post_ast, frame)) => (post_ast, frame), |
227 | }; |
228 | // If this is a concat/alternate, then we might have additional |
229 | // inductive steps to process. |
230 | if let Some(x) = self.pop(frame) { |
231 | if let Frame::Alternation { .. } = x { |
232 | visitor.visit_alternation_in()?; |
233 | } |
234 | ast = x.child(); |
235 | self.stack.push((post_ast, x)); |
236 | break; |
237 | } |
238 | // Otherwise, we've finished visiting all the child nodes for |
239 | // this AST, so we can post visit it now. |
240 | visitor.visit_post(post_ast)?; |
241 | } |
242 | } |
243 | } |
244 | |
245 | /// Build a stack frame for the given AST if one is needed (which occurs if |
246 | /// and only if there are child nodes in the AST). Otherwise, return None. |
247 | /// |
248 | /// If this visits a class, then the underlying visitor implementation may |
249 | /// return an error which will be passed on here. |
250 | fn induct<V: Visitor>( |
251 | &mut self, |
252 | ast: &'a Ast, |
253 | visitor: &mut V, |
254 | ) -> Result<Option<Frame<'a>>, V::Err> { |
255 | Ok(match *ast { |
256 | Ast::Class(ast::Class::Bracketed(ref x)) => { |
257 | self.visit_class(x, visitor)?; |
258 | None |
259 | } |
260 | Ast::Repetition(ref x) => Some(Frame::Repetition(x)), |
261 | Ast::Group(ref x) => Some(Frame::Group(x)), |
262 | Ast::Concat(ref x) if x.asts.is_empty() => None, |
263 | Ast::Concat(ref x) => { |
264 | Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] }) |
265 | } |
266 | Ast::Alternation(ref x) if x.asts.is_empty() => None, |
267 | Ast::Alternation(ref x) => Some(Frame::Alternation { |
268 | head: &x.asts[0], |
269 | tail: &x.asts[1..], |
270 | }), |
271 | _ => None, |
272 | }) |
273 | } |
274 | |
275 | /// Pops the given frame. If the frame has an additional inductive step, |
276 | /// then return it, otherwise return `None`. |
277 | fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { |
278 | match induct { |
279 | Frame::Repetition(_) => None, |
280 | Frame::Group(_) => None, |
281 | Frame::Concat { tail, .. } => { |
282 | if tail.is_empty() { |
283 | None |
284 | } else { |
285 | Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) |
286 | } |
287 | } |
288 | Frame::Alternation { tail, .. } => { |
289 | if tail.is_empty() { |
290 | None |
291 | } else { |
292 | Some(Frame::Alternation { |
293 | head: &tail[0], |
294 | tail: &tail[1..], |
295 | }) |
296 | } |
297 | } |
298 | } |
299 | } |
300 | |
301 | fn visit_class<V: Visitor>( |
302 | &mut self, |
303 | ast: &'a ast::ClassBracketed, |
304 | visitor: &mut V, |
305 | ) -> Result<(), V::Err> { |
306 | let mut ast = ClassInduct::from_bracketed(ast); |
307 | loop { |
308 | self.visit_class_pre(&ast, visitor)?; |
309 | if let Some(x) = self.induct_class(&ast) { |
310 | let child = x.child(); |
311 | self.stack_class.push((ast, x)); |
312 | ast = child; |
313 | continue; |
314 | } |
315 | self.visit_class_post(&ast, visitor)?; |
316 | |
317 | // At this point, we now try to pop our call stack until it is |
318 | // either empty or we hit another inductive case. |
319 | loop { |
320 | let (post_ast, frame) = match self.stack_class.pop() { |
321 | None => return Ok(()), |
322 | Some((post_ast, frame)) => (post_ast, frame), |
323 | }; |
324 | // If this is a union or a binary op, then we might have |
325 | // additional inductive steps to process. |
326 | if let Some(x) = self.pop_class(frame) { |
327 | if let ClassFrame::BinaryRHS { ref op, .. } = x { |
328 | visitor.visit_class_set_binary_op_in(op)?; |
329 | } |
330 | ast = x.child(); |
331 | self.stack_class.push((post_ast, x)); |
332 | break; |
333 | } |
334 | // Otherwise, we've finished visiting all the child nodes for |
335 | // this class node, so we can post visit it now. |
336 | self.visit_class_post(&post_ast, visitor)?; |
337 | } |
338 | } |
339 | } |
340 | |
341 | /// Call the appropriate `Visitor` methods given an inductive step. |
342 | fn visit_class_pre<V: Visitor>( |
343 | &self, |
344 | ast: &ClassInduct<'a>, |
345 | visitor: &mut V, |
346 | ) -> Result<(), V::Err> { |
347 | match *ast { |
348 | ClassInduct::Item(item) => { |
349 | visitor.visit_class_set_item_pre(item)?; |
350 | } |
351 | ClassInduct::BinaryOp(op) => { |
352 | visitor.visit_class_set_binary_op_pre(op)?; |
353 | } |
354 | } |
355 | Ok(()) |
356 | } |
357 | |
358 | /// Call the appropriate `Visitor` methods given an inductive step. |
359 | fn visit_class_post<V: Visitor>( |
360 | &self, |
361 | ast: &ClassInduct<'a>, |
362 | visitor: &mut V, |
363 | ) -> Result<(), V::Err> { |
364 | match *ast { |
365 | ClassInduct::Item(item) => { |
366 | visitor.visit_class_set_item_post(item)?; |
367 | } |
368 | ClassInduct::BinaryOp(op) => { |
369 | visitor.visit_class_set_binary_op_post(op)?; |
370 | } |
371 | } |
372 | Ok(()) |
373 | } |
374 | |
375 | /// Build a stack frame for the given class node if one is needed (which |
376 | /// occurs if and only if there are child nodes). Otherwise, return None. |
377 | fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> { |
378 | match *ast { |
379 | ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => { |
380 | match x.kind { |
381 | ast::ClassSet::Item(ref item) => { |
382 | Some(ClassFrame::Union { head: item, tail: &[] }) |
383 | } |
384 | ast::ClassSet::BinaryOp(ref op) => { |
385 | Some(ClassFrame::Binary { op }) |
386 | } |
387 | } |
388 | } |
389 | ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => { |
390 | if x.items.is_empty() { |
391 | None |
392 | } else { |
393 | Some(ClassFrame::Union { |
394 | head: &x.items[0], |
395 | tail: &x.items[1..], |
396 | }) |
397 | } |
398 | } |
399 | ClassInduct::BinaryOp(op) => { |
400 | Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs }) |
401 | } |
402 | _ => None, |
403 | } |
404 | } |
405 | |
406 | /// Pops the given frame. If the frame has an additional inductive step, |
407 | /// then return it, otherwise return `None`. |
408 | fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> { |
409 | match induct { |
410 | ClassFrame::Union { tail, .. } => { |
411 | if tail.is_empty() { |
412 | None |
413 | } else { |
414 | Some(ClassFrame::Union { |
415 | head: &tail[0], |
416 | tail: &tail[1..], |
417 | }) |
418 | } |
419 | } |
420 | ClassFrame::Binary { .. } => None, |
421 | ClassFrame::BinaryLHS { op, rhs, .. } => { |
422 | Some(ClassFrame::BinaryRHS { op, rhs }) |
423 | } |
424 | ClassFrame::BinaryRHS { .. } => None, |
425 | } |
426 | } |
427 | } |
428 | |
429 | impl<'a> Frame<'a> { |
430 | /// Perform the next inductive step on this frame and return the next |
431 | /// child AST node to visit. |
432 | fn child(&self) -> &'a Ast { |
433 | match *self { |
434 | Frame::Repetition(rep: &Repetition) => &rep.ast, |
435 | Frame::Group(group: &Group) => &group.ast, |
436 | Frame::Concat { head: &Ast, .. } => head, |
437 | Frame::Alternation { head: &Ast, .. } => head, |
438 | } |
439 | } |
440 | } |
441 | |
442 | impl<'a> ClassFrame<'a> { |
443 | /// Perform the next inductive step on this frame and return the next |
444 | /// child class node to visit. |
445 | fn child(&self) -> ClassInduct<'a> { |
446 | match *self { |
447 | ClassFrame::Union { head: &ClassSetItem, .. } => ClassInduct::Item(head), |
448 | ClassFrame::Binary { op: &ClassSetBinaryOp, .. } => ClassInduct::BinaryOp(op), |
449 | ClassFrame::BinaryLHS { ref lhs: &&ClassSet, .. } => { |
450 | ClassInduct::from_set(ast:lhs) |
451 | } |
452 | ClassFrame::BinaryRHS { ref rhs: &&ClassSet, .. } => { |
453 | ClassInduct::from_set(ast:rhs) |
454 | } |
455 | } |
456 | } |
457 | } |
458 | |
459 | impl<'a> ClassInduct<'a> { |
460 | fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> { |
461 | ClassInduct::from_set(&ast.kind) |
462 | } |
463 | |
464 | fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> { |
465 | match *ast { |
466 | ast::ClassSet::Item(ref item: &ClassSetItem) => ClassInduct::Item(item), |
467 | ast::ClassSet::BinaryOp(ref op: &ClassSetBinaryOp) => ClassInduct::BinaryOp(op), |
468 | } |
469 | } |
470 | } |
471 | |
472 | impl<'a> core::fmt::Debug for ClassFrame<'a> { |
473 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
474 | let x: &str = match *self { |
475 | ClassFrame::Union { .. } => "Union" , |
476 | ClassFrame::Binary { .. } => "Binary" , |
477 | ClassFrame::BinaryLHS { .. } => "BinaryLHS" , |
478 | ClassFrame::BinaryRHS { .. } => "BinaryRHS" , |
479 | }; |
480 | write!(f, " {}" , x) |
481 | } |
482 | } |
483 | |
484 | impl<'a> core::fmt::Debug for ClassInduct<'a> { |
485 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
486 | let x = match *self { |
487 | ClassInduct::Item(it) => match *it { |
488 | ast::ClassSetItem::Empty(_) => "Item(Empty)" , |
489 | ast::ClassSetItem::Literal(_) => "Item(Literal)" , |
490 | ast::ClassSetItem::Range(_) => "Item(Range)" , |
491 | ast::ClassSetItem::Ascii(_) => "Item(Ascii)" , |
492 | ast::ClassSetItem::Perl(_) => "Item(Perl)" , |
493 | ast::ClassSetItem::Unicode(_) => "Item(Unicode)" , |
494 | ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)" , |
495 | ast::ClassSetItem::Union(_) => "Item(Union)" , |
496 | }, |
497 | ClassInduct::BinaryOp(it) => match it.kind { |
498 | ast::ClassSetBinaryOpKind::Intersection => { |
499 | "BinaryOp(Intersection)" |
500 | } |
501 | ast::ClassSetBinaryOpKind::Difference => { |
502 | "BinaryOp(Difference)" |
503 | } |
504 | ast::ClassSetBinaryOpKind::SymmetricDifference => { |
505 | "BinaryOp(SymmetricDifference)" |
506 | } |
507 | }, |
508 | }; |
509 | write!(f, " {}" , x) |
510 | } |
511 | } |
512 | |