1use alloc::{vec, vec::Vec};
2
3use crate::ast::{self, Ast};
4
5/// A trait for visiting an abstract syntax tree (AST) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on an abstract syntax tree without necessarily using recursion.
9/// In particular, this permits callers to do case analysis with constant stack
10/// usage, which can be important since the size of an abstract syntax tree
11/// may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`] function.
15///
16/// Note that the abstract syntax tree for a regular expression is quite
17/// complex. Unless you specifically need it, you might be able to use the much
18/// simpler [high-level intermediate representation](crate::hir::Hir) and its
19/// [corresponding `Visitor` trait](crate::hir::Visitor) instead.
20pub trait Visitor {
21 /// The result of visiting an AST.
22 type Output;
23 /// An error that visiting an AST might return.
24 type Err;
25
26 /// All implementors of `Visitor` must provide a `finish` method, which
27 /// yields the result of visiting the AST or an error.
28 fn finish(self) -> Result<Self::Output, Self::Err>;
29
30 /// This method is called before beginning traversal of the AST.
31 fn start(&mut self) {}
32
33 /// This method is called on an `Ast` before descending into child `Ast`
34 /// nodes.
35 fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
36 Ok(())
37 }
38
39 /// This method is called on an `Ast` after descending all of its child
40 /// `Ast` nodes.
41 fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
42 Ok(())
43 }
44
45 /// This method is called between child nodes of an
46 /// [`Alternation`](ast::Alternation).
47 fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
48 Ok(())
49 }
50
51 /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
52 /// before descending into child nodes.
53 fn visit_class_set_item_pre(
54 &mut self,
55 _ast: &ast::ClassSetItem,
56 ) -> Result<(), Self::Err> {
57 Ok(())
58 }
59
60 /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
61 /// after descending into child nodes.
62 fn visit_class_set_item_post(
63 &mut self,
64 _ast: &ast::ClassSetItem,
65 ) -> Result<(), Self::Err> {
66 Ok(())
67 }
68
69 /// This method is called on every
70 /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into
71 /// child nodes.
72 fn visit_class_set_binary_op_pre(
73 &mut self,
74 _ast: &ast::ClassSetBinaryOp,
75 ) -> Result<(), Self::Err> {
76 Ok(())
77 }
78
79 /// This method is called on every
80 /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child
81 /// nodes.
82 fn visit_class_set_binary_op_post(
83 &mut self,
84 _ast: &ast::ClassSetBinaryOp,
85 ) -> Result<(), Self::Err> {
86 Ok(())
87 }
88
89 /// This method is called between the left hand and right hand child nodes
90 /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp).
91 fn visit_class_set_binary_op_in(
92 &mut self,
93 _ast: &ast::ClassSetBinaryOp,
94 ) -> Result<(), Self::Err> {
95 Ok(())
96 }
97}
98
99/// Executes an implementation of `Visitor` in constant stack space.
100///
101/// This function will visit every node in the given `Ast` while calling the
102/// appropriate methods provided by the [`Visitor`] trait.
103///
104/// The primary use case for this method is when one wants to perform case
105/// analysis over an `Ast` without using a stack size proportional to the depth
106/// of the `Ast`. Namely, this method will instead use constant stack size, but
107/// will use heap space proportional to the size of the `Ast`. This may be
108/// desirable in cases where the size of `Ast` is proportional to end user
109/// input.
110///
111/// If the visitor returns an error at any point, then visiting is stopped and
112/// the error is returned.
113pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
114 HeapVisitor::new().visit(ast, visitor)
115}
116
117/// HeapVisitor visits every item in an `Ast` recursively using constant stack
118/// size and a heap size proportional to the size of the `Ast`.
119struct HeapVisitor<'a> {
120 /// A stack of `Ast` nodes. This is roughly analogous to the call stack
121 /// used in a typical recursive visitor.
122 stack: Vec<(&'a Ast, Frame<'a>)>,
123 /// Similar to the `Ast` stack above, but is used only for character
124 /// classes. In particular, character classes embed their own mini
125 /// recursive syntax.
126 stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
127}
128
129/// Represents a single stack frame while performing structural induction over
130/// an `Ast`.
131enum Frame<'a> {
132 /// A stack frame allocated just before descending into a repetition
133 /// operator's child node.
134 Repetition(&'a ast::Repetition),
135 /// A stack frame allocated just before descending into a group's child
136 /// node.
137 Group(&'a ast::Group),
138 /// The stack frame used while visiting every child node of a concatenation
139 /// of expressions.
140 Concat {
141 /// The child node we are currently visiting.
142 head: &'a Ast,
143 /// The remaining child nodes to visit (which may be empty).
144 tail: &'a [Ast],
145 },
146 /// The stack frame used while visiting every child node of an alternation
147 /// of expressions.
148 Alternation {
149 /// The child node we are currently visiting.
150 head: &'a Ast,
151 /// The remaining child nodes to visit (which may be empty).
152 tail: &'a [Ast],
153 },
154}
155
156/// Represents a single stack frame while performing structural induction over
157/// a character class.
158enum ClassFrame<'a> {
159 /// The stack frame used while visiting every child node of a union of
160 /// character class items.
161 Union {
162 /// The child node we are currently visiting.
163 head: &'a ast::ClassSetItem,
164 /// The remaining child nodes to visit (which may be empty).
165 tail: &'a [ast::ClassSetItem],
166 },
167 /// The stack frame used while a binary class operation.
168 Binary { op: &'a ast::ClassSetBinaryOp },
169 /// A stack frame allocated just before descending into a binary operator's
170 /// left hand child node.
171 BinaryLHS {
172 op: &'a ast::ClassSetBinaryOp,
173 lhs: &'a ast::ClassSet,
174 rhs: &'a ast::ClassSet,
175 },
176 /// A stack frame allocated just before descending into a binary operator's
177 /// right hand child node.
178 BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
179}
180
181/// A representation of the inductive step when performing structural induction
182/// over a character class.
183///
184/// Note that there is no analogous explicit type for the inductive step for
185/// `Ast` nodes because the inductive step is just an `Ast`. For character
186/// classes, the inductive step can produce one of two possible child nodes:
187/// an item or a binary operation. (An item cannot be a binary operation
188/// because that would imply binary operations can be unioned in the concrete
189/// syntax, which is not possible.)
190enum ClassInduct<'a> {
191 Item(&'a ast::ClassSetItem),
192 BinaryOp(&'a ast::ClassSetBinaryOp),
193}
194
195impl<'a> HeapVisitor<'a> {
196 fn new() -> HeapVisitor<'a> {
197 HeapVisitor { stack: vec![], stack_class: vec![] }
198 }
199
200 fn visit<V: Visitor>(
201 &mut self,
202 mut ast: &'a Ast,
203 mut visitor: V,
204 ) -> Result<V::Output, V::Err> {
205 self.stack.clear();
206 self.stack_class.clear();
207
208 visitor.start();
209 loop {
210 visitor.visit_pre(ast)?;
211 if let Some(x) = self.induct(ast, &mut visitor)? {
212 let child = x.child();
213 self.stack.push((ast, x));
214 ast = child;
215 continue;
216 }
217 // No induction means we have a base case, so we can post visit
218 // it now.
219 visitor.visit_post(ast)?;
220
221 // At this point, we now try to pop our call stack until it is
222 // either empty or we hit another inductive case.
223 loop {
224 let (post_ast, frame) = match self.stack.pop() {
225 None => return visitor.finish(),
226 Some((post_ast, frame)) => (post_ast, frame),
227 };
228 // If this is a concat/alternate, then we might have additional
229 // inductive steps to process.
230 if let Some(x) = self.pop(frame) {
231 if let Frame::Alternation { .. } = x {
232 visitor.visit_alternation_in()?;
233 }
234 ast = x.child();
235 self.stack.push((post_ast, x));
236 break;
237 }
238 // Otherwise, we've finished visiting all the child nodes for
239 // this AST, so we can post visit it now.
240 visitor.visit_post(post_ast)?;
241 }
242 }
243 }
244
245 /// Build a stack frame for the given AST if one is needed (which occurs if
246 /// and only if there are child nodes in the AST). Otherwise, return None.
247 ///
248 /// If this visits a class, then the underlying visitor implementation may
249 /// return an error which will be passed on here.
250 fn induct<V: Visitor>(
251 &mut self,
252 ast: &'a Ast,
253 visitor: &mut V,
254 ) -> Result<Option<Frame<'a>>, V::Err> {
255 Ok(match *ast {
256 Ast::Class(ast::Class::Bracketed(ref x)) => {
257 self.visit_class(x, visitor)?;
258 None
259 }
260 Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
261 Ast::Group(ref x) => Some(Frame::Group(x)),
262 Ast::Concat(ref x) if x.asts.is_empty() => None,
263 Ast::Concat(ref x) => {
264 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
265 }
266 Ast::Alternation(ref x) if x.asts.is_empty() => None,
267 Ast::Alternation(ref x) => Some(Frame::Alternation {
268 head: &x.asts[0],
269 tail: &x.asts[1..],
270 }),
271 _ => None,
272 })
273 }
274
275 /// Pops the given frame. If the frame has an additional inductive step,
276 /// then return it, otherwise return `None`.
277 fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
278 match induct {
279 Frame::Repetition(_) => None,
280 Frame::Group(_) => None,
281 Frame::Concat { tail, .. } => {
282 if tail.is_empty() {
283 None
284 } else {
285 Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
286 }
287 }
288 Frame::Alternation { tail, .. } => {
289 if tail.is_empty() {
290 None
291 } else {
292 Some(Frame::Alternation {
293 head: &tail[0],
294 tail: &tail[1..],
295 })
296 }
297 }
298 }
299 }
300
301 fn visit_class<V: Visitor>(
302 &mut self,
303 ast: &'a ast::ClassBracketed,
304 visitor: &mut V,
305 ) -> Result<(), V::Err> {
306 let mut ast = ClassInduct::from_bracketed(ast);
307 loop {
308 self.visit_class_pre(&ast, visitor)?;
309 if let Some(x) = self.induct_class(&ast) {
310 let child = x.child();
311 self.stack_class.push((ast, x));
312 ast = child;
313 continue;
314 }
315 self.visit_class_post(&ast, visitor)?;
316
317 // At this point, we now try to pop our call stack until it is
318 // either empty or we hit another inductive case.
319 loop {
320 let (post_ast, frame) = match self.stack_class.pop() {
321 None => return Ok(()),
322 Some((post_ast, frame)) => (post_ast, frame),
323 };
324 // If this is a union or a binary op, then we might have
325 // additional inductive steps to process.
326 if let Some(x) = self.pop_class(frame) {
327 if let ClassFrame::BinaryRHS { ref op, .. } = x {
328 visitor.visit_class_set_binary_op_in(op)?;
329 }
330 ast = x.child();
331 self.stack_class.push((post_ast, x));
332 break;
333 }
334 // Otherwise, we've finished visiting all the child nodes for
335 // this class node, so we can post visit it now.
336 self.visit_class_post(&post_ast, visitor)?;
337 }
338 }
339 }
340
341 /// Call the appropriate `Visitor` methods given an inductive step.
342 fn visit_class_pre<V: Visitor>(
343 &self,
344 ast: &ClassInduct<'a>,
345 visitor: &mut V,
346 ) -> Result<(), V::Err> {
347 match *ast {
348 ClassInduct::Item(item) => {
349 visitor.visit_class_set_item_pre(item)?;
350 }
351 ClassInduct::BinaryOp(op) => {
352 visitor.visit_class_set_binary_op_pre(op)?;
353 }
354 }
355 Ok(())
356 }
357
358 /// Call the appropriate `Visitor` methods given an inductive step.
359 fn visit_class_post<V: Visitor>(
360 &self,
361 ast: &ClassInduct<'a>,
362 visitor: &mut V,
363 ) -> Result<(), V::Err> {
364 match *ast {
365 ClassInduct::Item(item) => {
366 visitor.visit_class_set_item_post(item)?;
367 }
368 ClassInduct::BinaryOp(op) => {
369 visitor.visit_class_set_binary_op_post(op)?;
370 }
371 }
372 Ok(())
373 }
374
375 /// Build a stack frame for the given class node if one is needed (which
376 /// occurs if and only if there are child nodes). Otherwise, return None.
377 fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
378 match *ast {
379 ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
380 match x.kind {
381 ast::ClassSet::Item(ref item) => {
382 Some(ClassFrame::Union { head: item, tail: &[] })
383 }
384 ast::ClassSet::BinaryOp(ref op) => {
385 Some(ClassFrame::Binary { op })
386 }
387 }
388 }
389 ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
390 if x.items.is_empty() {
391 None
392 } else {
393 Some(ClassFrame::Union {
394 head: &x.items[0],
395 tail: &x.items[1..],
396 })
397 }
398 }
399 ClassInduct::BinaryOp(op) => {
400 Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
401 }
402 _ => None,
403 }
404 }
405
406 /// Pops the given frame. If the frame has an additional inductive step,
407 /// then return it, otherwise return `None`.
408 fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
409 match induct {
410 ClassFrame::Union { tail, .. } => {
411 if tail.is_empty() {
412 None
413 } else {
414 Some(ClassFrame::Union {
415 head: &tail[0],
416 tail: &tail[1..],
417 })
418 }
419 }
420 ClassFrame::Binary { .. } => None,
421 ClassFrame::BinaryLHS { op, rhs, .. } => {
422 Some(ClassFrame::BinaryRHS { op, rhs })
423 }
424 ClassFrame::BinaryRHS { .. } => None,
425 }
426 }
427}
428
429impl<'a> Frame<'a> {
430 /// Perform the next inductive step on this frame and return the next
431 /// child AST node to visit.
432 fn child(&self) -> &'a Ast {
433 match *self {
434 Frame::Repetition(rep: &Repetition) => &rep.ast,
435 Frame::Group(group: &Group) => &group.ast,
436 Frame::Concat { head: &Ast, .. } => head,
437 Frame::Alternation { head: &Ast, .. } => head,
438 }
439 }
440}
441
442impl<'a> ClassFrame<'a> {
443 /// Perform the next inductive step on this frame and return the next
444 /// child class node to visit.
445 fn child(&self) -> ClassInduct<'a> {
446 match *self {
447 ClassFrame::Union { head: &ClassSetItem, .. } => ClassInduct::Item(head),
448 ClassFrame::Binary { op: &ClassSetBinaryOp, .. } => ClassInduct::BinaryOp(op),
449 ClassFrame::BinaryLHS { ref lhs: &&ClassSet, .. } => {
450 ClassInduct::from_set(ast:lhs)
451 }
452 ClassFrame::BinaryRHS { ref rhs: &&ClassSet, .. } => {
453 ClassInduct::from_set(ast:rhs)
454 }
455 }
456 }
457}
458
459impl<'a> ClassInduct<'a> {
460 fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
461 ClassInduct::from_set(&ast.kind)
462 }
463
464 fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
465 match *ast {
466 ast::ClassSet::Item(ref item: &ClassSetItem) => ClassInduct::Item(item),
467 ast::ClassSet::BinaryOp(ref op: &ClassSetBinaryOp) => ClassInduct::BinaryOp(op),
468 }
469 }
470}
471
472impl<'a> core::fmt::Debug for ClassFrame<'a> {
473 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
474 let x: &str = match *self {
475 ClassFrame::Union { .. } => "Union",
476 ClassFrame::Binary { .. } => "Binary",
477 ClassFrame::BinaryLHS { .. } => "BinaryLHS",
478 ClassFrame::BinaryRHS { .. } => "BinaryRHS",
479 };
480 write!(f, "{}", x)
481 }
482}
483
484impl<'a> core::fmt::Debug for ClassInduct<'a> {
485 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
486 let x = match *self {
487 ClassInduct::Item(it) => match *it {
488 ast::ClassSetItem::Empty(_) => "Item(Empty)",
489 ast::ClassSetItem::Literal(_) => "Item(Literal)",
490 ast::ClassSetItem::Range(_) => "Item(Range)",
491 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
492 ast::ClassSetItem::Perl(_) => "Item(Perl)",
493 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
494 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
495 ast::ClassSetItem::Union(_) => "Item(Union)",
496 },
497 ClassInduct::BinaryOp(it) => match it.kind {
498 ast::ClassSetBinaryOpKind::Intersection => {
499 "BinaryOp(Intersection)"
500 }
501 ast::ClassSetBinaryOpKind::Difference => {
502 "BinaryOp(Difference)"
503 }
504 ast::ClassSetBinaryOpKind::SymmetricDifference => {
505 "BinaryOp(SymmetricDifference)"
506 }
507 },
508 };
509 write!(f, "{}", x)
510 }
511}
512