1use alloc::{vec, vec::Vec};
2
3use crate::ast::{self, Ast};
4
5/// A trait for visiting an abstract syntax tree (AST) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on an abstract syntax tree without necessarily using recursion.
9/// In particular, this permits callers to do case analysis with constant stack
10/// usage, which can be important since the size of an abstract syntax tree
11/// may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`] function.
15///
16/// Note that the abstract syntax tree for a regular expression is quite
17/// complex. Unless you specifically need it, you might be able to use the much
18/// simpler [high-level intermediate representation](crate::hir::Hir) and its
19/// [corresponding `Visitor` trait](crate::hir::Visitor) instead.
20pub trait Visitor {
21 /// The result of visiting an AST.
22 type Output;
23 /// An error that visiting an AST might return.
24 type Err;
25
26 /// All implementors of `Visitor` must provide a `finish` method, which
27 /// yields the result of visiting the AST or an error.
28 fn finish(self) -> Result<Self::Output, Self::Err>;
29
30 /// This method is called before beginning traversal of the AST.
31 fn start(&mut self) {}
32
33 /// This method is called on an `Ast` before descending into child `Ast`
34 /// nodes.
35 fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
36 Ok(())
37 }
38
39 /// This method is called on an `Ast` after descending all of its child
40 /// `Ast` nodes.
41 fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
42 Ok(())
43 }
44
45 /// This method is called between child nodes of an
46 /// [`Alternation`](ast::Alternation).
47 fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
48 Ok(())
49 }
50
51 /// This method is called between child nodes of a concatenation.
52 fn visit_concat_in(&mut self) -> Result<(), Self::Err> {
53 Ok(())
54 }
55
56 /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
57 /// before descending into child nodes.
58 fn visit_class_set_item_pre(
59 &mut self,
60 _ast: &ast::ClassSetItem,
61 ) -> Result<(), Self::Err> {
62 Ok(())
63 }
64
65 /// This method is called on every [`ClassSetItem`](ast::ClassSetItem)
66 /// after descending into child nodes.
67 fn visit_class_set_item_post(
68 &mut self,
69 _ast: &ast::ClassSetItem,
70 ) -> Result<(), Self::Err> {
71 Ok(())
72 }
73
74 /// This method is called on every
75 /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) before descending into
76 /// child nodes.
77 fn visit_class_set_binary_op_pre(
78 &mut self,
79 _ast: &ast::ClassSetBinaryOp,
80 ) -> Result<(), Self::Err> {
81 Ok(())
82 }
83
84 /// This method is called on every
85 /// [`ClassSetBinaryOp`](ast::ClassSetBinaryOp) after descending into child
86 /// nodes.
87 fn visit_class_set_binary_op_post(
88 &mut self,
89 _ast: &ast::ClassSetBinaryOp,
90 ) -> Result<(), Self::Err> {
91 Ok(())
92 }
93
94 /// This method is called between the left hand and right hand child nodes
95 /// of a [`ClassSetBinaryOp`](ast::ClassSetBinaryOp).
96 fn visit_class_set_binary_op_in(
97 &mut self,
98 _ast: &ast::ClassSetBinaryOp,
99 ) -> Result<(), Self::Err> {
100 Ok(())
101 }
102}
103
104/// Executes an implementation of `Visitor` in constant stack space.
105///
106/// This function will visit every node in the given `Ast` while calling the
107/// appropriate methods provided by the [`Visitor`] trait.
108///
109/// The primary use case for this method is when one wants to perform case
110/// analysis over an `Ast` without using a stack size proportional to the depth
111/// of the `Ast`. Namely, this method will instead use constant stack size, but
112/// will use heap space proportional to the size of the `Ast`. This may be
113/// desirable in cases where the size of `Ast` is proportional to end user
114/// input.
115///
116/// If the visitor returns an error at any point, then visiting is stopped and
117/// the error is returned.
118pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
119 HeapVisitor::new().visit(ast, visitor)
120}
121
122/// HeapVisitor visits every item in an `Ast` recursively using constant stack
123/// size and a heap size proportional to the size of the `Ast`.
124struct HeapVisitor<'a> {
125 /// A stack of `Ast` nodes. This is roughly analogous to the call stack
126 /// used in a typical recursive visitor.
127 stack: Vec<(&'a Ast, Frame<'a>)>,
128 /// Similar to the `Ast` stack above, but is used only for character
129 /// classes. In particular, character classes embed their own mini
130 /// recursive syntax.
131 stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
132}
133
134/// Represents a single stack frame while performing structural induction over
135/// an `Ast`.
136enum Frame<'a> {
137 /// A stack frame allocated just before descending into a repetition
138 /// operator's child node.
139 Repetition(&'a ast::Repetition),
140 /// A stack frame allocated just before descending into a group's child
141 /// node.
142 Group(&'a ast::Group),
143 /// The stack frame used while visiting every child node of a concatenation
144 /// of expressions.
145 Concat {
146 /// The child node we are currently visiting.
147 head: &'a Ast,
148 /// The remaining child nodes to visit (which may be empty).
149 tail: &'a [Ast],
150 },
151 /// The stack frame used while visiting every child node of an alternation
152 /// of expressions.
153 Alternation {
154 /// The child node we are currently visiting.
155 head: &'a Ast,
156 /// The remaining child nodes to visit (which may be empty).
157 tail: &'a [Ast],
158 },
159}
160
161/// Represents a single stack frame while performing structural induction over
162/// a character class.
163enum ClassFrame<'a> {
164 /// The stack frame used while visiting every child node of a union of
165 /// character class items.
166 Union {
167 /// The child node we are currently visiting.
168 head: &'a ast::ClassSetItem,
169 /// The remaining child nodes to visit (which may be empty).
170 tail: &'a [ast::ClassSetItem],
171 },
172 /// The stack frame used while a binary class operation.
173 Binary { op: &'a ast::ClassSetBinaryOp },
174 /// A stack frame allocated just before descending into a binary operator's
175 /// left hand child node.
176 BinaryLHS {
177 op: &'a ast::ClassSetBinaryOp,
178 lhs: &'a ast::ClassSet,
179 rhs: &'a ast::ClassSet,
180 },
181 /// A stack frame allocated just before descending into a binary operator's
182 /// right hand child node.
183 BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
184}
185
186/// A representation of the inductive step when performing structural induction
187/// over a character class.
188///
189/// Note that there is no analogous explicit type for the inductive step for
190/// `Ast` nodes because the inductive step is just an `Ast`. For character
191/// classes, the inductive step can produce one of two possible child nodes:
192/// an item or a binary operation. (An item cannot be a binary operation
193/// because that would imply binary operations can be unioned in the concrete
194/// syntax, which is not possible.)
195enum ClassInduct<'a> {
196 Item(&'a ast::ClassSetItem),
197 BinaryOp(&'a ast::ClassSetBinaryOp),
198}
199
200impl<'a> HeapVisitor<'a> {
201 fn new() -> HeapVisitor<'a> {
202 HeapVisitor { stack: vec![], stack_class: vec![] }
203 }
204
205 fn visit<V: Visitor>(
206 &mut self,
207 mut ast: &'a Ast,
208 mut visitor: V,
209 ) -> Result<V::Output, V::Err> {
210 self.stack.clear();
211 self.stack_class.clear();
212
213 visitor.start();
214 loop {
215 visitor.visit_pre(ast)?;
216 if let Some(x) = self.induct(ast, &mut visitor)? {
217 let child = x.child();
218 self.stack.push((ast, x));
219 ast = child;
220 continue;
221 }
222 // No induction means we have a base case, so we can post visit
223 // it now.
224 visitor.visit_post(ast)?;
225
226 // At this point, we now try to pop our call stack until it is
227 // either empty or we hit another inductive case.
228 loop {
229 let (post_ast, frame) = match self.stack.pop() {
230 None => return visitor.finish(),
231 Some((post_ast, frame)) => (post_ast, frame),
232 };
233 // If this is a concat/alternate, then we might have additional
234 // inductive steps to process.
235 if let Some(x) = self.pop(frame) {
236 match x {
237 Frame::Alternation { .. } => {
238 visitor.visit_alternation_in()?;
239 }
240 Frame::Concat { .. } => {
241 visitor.visit_concat_in()?;
242 }
243 _ => {}
244 }
245 ast = x.child();
246 self.stack.push((post_ast, x));
247 break;
248 }
249 // Otherwise, we've finished visiting all the child nodes for
250 // this AST, so we can post visit it now.
251 visitor.visit_post(post_ast)?;
252 }
253 }
254 }
255
256 /// Build a stack frame for the given AST if one is needed (which occurs if
257 /// and only if there are child nodes in the AST). Otherwise, return None.
258 ///
259 /// If this visits a class, then the underlying visitor implementation may
260 /// return an error which will be passed on here.
261 fn induct<V: Visitor>(
262 &mut self,
263 ast: &'a Ast,
264 visitor: &mut V,
265 ) -> Result<Option<Frame<'a>>, V::Err> {
266 Ok(match *ast {
267 Ast::ClassBracketed(ref x) => {
268 self.visit_class(x, visitor)?;
269 None
270 }
271 Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
272 Ast::Group(ref x) => Some(Frame::Group(x)),
273 Ast::Concat(ref x) if x.asts.is_empty() => None,
274 Ast::Concat(ref x) => {
275 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
276 }
277 Ast::Alternation(ref x) if x.asts.is_empty() => None,
278 Ast::Alternation(ref x) => Some(Frame::Alternation {
279 head: &x.asts[0],
280 tail: &x.asts[1..],
281 }),
282 _ => None,
283 })
284 }
285
286 /// Pops the given frame. If the frame has an additional inductive step,
287 /// then return it, otherwise return `None`.
288 fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
289 match induct {
290 Frame::Repetition(_) => None,
291 Frame::Group(_) => None,
292 Frame::Concat { tail, .. } => {
293 if tail.is_empty() {
294 None
295 } else {
296 Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
297 }
298 }
299 Frame::Alternation { tail, .. } => {
300 if tail.is_empty() {
301 None
302 } else {
303 Some(Frame::Alternation {
304 head: &tail[0],
305 tail: &tail[1..],
306 })
307 }
308 }
309 }
310 }
311
312 fn visit_class<V: Visitor>(
313 &mut self,
314 ast: &'a ast::ClassBracketed,
315 visitor: &mut V,
316 ) -> Result<(), V::Err> {
317 let mut ast = ClassInduct::from_bracketed(ast);
318 loop {
319 self.visit_class_pre(&ast, visitor)?;
320 if let Some(x) = self.induct_class(&ast) {
321 let child = x.child();
322 self.stack_class.push((ast, x));
323 ast = child;
324 continue;
325 }
326 self.visit_class_post(&ast, visitor)?;
327
328 // At this point, we now try to pop our call stack until it is
329 // either empty or we hit another inductive case.
330 loop {
331 let (post_ast, frame) = match self.stack_class.pop() {
332 None => return Ok(()),
333 Some((post_ast, frame)) => (post_ast, frame),
334 };
335 // If this is a union or a binary op, then we might have
336 // additional inductive steps to process.
337 if let Some(x) = self.pop_class(frame) {
338 if let ClassFrame::BinaryRHS { ref op, .. } = x {
339 visitor.visit_class_set_binary_op_in(op)?;
340 }
341 ast = x.child();
342 self.stack_class.push((post_ast, x));
343 break;
344 }
345 // Otherwise, we've finished visiting all the child nodes for
346 // this class node, so we can post visit it now.
347 self.visit_class_post(&post_ast, visitor)?;
348 }
349 }
350 }
351
352 /// Call the appropriate `Visitor` methods given an inductive step.
353 fn visit_class_pre<V: Visitor>(
354 &self,
355 ast: &ClassInduct<'a>,
356 visitor: &mut V,
357 ) -> Result<(), V::Err> {
358 match *ast {
359 ClassInduct::Item(item) => {
360 visitor.visit_class_set_item_pre(item)?;
361 }
362 ClassInduct::BinaryOp(op) => {
363 visitor.visit_class_set_binary_op_pre(op)?;
364 }
365 }
366 Ok(())
367 }
368
369 /// Call the appropriate `Visitor` methods given an inductive step.
370 fn visit_class_post<V: Visitor>(
371 &self,
372 ast: &ClassInduct<'a>,
373 visitor: &mut V,
374 ) -> Result<(), V::Err> {
375 match *ast {
376 ClassInduct::Item(item) => {
377 visitor.visit_class_set_item_post(item)?;
378 }
379 ClassInduct::BinaryOp(op) => {
380 visitor.visit_class_set_binary_op_post(op)?;
381 }
382 }
383 Ok(())
384 }
385
386 /// Build a stack frame for the given class node if one is needed (which
387 /// occurs if and only if there are child nodes). Otherwise, return None.
388 fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
389 match *ast {
390 ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
391 match x.kind {
392 ast::ClassSet::Item(ref item) => {
393 Some(ClassFrame::Union { head: item, tail: &[] })
394 }
395 ast::ClassSet::BinaryOp(ref op) => {
396 Some(ClassFrame::Binary { op })
397 }
398 }
399 }
400 ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
401 if x.items.is_empty() {
402 None
403 } else {
404 Some(ClassFrame::Union {
405 head: &x.items[0],
406 tail: &x.items[1..],
407 })
408 }
409 }
410 ClassInduct::BinaryOp(op) => {
411 Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
412 }
413 _ => None,
414 }
415 }
416
417 /// Pops the given frame. If the frame has an additional inductive step,
418 /// then return it, otherwise return `None`.
419 fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
420 match induct {
421 ClassFrame::Union { tail, .. } => {
422 if tail.is_empty() {
423 None
424 } else {
425 Some(ClassFrame::Union {
426 head: &tail[0],
427 tail: &tail[1..],
428 })
429 }
430 }
431 ClassFrame::Binary { .. } => None,
432 ClassFrame::BinaryLHS { op, rhs, .. } => {
433 Some(ClassFrame::BinaryRHS { op, rhs })
434 }
435 ClassFrame::BinaryRHS { .. } => None,
436 }
437 }
438}
439
440impl<'a> Frame<'a> {
441 /// Perform the next inductive step on this frame and return the next
442 /// child AST node to visit.
443 fn child(&self) -> &'a Ast {
444 match *self {
445 Frame::Repetition(rep: &Repetition) => &rep.ast,
446 Frame::Group(group: &Group) => &group.ast,
447 Frame::Concat { head: &Ast, .. } => head,
448 Frame::Alternation { head: &Ast, .. } => head,
449 }
450 }
451}
452
453impl<'a> ClassFrame<'a> {
454 /// Perform the next inductive step on this frame and return the next
455 /// child class node to visit.
456 fn child(&self) -> ClassInduct<'a> {
457 match *self {
458 ClassFrame::Union { head: &ClassSetItem, .. } => ClassInduct::Item(head),
459 ClassFrame::Binary { op: &ClassSetBinaryOp, .. } => ClassInduct::BinaryOp(op),
460 ClassFrame::BinaryLHS { ref lhs: &&ClassSet, .. } => {
461 ClassInduct::from_set(ast:lhs)
462 }
463 ClassFrame::BinaryRHS { ref rhs: &&ClassSet, .. } => {
464 ClassInduct::from_set(ast:rhs)
465 }
466 }
467 }
468}
469
470impl<'a> ClassInduct<'a> {
471 fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
472 ClassInduct::from_set(&ast.kind)
473 }
474
475 fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
476 match *ast {
477 ast::ClassSet::Item(ref item: &ClassSetItem) => ClassInduct::Item(item),
478 ast::ClassSet::BinaryOp(ref op: &ClassSetBinaryOp) => ClassInduct::BinaryOp(op),
479 }
480 }
481}
482
483impl<'a> core::fmt::Debug for ClassFrame<'a> {
484 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
485 let x: &str = match *self {
486 ClassFrame::Union { .. } => "Union",
487 ClassFrame::Binary { .. } => "Binary",
488 ClassFrame::BinaryLHS { .. } => "BinaryLHS",
489 ClassFrame::BinaryRHS { .. } => "BinaryRHS",
490 };
491 write!(f, "{}", x)
492 }
493}
494
495impl<'a> core::fmt::Debug for ClassInduct<'a> {
496 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
497 let x = match *self {
498 ClassInduct::Item(it) => match *it {
499 ast::ClassSetItem::Empty(_) => "Item(Empty)",
500 ast::ClassSetItem::Literal(_) => "Item(Literal)",
501 ast::ClassSetItem::Range(_) => "Item(Range)",
502 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
503 ast::ClassSetItem::Perl(_) => "Item(Perl)",
504 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
505 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
506 ast::ClassSetItem::Union(_) => "Item(Union)",
507 },
508 ClassInduct::BinaryOp(it) => match it.kind {
509 ast::ClassSetBinaryOpKind::Intersection => {
510 "BinaryOp(Intersection)"
511 }
512 ast::ClassSetBinaryOpKind::Difference => {
513 "BinaryOp(Difference)"
514 }
515 ast::ClassSetBinaryOpKind::SymmetricDifference => {
516 "BinaryOp(SymmetricDifference)"
517 }
518 },
519 };
520 write!(f, "{}", x)
521 }
522}
523