1use std::fmt;
2
3use crate::ast::{self, Ast};
4
5/// A trait for visiting an abstract syntax tree (AST) in depth first order.
6///
7/// The principle aim of this trait is to enable callers to perform case
8/// analysis on an abstract syntax tree without necessarily using recursion.
9/// In particular, this permits callers to do case analysis with constant stack
10/// usage, which can be important since the size of an abstract syntax tree
11/// may be proportional to end user input.
12///
13/// Typical usage of this trait involves providing an implementation and then
14/// running it using the [`visit`](fn.visit.html) function.
15///
16/// Note that the abstract syntax tree for a regular expression is quite
17/// complex. Unless you specifically need it, you might be able to use the
18/// much simpler
19/// [high-level intermediate representation](../hir/struct.Hir.html)
20/// and its
21/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
22/// instead.
23pub trait Visitor {
24 /// The result of visiting an AST.
25 type Output;
26 /// An error that visiting an AST might return.
27 type Err;
28
29 /// All implementors of `Visitor` must provide a `finish` method, which
30 /// yields the result of visiting the AST or an error.
31 fn finish(self) -> Result<Self::Output, Self::Err>;
32
33 /// This method is called before beginning traversal of the AST.
34 fn start(&mut self) {}
35
36 /// This method is called on an `Ast` before descending into child `Ast`
37 /// nodes.
38 fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
39 Ok(())
40 }
41
42 /// This method is called on an `Ast` after descending all of its child
43 /// `Ast` nodes.
44 fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
45 Ok(())
46 }
47
48 /// This method is called between child nodes of an
49 /// [`Alternation`](struct.Alternation.html).
50 fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
51 Ok(())
52 }
53
54 /// This method is called on every
55 /// [`ClassSetItem`](enum.ClassSetItem.html)
56 /// before descending into child nodes.
57 fn visit_class_set_item_pre(
58 &mut self,
59 _ast: &ast::ClassSetItem,
60 ) -> Result<(), Self::Err> {
61 Ok(())
62 }
63
64 /// This method is called on every
65 /// [`ClassSetItem`](enum.ClassSetItem.html)
66 /// after descending into child nodes.
67 fn visit_class_set_item_post(
68 &mut self,
69 _ast: &ast::ClassSetItem,
70 ) -> Result<(), Self::Err> {
71 Ok(())
72 }
73
74 /// This method is called on every
75 /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
76 /// before descending into child nodes.
77 fn visit_class_set_binary_op_pre(
78 &mut self,
79 _ast: &ast::ClassSetBinaryOp,
80 ) -> Result<(), Self::Err> {
81 Ok(())
82 }
83
84 /// This method is called on every
85 /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
86 /// after descending into child nodes.
87 fn visit_class_set_binary_op_post(
88 &mut self,
89 _ast: &ast::ClassSetBinaryOp,
90 ) -> Result<(), Self::Err> {
91 Ok(())
92 }
93
94 /// This method is called between the left hand and right hand child nodes
95 /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
96 fn visit_class_set_binary_op_in(
97 &mut self,
98 _ast: &ast::ClassSetBinaryOp,
99 ) -> Result<(), Self::Err> {
100 Ok(())
101 }
102}
103
104/// Executes an implementation of `Visitor` in constant stack space.
105///
106/// This function will visit every node in the given `Ast` while calling the
107/// appropriate methods provided by the
108/// [`Visitor`](trait.Visitor.html) trait.
109///
110/// The primary use case for this method is when one wants to perform case
111/// analysis over an `Ast` without using a stack size proportional to the depth
112/// of the `Ast`. Namely, this method will instead use constant stack size, but
113/// will use heap space proportional to the size of the `Ast`. This may be
114/// desirable in cases where the size of `Ast` is proportional to end user
115/// input.
116///
117/// If the visitor returns an error at any point, then visiting is stopped and
118/// the error is returned.
119pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
120 HeapVisitor::new().visit(ast, visitor)
121}
122
123/// HeapVisitor visits every item in an `Ast` recursively using constant stack
124/// size and a heap size proportional to the size of the `Ast`.
125struct HeapVisitor<'a> {
126 /// A stack of `Ast` nodes. This is roughly analogous to the call stack
127 /// used in a typical recursive visitor.
128 stack: Vec<(&'a Ast, Frame<'a>)>,
129 /// Similar to the `Ast` stack above, but is used only for character
130 /// classes. In particular, character classes embed their own mini
131 /// recursive syntax.
132 stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
133}
134
135/// Represents a single stack frame while performing structural induction over
136/// an `Ast`.
137enum Frame<'a> {
138 /// A stack frame allocated just before descending into a repetition
139 /// operator's child node.
140 Repetition(&'a ast::Repetition),
141 /// A stack frame allocated just before descending into a group's child
142 /// node.
143 Group(&'a ast::Group),
144 /// The stack frame used while visiting every child node of a concatenation
145 /// of expressions.
146 Concat {
147 /// The child node we are currently visiting.
148 head: &'a Ast,
149 /// The remaining child nodes to visit (which may be empty).
150 tail: &'a [Ast],
151 },
152 /// The stack frame used while visiting every child node of an alternation
153 /// of expressions.
154 Alternation {
155 /// The child node we are currently visiting.
156 head: &'a Ast,
157 /// The remaining child nodes to visit (which may be empty).
158 tail: &'a [Ast],
159 },
160}
161
162/// Represents a single stack frame while performing structural induction over
163/// a character class.
164enum ClassFrame<'a> {
165 /// The stack frame used while visiting every child node of a union of
166 /// character class items.
167 Union {
168 /// The child node we are currently visiting.
169 head: &'a ast::ClassSetItem,
170 /// The remaining child nodes to visit (which may be empty).
171 tail: &'a [ast::ClassSetItem],
172 },
173 /// The stack frame used while a binary class operation.
174 Binary { op: &'a ast::ClassSetBinaryOp },
175 /// A stack frame allocated just before descending into a binary operator's
176 /// left hand child node.
177 BinaryLHS {
178 op: &'a ast::ClassSetBinaryOp,
179 lhs: &'a ast::ClassSet,
180 rhs: &'a ast::ClassSet,
181 },
182 /// A stack frame allocated just before descending into a binary operator's
183 /// right hand child node.
184 BinaryRHS { op: &'a ast::ClassSetBinaryOp, rhs: &'a ast::ClassSet },
185}
186
187/// A representation of the inductive step when performing structural induction
188/// over a character class.
189///
190/// Note that there is no analogous explicit type for the inductive step for
191/// `Ast` nodes because the inductive step is just an `Ast`. For character
192/// classes, the inductive step can produce one of two possible child nodes:
193/// an item or a binary operation. (An item cannot be a binary operation
194/// because that would imply binary operations can be unioned in the concrete
195/// syntax, which is not possible.)
196enum ClassInduct<'a> {
197 Item(&'a ast::ClassSetItem),
198 BinaryOp(&'a ast::ClassSetBinaryOp),
199}
200
201impl<'a> HeapVisitor<'a> {
202 fn new() -> HeapVisitor<'a> {
203 HeapVisitor { stack: vec![], stack_class: vec![] }
204 }
205
206 fn visit<V: Visitor>(
207 &mut self,
208 mut ast: &'a Ast,
209 mut visitor: V,
210 ) -> Result<V::Output, V::Err> {
211 self.stack.clear();
212 self.stack_class.clear();
213
214 visitor.start();
215 loop {
216 visitor.visit_pre(ast)?;
217 if let Some(x) = self.induct(ast, &mut visitor)? {
218 let child = x.child();
219 self.stack.push((ast, x));
220 ast = child;
221 continue;
222 }
223 // No induction means we have a base case, so we can post visit
224 // it now.
225 visitor.visit_post(ast)?;
226
227 // At this point, we now try to pop our call stack until it is
228 // either empty or we hit another inductive case.
229 loop {
230 let (post_ast, frame) = match self.stack.pop() {
231 None => return visitor.finish(),
232 Some((post_ast, frame)) => (post_ast, frame),
233 };
234 // If this is a concat/alternate, then we might have additional
235 // inductive steps to process.
236 if let Some(x) = self.pop(frame) {
237 if let Frame::Alternation { .. } = x {
238 visitor.visit_alternation_in()?;
239 }
240 ast = x.child();
241 self.stack.push((post_ast, x));
242 break;
243 }
244 // Otherwise, we've finished visiting all the child nodes for
245 // this AST, so we can post visit it now.
246 visitor.visit_post(post_ast)?;
247 }
248 }
249 }
250
251 /// Build a stack frame for the given AST if one is needed (which occurs if
252 /// and only if there are child nodes in the AST). Otherwise, return None.
253 ///
254 /// If this visits a class, then the underlying visitor implementation may
255 /// return an error which will be passed on here.
256 fn induct<V: Visitor>(
257 &mut self,
258 ast: &'a Ast,
259 visitor: &mut V,
260 ) -> Result<Option<Frame<'a>>, V::Err> {
261 Ok(match *ast {
262 Ast::Class(ast::Class::Bracketed(ref x)) => {
263 self.visit_class(x, visitor)?;
264 None
265 }
266 Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
267 Ast::Group(ref x) => Some(Frame::Group(x)),
268 Ast::Concat(ref x) if x.asts.is_empty() => None,
269 Ast::Concat(ref x) => {
270 Some(Frame::Concat { head: &x.asts[0], tail: &x.asts[1..] })
271 }
272 Ast::Alternation(ref x) if x.asts.is_empty() => None,
273 Ast::Alternation(ref x) => Some(Frame::Alternation {
274 head: &x.asts[0],
275 tail: &x.asts[1..],
276 }),
277 _ => None,
278 })
279 }
280
281 /// Pops the given frame. If the frame has an additional inductive step,
282 /// then return it, otherwise return `None`.
283 fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
284 match induct {
285 Frame::Repetition(_) => None,
286 Frame::Group(_) => None,
287 Frame::Concat { tail, .. } => {
288 if tail.is_empty() {
289 None
290 } else {
291 Some(Frame::Concat { head: &tail[0], tail: &tail[1..] })
292 }
293 }
294 Frame::Alternation { tail, .. } => {
295 if tail.is_empty() {
296 None
297 } else {
298 Some(Frame::Alternation {
299 head: &tail[0],
300 tail: &tail[1..],
301 })
302 }
303 }
304 }
305 }
306
307 fn visit_class<V: Visitor>(
308 &mut self,
309 ast: &'a ast::ClassBracketed,
310 visitor: &mut V,
311 ) -> Result<(), V::Err> {
312 let mut ast = ClassInduct::from_bracketed(ast);
313 loop {
314 self.visit_class_pre(&ast, visitor)?;
315 if let Some(x) = self.induct_class(&ast) {
316 let child = x.child();
317 self.stack_class.push((ast, x));
318 ast = child;
319 continue;
320 }
321 self.visit_class_post(&ast, visitor)?;
322
323 // At this point, we now try to pop our call stack until it is
324 // either empty or we hit another inductive case.
325 loop {
326 let (post_ast, frame) = match self.stack_class.pop() {
327 None => return Ok(()),
328 Some((post_ast, frame)) => (post_ast, frame),
329 };
330 // If this is a union or a binary op, then we might have
331 // additional inductive steps to process.
332 if let Some(x) = self.pop_class(frame) {
333 if let ClassFrame::BinaryRHS { ref op, .. } = x {
334 visitor.visit_class_set_binary_op_in(op)?;
335 }
336 ast = x.child();
337 self.stack_class.push((post_ast, x));
338 break;
339 }
340 // Otherwise, we've finished visiting all the child nodes for
341 // this class node, so we can post visit it now.
342 self.visit_class_post(&post_ast, visitor)?;
343 }
344 }
345 }
346
347 /// Call the appropriate `Visitor` methods given an inductive step.
348 fn visit_class_pre<V: Visitor>(
349 &self,
350 ast: &ClassInduct<'a>,
351 visitor: &mut V,
352 ) -> Result<(), V::Err> {
353 match *ast {
354 ClassInduct::Item(item) => {
355 visitor.visit_class_set_item_pre(item)?;
356 }
357 ClassInduct::BinaryOp(op) => {
358 visitor.visit_class_set_binary_op_pre(op)?;
359 }
360 }
361 Ok(())
362 }
363
364 /// Call the appropriate `Visitor` methods given an inductive step.
365 fn visit_class_post<V: Visitor>(
366 &self,
367 ast: &ClassInduct<'a>,
368 visitor: &mut V,
369 ) -> Result<(), V::Err> {
370 match *ast {
371 ClassInduct::Item(item) => {
372 visitor.visit_class_set_item_post(item)?;
373 }
374 ClassInduct::BinaryOp(op) => {
375 visitor.visit_class_set_binary_op_post(op)?;
376 }
377 }
378 Ok(())
379 }
380
381 /// Build a stack frame for the given class node if one is needed (which
382 /// occurs if and only if there are child nodes). Otherwise, return None.
383 fn induct_class(&self, ast: &ClassInduct<'a>) -> Option<ClassFrame<'a>> {
384 match *ast {
385 ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
386 match x.kind {
387 ast::ClassSet::Item(ref item) => {
388 Some(ClassFrame::Union { head: item, tail: &[] })
389 }
390 ast::ClassSet::BinaryOp(ref op) => {
391 Some(ClassFrame::Binary { op })
392 }
393 }
394 }
395 ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
396 if x.items.is_empty() {
397 None
398 } else {
399 Some(ClassFrame::Union {
400 head: &x.items[0],
401 tail: &x.items[1..],
402 })
403 }
404 }
405 ClassInduct::BinaryOp(op) => {
406 Some(ClassFrame::BinaryLHS { op, lhs: &op.lhs, rhs: &op.rhs })
407 }
408 _ => None,
409 }
410 }
411
412 /// Pops the given frame. If the frame has an additional inductive step,
413 /// then return it, otherwise return `None`.
414 fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
415 match induct {
416 ClassFrame::Union { tail, .. } => {
417 if tail.is_empty() {
418 None
419 } else {
420 Some(ClassFrame::Union {
421 head: &tail[0],
422 tail: &tail[1..],
423 })
424 }
425 }
426 ClassFrame::Binary { .. } => None,
427 ClassFrame::BinaryLHS { op, rhs, .. } => {
428 Some(ClassFrame::BinaryRHS { op, rhs })
429 }
430 ClassFrame::BinaryRHS { .. } => None,
431 }
432 }
433}
434
435impl<'a> Frame<'a> {
436 /// Perform the next inductive step on this frame and return the next
437 /// child AST node to visit.
438 fn child(&self) -> &'a Ast {
439 match *self {
440 Frame::Repetition(rep) => &rep.ast,
441 Frame::Group(group) => &group.ast,
442 Frame::Concat { head, .. } => head,
443 Frame::Alternation { head, .. } => head,
444 }
445 }
446}
447
448impl<'a> ClassFrame<'a> {
449 /// Perform the next inductive step on this frame and return the next
450 /// child class node to visit.
451 fn child(&self) -> ClassInduct<'a> {
452 match *self {
453 ClassFrame::Union { head, .. } => ClassInduct::Item(head),
454 ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
455 ClassFrame::BinaryLHS { ref lhs, .. } => {
456 ClassInduct::from_set(lhs)
457 }
458 ClassFrame::BinaryRHS { ref rhs, .. } => {
459 ClassInduct::from_set(rhs)
460 }
461 }
462 }
463}
464
465impl<'a> ClassInduct<'a> {
466 fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
467 ClassInduct::from_set(&ast.kind)
468 }
469
470 fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
471 match *ast {
472 ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
473 ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
474 }
475 }
476}
477
478impl<'a> fmt::Debug for ClassFrame<'a> {
479 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
480 let x = match *self {
481 ClassFrame::Union { .. } => "Union",
482 ClassFrame::Binary { .. } => "Binary",
483 ClassFrame::BinaryLHS { .. } => "BinaryLHS",
484 ClassFrame::BinaryRHS { .. } => "BinaryRHS",
485 };
486 write!(f, "{}", x)
487 }
488}
489
490impl<'a> fmt::Debug for ClassInduct<'a> {
491 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
492 let x = match *self {
493 ClassInduct::Item(it) => match *it {
494 ast::ClassSetItem::Empty(_) => "Item(Empty)",
495 ast::ClassSetItem::Literal(_) => "Item(Literal)",
496 ast::ClassSetItem::Range(_) => "Item(Range)",
497 ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
498 ast::ClassSetItem::Perl(_) => "Item(Perl)",
499 ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
500 ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
501 ast::ClassSetItem::Union(_) => "Item(Union)",
502 },
503 ClassInduct::BinaryOp(it) => match it.kind {
504 ast::ClassSetBinaryOpKind::Intersection => {
505 "BinaryOp(Intersection)"
506 }
507 ast::ClassSetBinaryOpKind::Difference => {
508 "BinaryOp(Difference)"
509 }
510 ast::ClassSetBinaryOpKind::SymmetricDifference => {
511 "BinaryOp(SymmetricDifference)"
512 }
513 },
514 };
515 write!(f, "{}", x)
516 }
517}
518