1 | use alloc::{vec, vec::Vec}; |
2 | |
3 | use crate::hir::{self, Hir, HirKind}; |
4 | |
5 | /// A trait for visiting the high-level IR (HIR) in depth first order. |
6 | /// |
7 | /// The principle aim of this trait is to enable callers to perform case |
8 | /// analysis on a high-level intermediate representation of a regular |
9 | /// expression without necessarily using recursion. In particular, this permits |
10 | /// callers to do case analysis with constant stack usage, which can be |
11 | /// important since the size of an HIR may be proportional to end user input. |
12 | /// |
13 | /// Typical usage of this trait involves providing an implementation and then |
14 | /// running it using the [`visit`] function. |
15 | pub trait Visitor { |
16 | /// The result of visiting an HIR. |
17 | type Output; |
18 | /// An error that visiting an HIR might return. |
19 | type Err; |
20 | |
21 | /// All implementors of `Visitor` must provide a `finish` method, which |
22 | /// yields the result of visiting the HIR or an error. |
23 | fn finish(self) -> Result<Self::Output, Self::Err>; |
24 | |
25 | /// This method is called before beginning traversal of the HIR. |
26 | fn start(&mut self) {} |
27 | |
28 | /// This method is called on an `Hir` before descending into child `Hir` |
29 | /// nodes. |
30 | fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> { |
31 | Ok(()) |
32 | } |
33 | |
34 | /// This method is called on an `Hir` after descending all of its child |
35 | /// `Hir` nodes. |
36 | fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> { |
37 | Ok(()) |
38 | } |
39 | |
40 | /// This method is called between child nodes of an alternation. |
41 | fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { |
42 | Ok(()) |
43 | } |
44 | } |
45 | |
46 | /// Executes an implementation of `Visitor` in constant stack space. |
47 | /// |
48 | /// This function will visit every node in the given `Hir` while calling |
49 | /// appropriate methods provided by the [`Visitor`] trait. |
50 | /// |
51 | /// The primary use case for this method is when one wants to perform case |
52 | /// analysis over an `Hir` without using a stack size proportional to the depth |
53 | /// of the `Hir`. Namely, this method will instead use constant stack space, |
54 | /// but will use heap space proportional to the size of the `Hir`. This may be |
55 | /// desirable in cases where the size of `Hir` is proportional to end user |
56 | /// input. |
57 | /// |
58 | /// If the visitor returns an error at any point, then visiting is stopped and |
59 | /// the error is returned. |
60 | pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> { |
61 | HeapVisitor::new().visit(hir, visitor) |
62 | } |
63 | |
64 | /// HeapVisitor visits every item in an `Hir` recursively using constant stack |
65 | /// size and a heap size proportional to the size of the `Hir`. |
66 | struct HeapVisitor<'a> { |
67 | /// A stack of `Hir` nodes. This is roughly analogous to the call stack |
68 | /// used in a typical recursive visitor. |
69 | stack: Vec<(&'a Hir, Frame<'a>)>, |
70 | } |
71 | |
72 | /// Represents a single stack frame while performing structural induction over |
73 | /// an `Hir`. |
74 | enum Frame<'a> { |
75 | /// A stack frame allocated just before descending into a repetition |
76 | /// operator's child node. |
77 | Repetition(&'a hir::Repetition), |
78 | /// A stack frame allocated just before descending into a capture's child |
79 | /// node. |
80 | Capture(&'a hir::Capture), |
81 | /// The stack frame used while visiting every child node of a concatenation |
82 | /// of expressions. |
83 | Concat { |
84 | /// The child node we are currently visiting. |
85 | head: &'a Hir, |
86 | /// The remaining child nodes to visit (which may be empty). |
87 | tail: &'a [Hir], |
88 | }, |
89 | /// The stack frame used while visiting every child node of an alternation |
90 | /// of expressions. |
91 | Alternation { |
92 | /// The child node we are currently visiting. |
93 | head: &'a Hir, |
94 | /// The remaining child nodes to visit (which may be empty). |
95 | tail: &'a [Hir], |
96 | }, |
97 | } |
98 | |
99 | impl<'a> HeapVisitor<'a> { |
100 | fn new() -> HeapVisitor<'a> { |
101 | HeapVisitor { stack: vec![] } |
102 | } |
103 | |
104 | fn visit<V: Visitor>( |
105 | &mut self, |
106 | mut hir: &'a Hir, |
107 | mut visitor: V, |
108 | ) -> Result<V::Output, V::Err> { |
109 | self.stack.clear(); |
110 | |
111 | visitor.start(); |
112 | loop { |
113 | visitor.visit_pre(hir)?; |
114 | if let Some(x) = self.induct(hir) { |
115 | let child = x.child(); |
116 | self.stack.push((hir, x)); |
117 | hir = child; |
118 | continue; |
119 | } |
120 | // No induction means we have a base case, so we can post visit |
121 | // it now. |
122 | visitor.visit_post(hir)?; |
123 | |
124 | // At this point, we now try to pop our call stack until it is |
125 | // either empty or we hit another inductive case. |
126 | loop { |
127 | let (post_hir, frame) = match self.stack.pop() { |
128 | None => return visitor.finish(), |
129 | Some((post_hir, frame)) => (post_hir, frame), |
130 | }; |
131 | // If this is a concat/alternate, then we might have additional |
132 | // inductive steps to process. |
133 | if let Some(x) = self.pop(frame) { |
134 | if let Frame::Alternation { .. } = x { |
135 | visitor.visit_alternation_in()?; |
136 | } |
137 | hir = x.child(); |
138 | self.stack.push((post_hir, x)); |
139 | break; |
140 | } |
141 | // Otherwise, we've finished visiting all the child nodes for |
142 | // this HIR, so we can post visit it now. |
143 | visitor.visit_post(post_hir)?; |
144 | } |
145 | } |
146 | } |
147 | |
148 | /// Build a stack frame for the given HIR if one is needed (which occurs if |
149 | /// and only if there are child nodes in the HIR). Otherwise, return None. |
150 | fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> { |
151 | match *hir.kind() { |
152 | HirKind::Repetition(ref x) => Some(Frame::Repetition(x)), |
153 | HirKind::Capture(ref x) => Some(Frame::Capture(x)), |
154 | HirKind::Concat(ref x) if x.is_empty() => None, |
155 | HirKind::Concat(ref x) => { |
156 | Some(Frame::Concat { head: &x[0], tail: &x[1..] }) |
157 | } |
158 | HirKind::Alternation(ref x) if x.is_empty() => None, |
159 | HirKind::Alternation(ref x) => { |
160 | Some(Frame::Alternation { head: &x[0], tail: &x[1..] }) |
161 | } |
162 | _ => None, |
163 | } |
164 | } |
165 | |
166 | /// Pops the given frame. If the frame has an additional inductive step, |
167 | /// then return it, otherwise return `None`. |
168 | fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { |
169 | match induct { |
170 | Frame::Repetition(_) => None, |
171 | Frame::Capture(_) => None, |
172 | Frame::Concat { tail, .. } => { |
173 | if tail.is_empty() { |
174 | None |
175 | } else { |
176 | Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) |
177 | } |
178 | } |
179 | Frame::Alternation { tail, .. } => { |
180 | if tail.is_empty() { |
181 | None |
182 | } else { |
183 | Some(Frame::Alternation { |
184 | head: &tail[0], |
185 | tail: &tail[1..], |
186 | }) |
187 | } |
188 | } |
189 | } |
190 | } |
191 | } |
192 | |
193 | impl<'a> Frame<'a> { |
194 | /// Perform the next inductive step on this frame and return the next |
195 | /// child HIR node to visit. |
196 | fn child(&self) -> &'a Hir { |
197 | match *self { |
198 | Frame::Repetition(rep: &Repetition) => &rep.sub, |
199 | Frame::Capture(capture: &Capture) => &capture.sub, |
200 | Frame::Concat { head: &Hir, .. } => head, |
201 | Frame::Alternation { head: &Hir, .. } => head, |
202 | } |
203 | } |
204 | } |
205 | |