1 | use alloc::{vec, vec::Vec}; |
2 | |
3 | use crate::hir::{self, Hir, HirKind}; |
4 | |
5 | /// A trait for visiting the high-level IR (HIR) in depth first order. |
6 | /// |
7 | /// The principle aim of this trait is to enable callers to perform case |
8 | /// analysis on a high-level intermediate representation of a regular |
9 | /// expression without necessarily using recursion. In particular, this permits |
10 | /// callers to do case analysis with constant stack usage, which can be |
11 | /// important since the size of an HIR may be proportional to end user input. |
12 | /// |
13 | /// Typical usage of this trait involves providing an implementation and then |
14 | /// running it using the [`visit`] function. |
15 | pub trait Visitor { |
16 | /// The result of visiting an HIR. |
17 | type Output; |
18 | /// An error that visiting an HIR might return. |
19 | type Err; |
20 | |
21 | /// All implementors of `Visitor` must provide a `finish` method, which |
22 | /// yields the result of visiting the HIR or an error. |
23 | fn finish(self) -> Result<Self::Output, Self::Err>; |
24 | |
25 | /// This method is called before beginning traversal of the HIR. |
26 | fn start(&mut self) {} |
27 | |
28 | /// This method is called on an `Hir` before descending into child `Hir` |
29 | /// nodes. |
30 | fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> { |
31 | Ok(()) |
32 | } |
33 | |
34 | /// This method is called on an `Hir` after descending all of its child |
35 | /// `Hir` nodes. |
36 | fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> { |
37 | Ok(()) |
38 | } |
39 | |
40 | /// This method is called between child nodes of an alternation. |
41 | fn visit_alternation_in(&mut self) -> Result<(), Self::Err> { |
42 | Ok(()) |
43 | } |
44 | |
45 | /// This method is called between child nodes of a concatenation. |
46 | fn visit_concat_in(&mut self) -> Result<(), Self::Err> { |
47 | Ok(()) |
48 | } |
49 | } |
50 | |
51 | /// Executes an implementation of `Visitor` in constant stack space. |
52 | /// |
53 | /// This function will visit every node in the given `Hir` while calling |
54 | /// appropriate methods provided by the [`Visitor`] trait. |
55 | /// |
56 | /// The primary use case for this method is when one wants to perform case |
57 | /// analysis over an `Hir` without using a stack size proportional to the depth |
58 | /// of the `Hir`. Namely, this method will instead use constant stack space, |
59 | /// but will use heap space proportional to the size of the `Hir`. This may be |
60 | /// desirable in cases where the size of `Hir` is proportional to end user |
61 | /// input. |
62 | /// |
63 | /// If the visitor returns an error at any point, then visiting is stopped and |
64 | /// the error is returned. |
65 | pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> { |
66 | HeapVisitor::new().visit(hir, visitor) |
67 | } |
68 | |
69 | /// HeapVisitor visits every item in an `Hir` recursively using constant stack |
70 | /// size and a heap size proportional to the size of the `Hir`. |
71 | struct HeapVisitor<'a> { |
72 | /// A stack of `Hir` nodes. This is roughly analogous to the call stack |
73 | /// used in a typical recursive visitor. |
74 | stack: Vec<(&'a Hir, Frame<'a>)>, |
75 | } |
76 | |
77 | /// Represents a single stack frame while performing structural induction over |
78 | /// an `Hir`. |
79 | enum Frame<'a> { |
80 | /// A stack frame allocated just before descending into a repetition |
81 | /// operator's child node. |
82 | Repetition(&'a hir::Repetition), |
83 | /// A stack frame allocated just before descending into a capture's child |
84 | /// node. |
85 | Capture(&'a hir::Capture), |
86 | /// The stack frame used while visiting every child node of a concatenation |
87 | /// of expressions. |
88 | Concat { |
89 | /// The child node we are currently visiting. |
90 | head: &'a Hir, |
91 | /// The remaining child nodes to visit (which may be empty). |
92 | tail: &'a [Hir], |
93 | }, |
94 | /// The stack frame used while visiting every child node of an alternation |
95 | /// of expressions. |
96 | Alternation { |
97 | /// The child node we are currently visiting. |
98 | head: &'a Hir, |
99 | /// The remaining child nodes to visit (which may be empty). |
100 | tail: &'a [Hir], |
101 | }, |
102 | } |
103 | |
104 | impl<'a> HeapVisitor<'a> { |
105 | fn new() -> HeapVisitor<'a> { |
106 | HeapVisitor { stack: vec![] } |
107 | } |
108 | |
109 | fn visit<V: Visitor>( |
110 | &mut self, |
111 | mut hir: &'a Hir, |
112 | mut visitor: V, |
113 | ) -> Result<V::Output, V::Err> { |
114 | self.stack.clear(); |
115 | |
116 | visitor.start(); |
117 | loop { |
118 | visitor.visit_pre(hir)?; |
119 | if let Some(x) = self.induct(hir) { |
120 | let child = x.child(); |
121 | self.stack.push((hir, x)); |
122 | hir = child; |
123 | continue; |
124 | } |
125 | // No induction means we have a base case, so we can post visit |
126 | // it now. |
127 | visitor.visit_post(hir)?; |
128 | |
129 | // At this point, we now try to pop our call stack until it is |
130 | // either empty or we hit another inductive case. |
131 | loop { |
132 | let (post_hir, frame) = match self.stack.pop() { |
133 | None => return visitor.finish(), |
134 | Some((post_hir, frame)) => (post_hir, frame), |
135 | }; |
136 | // If this is a concat/alternate, then we might have additional |
137 | // inductive steps to process. |
138 | if let Some(x) = self.pop(frame) { |
139 | match x { |
140 | Frame::Alternation { .. } => { |
141 | visitor.visit_alternation_in()?; |
142 | } |
143 | Frame::Concat { .. } => { |
144 | visitor.visit_concat_in()?; |
145 | } |
146 | _ => {} |
147 | } |
148 | hir = x.child(); |
149 | self.stack.push((post_hir, x)); |
150 | break; |
151 | } |
152 | // Otherwise, we've finished visiting all the child nodes for |
153 | // this HIR, so we can post visit it now. |
154 | visitor.visit_post(post_hir)?; |
155 | } |
156 | } |
157 | } |
158 | |
159 | /// Build a stack frame for the given HIR if one is needed (which occurs if |
160 | /// and only if there are child nodes in the HIR). Otherwise, return None. |
161 | fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> { |
162 | match *hir.kind() { |
163 | HirKind::Repetition(ref x) => Some(Frame::Repetition(x)), |
164 | HirKind::Capture(ref x) => Some(Frame::Capture(x)), |
165 | HirKind::Concat(ref x) if x.is_empty() => None, |
166 | HirKind::Concat(ref x) => { |
167 | Some(Frame::Concat { head: &x[0], tail: &x[1..] }) |
168 | } |
169 | HirKind::Alternation(ref x) if x.is_empty() => None, |
170 | HirKind::Alternation(ref x) => { |
171 | Some(Frame::Alternation { head: &x[0], tail: &x[1..] }) |
172 | } |
173 | _ => None, |
174 | } |
175 | } |
176 | |
177 | /// Pops the given frame. If the frame has an additional inductive step, |
178 | /// then return it, otherwise return `None`. |
179 | fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> { |
180 | match induct { |
181 | Frame::Repetition(_) => None, |
182 | Frame::Capture(_) => None, |
183 | Frame::Concat { tail, .. } => { |
184 | if tail.is_empty() { |
185 | None |
186 | } else { |
187 | Some(Frame::Concat { head: &tail[0], tail: &tail[1..] }) |
188 | } |
189 | } |
190 | Frame::Alternation { tail, .. } => { |
191 | if tail.is_empty() { |
192 | None |
193 | } else { |
194 | Some(Frame::Alternation { |
195 | head: &tail[0], |
196 | tail: &tail[1..], |
197 | }) |
198 | } |
199 | } |
200 | } |
201 | } |
202 | } |
203 | |
204 | impl<'a> Frame<'a> { |
205 | /// Perform the next inductive step on this frame and return the next |
206 | /// child HIR node to visit. |
207 | fn child(&self) -> &'a Hir { |
208 | match *self { |
209 | Frame::Repetition(rep) => &rep.sub, |
210 | Frame::Capture(capture) => &capture.sub, |
211 | Frame::Concat { head, .. } => head, |
212 | Frame::Alternation { head, .. } => head, |
213 | } |
214 | } |
215 | } |
216 | |