1use alloc::vec::Vec;
2
3use crate::{nfa::noncontiguous, util::primitives::StateID};
4
5/// Remappable is a tightly coupled abstraction that facilitates remapping
6/// state identifiers in DFAs.
7///
8/// The main idea behind remapping state IDs is that DFAs often need to check
9/// if a certain state is a "special" state of some kind (like a match state)
10/// during a search. Since this is extremely perf critical code, we want this
11/// check to be as fast as possible. Partitioning state IDs into, for example,
12/// into "non-match" and "match" states means one can tell if a state is a
13/// match state via a simple comparison of the state ID.
14///
15/// The issue is that during the DFA construction process, it's not
16/// particularly easy to partition the states. Instead, the simplest thing is
17/// to often just do a pass over all of the states and shuffle them into their
18/// desired partitionings. To do that, we need a mechanism for swapping states.
19/// Hence, this abstraction.
20///
21/// Normally, for such little code, I would just duplicate it. But this is a
22/// key optimization and the implementation is a bit subtle. So the abstraction
23/// is basically a ham-fisted attempt at DRY. The only place we use this is in
24/// the dense and one-pass DFAs.
25///
26/// See also src/dfa/special.rs for a more detailed explanation of how dense
27/// DFAs are partitioned.
28pub(crate) trait Remappable: core::fmt::Debug {
29 /// Return the total number of states.
30 fn state_len(&self) -> usize;
31
32 /// Swap the states pointed to by the given IDs. The underlying finite
33 /// state machine should be mutated such that all of the transitions in
34 /// `id1` are now in the memory region where the transitions for `id2`
35 /// were, and all of the transitions in `id2` are now in the memory region
36 /// where the transitions for `id1` were.
37 ///
38 /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
39 ///
40 /// It is expected that, after calling this, the underlying state machine
41 /// will be left in an inconsistent state, since any other transitions
42 /// pointing to, e.g., `id1` need to be updated to point to `id2`, since
43 /// that's where `id1` moved to.
44 ///
45 /// In order to "fix" the underlying inconsistent state, a `Remapper`
46 /// should be used to guarantee that `remap` is called at the appropriate
47 /// time.
48 fn swap_states(&mut self, id1: StateID, id2: StateID);
49
50 /// This must remap every single state ID in the underlying value according
51 /// to the function given. For example, in a DFA, this should remap every
52 /// transition and every starting state ID.
53 fn remap(&mut self, map: impl Fn(StateID) -> StateID);
54}
55
56/// Remapper is an abstraction the manages the remapping of state IDs in a
57/// finite state machine. This is useful when one wants to shuffle states into
58/// different positions in the machine.
59///
60/// One of the key complexities this manages is the ability to correctly move
61/// one state multiple times.
62///
63/// Once shuffling is complete, `remap` must be called, which will rewrite
64/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
65/// will almost certainly result in a corrupt machine.
66#[derive(Debug)]
67pub(crate) struct Remapper {
68 /// A map from the index of a state to its pre-multiplied identifier.
69 ///
70 /// When a state is swapped with another, then their corresponding
71 /// locations in this map are also swapped. Thus, its new position will
72 /// still point to its old pre-multiplied StateID.
73 ///
74 /// While there is a bit more to it, this then allows us to rewrite the
75 /// state IDs in a DFA's transition table in a single pass. This is done
76 /// by iterating over every ID in this map, then iterating over each
77 /// transition for the state at that ID and re-mapping the transition from
78 /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
79 /// in this map where `old_id` *started*, and set it to where it ended up
80 /// after all swaps have been completed.
81 map: Vec<StateID>,
82 /// A way to map indices to state IDs (and back).
83 idx: IndexMapper,
84}
85
86impl Remapper {
87 /// Create a new remapper from the given remappable implementation. The
88 /// remapper can then be used to swap states. The remappable value given
89 /// here must the same one given to `swap` and `remap`.
90 ///
91 /// The given stride should be the stride of the transition table expressed
92 /// as a power of 2. This stride is used to map between state IDs and state
93 /// indices. If state IDs and state indices are equivalent, then provide
94 /// a `stride2` of `0`, which acts as an identity.
95 pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper {
96 let idx = IndexMapper { stride2 };
97 let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect();
98 Remapper { map, idx }
99 }
100
101 /// Swap two states. Once this is called, callers must follow through to
102 /// call `remap`, or else it's possible for the underlying remappable
103 /// value to be in a corrupt state.
104 pub(crate) fn swap(
105 &mut self,
106 r: &mut impl Remappable,
107 id1: StateID,
108 id2: StateID,
109 ) {
110 if id1 == id2 {
111 return;
112 }
113 r.swap_states(id1, id2);
114 self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2));
115 }
116
117 /// Complete the remapping process by rewriting all state IDs in the
118 /// remappable value according to the swaps performed.
119 pub(crate) fn remap(mut self, r: &mut impl Remappable) {
120 // Update the map to account for states that have been swapped
121 // multiple times. For example, if (A, C) and (C, G) are swapped, then
122 // transitions previously pointing to A should now point to G. But if
123 // we don't update our map, they will erroneously be set to C. All we
124 // do is follow the swaps in our map until we see our original state
125 // ID.
126 //
127 // The intuition here is to think about how changes are made to the
128 // map: only through pairwise swaps. That means that starting at any
129 // given state, it is always possible to find the loop back to that
130 // state by following the swaps represented in the map (which might be
131 // 0 swaps).
132 //
133 // We are also careful to clone the map before starting in order to
134 // freeze it. We use the frozen map to find our loops, since we need to
135 // update our map as well. Without freezing it, our updates could break
136 // the loops referenced above and produce incorrect results.
137 let oldmap = self.map.clone();
138 for i in 0..r.state_len() {
139 let cur_id = self.idx.to_state_id(i);
140 let mut new_id = oldmap[i];
141 if cur_id == new_id {
142 continue;
143 }
144 loop {
145 let id = oldmap[self.idx.to_index(new_id)];
146 if cur_id == id {
147 self.map[i] = new_id;
148 break;
149 }
150 new_id = id;
151 }
152 }
153 r.remap(|sid| self.map[self.idx.to_index(sid)]);
154 }
155}
156
157/// A simple type for mapping between state indices and state IDs.
158///
159/// The reason why this exists is because state IDs are "premultiplied" in a
160/// DFA. That is, in order to get to the transitions for a particular state,
161/// one need only use the state ID as-is, instead of having to multiply it by
162/// transition table's stride.
163///
164/// The downside of this is that it's inconvenient to map between state IDs
165/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
166/// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`,
167/// etc.
168///
169/// Since our state IDs are premultiplied, we can convert back-and-forth
170/// between IDs and indices by simply unmultiplying the IDs and multiplying the
171/// indices.
172///
173/// Note that for a sparse NFA, state IDs and indices are equivalent. In this
174/// case, we set the stride of the index mapped to be `0`, which acts as an
175/// identity.
176#[derive(Debug)]
177struct IndexMapper {
178 /// The power of 2 corresponding to the stride of the corresponding
179 /// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
180 /// stride2' pre-multiplies an index to an ID.
181 stride2: usize,
182}
183
184impl IndexMapper {
185 /// Convert a state ID to a state index.
186 fn to_index(&self, id: StateID) -> usize {
187 id.as_usize() >> self.stride2
188 }
189
190 /// Convert a state index to a state ID.
191 fn to_state_id(&self, index: usize) -> StateID {
192 // CORRECTNESS: If the given index is not valid, then it is not
193 // required for this to panic or return a valid state ID. We'll "just"
194 // wind up with panics or silent logic errors at some other point. But
195 // this is OK because if Remappable::state_len is correct and so is
196 // 'to_index', then all inputs to 'to_state_id' should be valid indices
197 // and thus transform into valid state IDs.
198 StateID::new_unchecked(index << self.stride2)
199 }
200}
201
202impl Remappable for noncontiguous::NFA {
203 fn state_len(&self) -> usize {
204 noncontiguous::NFA::states(self).len()
205 }
206
207 fn swap_states(&mut self, id1: StateID, id2: StateID) {
208 noncontiguous::NFA::swap_states(self, id1, id2)
209 }
210
211 fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
212 noncontiguous::NFA::remap(self, map)
213 }
214}
215