| 1 | use alloc::vec::Vec; |
| 2 | |
| 3 | use crate::{nfa::noncontiguous, util::primitives::StateID}; |
| 4 | |
| 5 | /// Remappable is a tightly coupled abstraction that facilitates remapping |
| 6 | /// state identifiers in DFAs. |
| 7 | /// |
| 8 | /// The main idea behind remapping state IDs is that DFAs often need to check |
| 9 | /// if a certain state is a "special" state of some kind (like a match state) |
| 10 | /// during a search. Since this is extremely perf critical code, we want this |
| 11 | /// check to be as fast as possible. Partitioning state IDs into, for example, |
| 12 | /// into "non-match" and "match" states means one can tell if a state is a |
| 13 | /// match state via a simple comparison of the state ID. |
| 14 | /// |
| 15 | /// The issue is that during the DFA construction process, it's not |
| 16 | /// particularly easy to partition the states. Instead, the simplest thing is |
| 17 | /// to often just do a pass over all of the states and shuffle them into their |
| 18 | /// desired partitionings. To do that, we need a mechanism for swapping states. |
| 19 | /// Hence, this abstraction. |
| 20 | /// |
| 21 | /// Normally, for such little code, I would just duplicate it. But this is a |
| 22 | /// key optimization and the implementation is a bit subtle. So the abstraction |
| 23 | /// is basically a ham-fisted attempt at DRY. The only place we use this is in |
| 24 | /// the dense and one-pass DFAs. |
| 25 | /// |
| 26 | /// See also src/dfa/special.rs for a more detailed explanation of how dense |
| 27 | /// DFAs are partitioned. |
| 28 | pub(crate) trait Remappable: core::fmt::Debug { |
| 29 | /// Return the total number of states. |
| 30 | fn state_len(&self) -> usize; |
| 31 | |
| 32 | /// Swap the states pointed to by the given IDs. The underlying finite |
| 33 | /// state machine should be mutated such that all of the transitions in |
| 34 | /// `id1` are now in the memory region where the transitions for `id2` |
| 35 | /// were, and all of the transitions in `id2` are now in the memory region |
| 36 | /// where the transitions for `id1` were. |
| 37 | /// |
| 38 | /// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`. |
| 39 | /// |
| 40 | /// It is expected that, after calling this, the underlying state machine |
| 41 | /// will be left in an inconsistent state, since any other transitions |
| 42 | /// pointing to, e.g., `id1` need to be updated to point to `id2`, since |
| 43 | /// that's where `id1` moved to. |
| 44 | /// |
| 45 | /// In order to "fix" the underlying inconsistent state, a `Remapper` |
| 46 | /// should be used to guarantee that `remap` is called at the appropriate |
| 47 | /// time. |
| 48 | fn swap_states(&mut self, id1: StateID, id2: StateID); |
| 49 | |
| 50 | /// This must remap every single state ID in the underlying value according |
| 51 | /// to the function given. For example, in a DFA, this should remap every |
| 52 | /// transition and every starting state ID. |
| 53 | fn remap(&mut self, map: impl Fn(StateID) -> StateID); |
| 54 | } |
| 55 | |
| 56 | /// Remapper is an abstraction the manages the remapping of state IDs in a |
| 57 | /// finite state machine. This is useful when one wants to shuffle states into |
| 58 | /// different positions in the machine. |
| 59 | /// |
| 60 | /// One of the key complexities this manages is the ability to correctly move |
| 61 | /// one state multiple times. |
| 62 | /// |
| 63 | /// Once shuffling is complete, `remap` must be called, which will rewrite |
| 64 | /// all pertinent transitions to updated state IDs. Neglecting to call `remap` |
| 65 | /// will almost certainly result in a corrupt machine. |
| 66 | #[derive (Debug)] |
| 67 | pub(crate) struct Remapper { |
| 68 | /// A map from the index of a state to its pre-multiplied identifier. |
| 69 | /// |
| 70 | /// When a state is swapped with another, then their corresponding |
| 71 | /// locations in this map are also swapped. Thus, its new position will |
| 72 | /// still point to its old pre-multiplied StateID. |
| 73 | /// |
| 74 | /// While there is a bit more to it, this then allows us to rewrite the |
| 75 | /// state IDs in a DFA's transition table in a single pass. This is done |
| 76 | /// by iterating over every ID in this map, then iterating over each |
| 77 | /// transition for the state at that ID and re-mapping the transition from |
| 78 | /// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position |
| 79 | /// in this map where `old_id` *started*, and set it to where it ended up |
| 80 | /// after all swaps have been completed. |
| 81 | map: Vec<StateID>, |
| 82 | /// A way to map indices to state IDs (and back). |
| 83 | idx: IndexMapper, |
| 84 | } |
| 85 | |
| 86 | impl Remapper { |
| 87 | /// Create a new remapper from the given remappable implementation. The |
| 88 | /// remapper can then be used to swap states. The remappable value given |
| 89 | /// here must the same one given to `swap` and `remap`. |
| 90 | /// |
| 91 | /// The given stride should be the stride of the transition table expressed |
| 92 | /// as a power of 2. This stride is used to map between state IDs and state |
| 93 | /// indices. If state IDs and state indices are equivalent, then provide |
| 94 | /// a `stride2` of `0`, which acts as an identity. |
| 95 | pub(crate) fn new(r: &impl Remappable, stride2: usize) -> Remapper { |
| 96 | let idx = IndexMapper { stride2 }; |
| 97 | let map = (0..r.state_len()).map(|i| idx.to_state_id(i)).collect(); |
| 98 | Remapper { map, idx } |
| 99 | } |
| 100 | |
| 101 | /// Swap two states. Once this is called, callers must follow through to |
| 102 | /// call `remap`, or else it's possible for the underlying remappable |
| 103 | /// value to be in a corrupt state. |
| 104 | pub(crate) fn swap( |
| 105 | &mut self, |
| 106 | r: &mut impl Remappable, |
| 107 | id1: StateID, |
| 108 | id2: StateID, |
| 109 | ) { |
| 110 | if id1 == id2 { |
| 111 | return; |
| 112 | } |
| 113 | r.swap_states(id1, id2); |
| 114 | self.map.swap(self.idx.to_index(id1), self.idx.to_index(id2)); |
| 115 | } |
| 116 | |
| 117 | /// Complete the remapping process by rewriting all state IDs in the |
| 118 | /// remappable value according to the swaps performed. |
| 119 | pub(crate) fn remap(mut self, r: &mut impl Remappable) { |
| 120 | // Update the map to account for states that have been swapped |
| 121 | // multiple times. For example, if (A, C) and (C, G) are swapped, then |
| 122 | // transitions previously pointing to A should now point to G. But if |
| 123 | // we don't update our map, they will erroneously be set to C. All we |
| 124 | // do is follow the swaps in our map until we see our original state |
| 125 | // ID. |
| 126 | // |
| 127 | // The intuition here is to think about how changes are made to the |
| 128 | // map: only through pairwise swaps. That means that starting at any |
| 129 | // given state, it is always possible to find the loop back to that |
| 130 | // state by following the swaps represented in the map (which might be |
| 131 | // 0 swaps). |
| 132 | // |
| 133 | // We are also careful to clone the map before starting in order to |
| 134 | // freeze it. We use the frozen map to find our loops, since we need to |
| 135 | // update our map as well. Without freezing it, our updates could break |
| 136 | // the loops referenced above and produce incorrect results. |
| 137 | let oldmap = self.map.clone(); |
| 138 | for i in 0..r.state_len() { |
| 139 | let cur_id = self.idx.to_state_id(i); |
| 140 | let mut new_id = oldmap[i]; |
| 141 | if cur_id == new_id { |
| 142 | continue; |
| 143 | } |
| 144 | loop { |
| 145 | let id = oldmap[self.idx.to_index(new_id)]; |
| 146 | if cur_id == id { |
| 147 | self.map[i] = new_id; |
| 148 | break; |
| 149 | } |
| 150 | new_id = id; |
| 151 | } |
| 152 | } |
| 153 | r.remap(|sid| self.map[self.idx.to_index(sid)]); |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | /// A simple type for mapping between state indices and state IDs. |
| 158 | /// |
| 159 | /// The reason why this exists is because state IDs are "premultiplied" in a |
| 160 | /// DFA. That is, in order to get to the transitions for a particular state, |
| 161 | /// one need only use the state ID as-is, instead of having to multiply it by |
| 162 | /// transition table's stride. |
| 163 | /// |
| 164 | /// The downside of this is that it's inconvenient to map between state IDs |
| 165 | /// using a dense map, e.g., Vec<StateID>. That's because state IDs look like |
| 166 | /// `0`, `stride`, `2*stride`, `3*stride`, etc., instead of `0`, `1`, `2`, `3`, |
| 167 | /// etc. |
| 168 | /// |
| 169 | /// Since our state IDs are premultiplied, we can convert back-and-forth |
| 170 | /// between IDs and indices by simply unmultiplying the IDs and multiplying the |
| 171 | /// indices. |
| 172 | /// |
| 173 | /// Note that for a sparse NFA, state IDs and indices are equivalent. In this |
| 174 | /// case, we set the stride of the index mapped to be `0`, which acts as an |
| 175 | /// identity. |
| 176 | #[derive (Debug)] |
| 177 | struct IndexMapper { |
| 178 | /// The power of 2 corresponding to the stride of the corresponding |
| 179 | /// transition table. 'id >> stride2' de-multiplies an ID while 'index << |
| 180 | /// stride2' pre-multiplies an index to an ID. |
| 181 | stride2: usize, |
| 182 | } |
| 183 | |
| 184 | impl IndexMapper { |
| 185 | /// Convert a state ID to a state index. |
| 186 | fn to_index(&self, id: StateID) -> usize { |
| 187 | id.as_usize() >> self.stride2 |
| 188 | } |
| 189 | |
| 190 | /// Convert a state index to a state ID. |
| 191 | fn to_state_id(&self, index: usize) -> StateID { |
| 192 | // CORRECTNESS: If the given index is not valid, then it is not |
| 193 | // required for this to panic or return a valid state ID. We'll "just" |
| 194 | // wind up with panics or silent logic errors at some other point. But |
| 195 | // this is OK because if Remappable::state_len is correct and so is |
| 196 | // 'to_index', then all inputs to 'to_state_id' should be valid indices |
| 197 | // and thus transform into valid state IDs. |
| 198 | StateID::new_unchecked(index << self.stride2) |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | impl Remappable for noncontiguous::NFA { |
| 203 | fn state_len(&self) -> usize { |
| 204 | noncontiguous::NFA::states(self).len() |
| 205 | } |
| 206 | |
| 207 | fn swap_states(&mut self, id1: StateID, id2: StateID) { |
| 208 | noncontiguous::NFA::swap_states(self, id1, id2) |
| 209 | } |
| 210 | |
| 211 | fn remap(&mut self, map: impl Fn(StateID) -> StateID) { |
| 212 | noncontiguous::NFA::remap(self, map) |
| 213 | } |
| 214 | } |
| 215 | |