remapper.rs source code [crates/regex-automata-0.4.9/src/dfa/remapper.rs]

1	use alloc::vec::Vec;
2
3	use crate::util::primitives::StateID;
4
5	/// Remappable is a tightly coupled abstraction that facilitates remapping
6	/// state identifiers in DFAs.
7	///
8	/// The main idea behind remapping state IDs is that DFAs often need to check
9	/// if a certain state is a "special" state of some kind (like a match state)
10	/// during a search. Since this is extremely perf critical code, we want this
11	/// check to be as fast as possible. Partitioning state IDs into, for example,
12	/// into "non-match" and "match" states means one can tell if a state is a
13	/// match state via a simple comparison of the state ID.
14	///
15	/// The issue is that during the DFA construction process, it's not
16	/// particularly easy to partition the states. Instead, the simplest thing is
17	/// to often just do a pass over all of the states and shuffle them into their
18	/// desired partitionings. To do that, we need a mechanism for swapping states.
19	/// Hence, this abstraction.
20	///
21	/// Normally, for such little code, I would just duplicate it. But this is a
22	/// key optimization and the implementation is a bit subtle. So the abstraction
23	/// is basically a ham-fisted attempt at DRY. The only place we use this is in
24	/// the dense and one-pass DFAs.
25	///
26	/// See also src/dfa/special.rs for a more detailed explanation of how dense
27	/// DFAs are partitioned.
28	pub(super) trait Remappable: core::fmt::Debug {
29	/// Return the total number of states.
30	fn state_len(&self) -> usize;
31	/// Return the power-of-2 exponent that yields the stride. The pertinent
32	/// laws here are, where N=stride2: 2^N=stride and len(alphabet) <= stride.
33	fn stride2(&self) -> usize;
34	/// Swap the states pointed to by the given IDs. The underlying finite
35	/// state machine should be mutated such that all of the transitions in
36	/// `id1` are now in the memory region where the transitions for `id2`
37	/// were, and all of the transitions in `id2` are now in the memory region
38	/// where the transitions for `id1` were.
39	///
40	/// Essentially, this "moves" `id1` to `id2` and `id2` to `id1`.
41	///
42	/// It is expected that, after calling this, the underlying value will be
43	/// left in an inconsistent state, since any other transitions pointing to,
44	/// e.g., `id1` need to be updated to point to `id2`, since that's where
45	/// `id1` moved to.
46	///
47	/// In order to "fix" the underlying inconsistent state, a `Remapper`
48	/// should be used to guarantee that `remap` is called at the appropriate
49	/// time.
50	fn swap_states(&mut self, id1: StateID, id2: StateID);
51	/// This must remap every single state ID in the underlying value according
52	/// to the function given. For example, in a DFA, this should remap every
53	/// transition and every starting state ID.
54	fn remap(&mut self, map: impl Fn(StateID) -> StateID);
55	}
56
57	/// Remapper is an abstraction the manages the remapping of state IDs in a
58	/// finite state machine. This is useful when one wants to shuffle states into
59	/// different positions in the machine.
60	///
61	/// One of the key complexities this manages is the ability to correctly move
62	/// one state multiple times.
63	///
64	/// Once shuffling is complete, `remap` must be called, which will rewrite
65	/// all pertinent transitions to updated state IDs. Neglecting to call `remap`
66	/// will almost certainly result in a corrupt machine.
67	#[derive(Debug)]
68	pub(super) struct Remapper {
69	/// A map from the index of a state to its pre-multiplied identifier.
70	///
71	/// When a state is swapped with another, then their corresponding
72	/// locations in this map are also swapped. Thus, its new position will
73	/// still point to its old pre-multiplied StateID.
74	///
75	/// While there is a bit more to it, this then allows us to rewrite the
76	/// state IDs in a DFA's transition table in a single pass. This is done
77	/// by iterating over every ID in this map, then iterating over each
78	/// transition for the state at that ID and re-mapping the transition from
79	/// `old_id` to `map[dfa.to_index(old_id)]`. That is, we find the position
80	/// in this map where `old_id` started, and set it to where it ended up
81	/// after all swaps have been completed.
82	map: Vec<StateID>,
83	/// A mapper from state index to state ID (and back).
84	idxmap: IndexMapper,
85	}
86
87	impl Remapper {
88	/// Create a new remapper from the given remappable implementation. The
89	/// remapper can then be used to swap states. The remappable value given
90	/// here must the same one given to `swap` and `remap`.
91	pub(super) fn new(r: &impl Remappable) -> Remapper {
92	let idxmap = IndexMapper { stride2: r.stride2() };
93	let map = (`0`..r.state_len()).map(\|i\| idxmap.to_state_id(i)).collect();
94	Remapper { map, idxmap }
95	}
96
97	/// Swap two states. Once this is called, callers must follow through to
98	/// call `remap`, or else it's possible for the underlying remappable
99	/// value to be in a corrupt state.
100	pub(super) fn swap(
101	&mut self,
102	r: &mut impl Remappable,
103	id1: StateID,
104	id2: StateID,
105	) {
106	if id1 == id2 {
107	return;
108	}
109	r.swap_states(id1, id2);
110	self.map.swap(self.idxmap.to_index(id1), self.idxmap.to_index(id2));
111	}
112
113	/// Complete the remapping process by rewriting all state IDs in the
114	/// remappable value according to the swaps performed.
115	pub(super) fn remap(mut self, r: &mut impl Remappable) {
116	// Update the map to account for states that have been swapped
117	// multiple times. For example, if (A, C) and (C, G) are swapped, then
118	// transitions previously pointing to A should now point to G. But if
119	// we don't update our map, they will erroneously be set to C. All we
120	// do is follow the swaps in our map until we see our original state
121	// ID.
122	//
123	// The intuition here is to think about how changes are made to the
124	// map: only through pairwise swaps. That means that starting at any
125	// given state, it is always possible to find the loop back to that
126	// state by following the swaps represented in the map (which might be
127	// 0 swaps).
128	//
129	// We are also careful to clone the map before starting in order to
130	// freeze it. We use the frozen map to find our loops, since we need to
131	// update our map as well. Without freezing it, our updates could break
132	// the loops referenced above and produce incorrect results.
133	let oldmap = self.map.clone();
134	for i in `0`..r.state_len() {
135	let cur_id = self.idxmap.to_state_id(i);
136	let mut new_id = oldmap[i];
137	if cur_id == new_id {
138	continue;
139	}
140	loop {
141	let id = oldmap[self.idxmap.to_index(new_id)];
142	if cur_id == id {
143	self.map[i] = new_id;
144	break;
145	}
146	new_id = id;
147	}
148	}
149	r.remap(\|next\| self.map[self.idxmap.to_index(next)]);
150	}
151	}
152
153	/// A simple type for mapping between state indices and state IDs.
154	///
155	/// The reason why this exists is because state IDs are "premultiplied." That
156	/// is, in order to get to the transitions for a particular state, one need
157	/// only use the state ID as-is, instead of having to multiple it by transition
158	/// table's stride.
159	///
160	/// The downside of this is that it's inconvenient to map between state IDs
161	/// using a dense map, e.g., Vec<StateID>. That's because state IDs look like
162	/// `0`, `0+stride`, `0+2stride`, `0+3stride`, etc., instead of `0`, `1`,
163	/// `2`, `3`, etc.
164	///
165	/// Since our state IDs are premultiplied, we can convert back-and-forth
166	/// between IDs and indices by simply unmultiplying the IDs and multiplying the
167	/// indices.
168	#[derive(Debug)]
169	struct IndexMapper {
170	/// The power of 2 corresponding to the stride of the corresponding
171	/// transition table. 'id >> stride2' de-multiplies an ID while 'index <<
172	/// stride2' pre-multiplies an index to an ID.
173	stride2: usize,
174	}
175
176	impl IndexMapper {
177	/// Convert a state ID to a state index.
178	fn to_index(&self, id: StateID) -> usize {
179	id.as_usize() >> self.stride2
180	}
181
182	/// Convert a state index to a state ID.
183	fn to_state_id(&self, index: usize) -> StateID {
184	// CORRECTNESS: If the given index is not valid, then it is not
185	// required for this to panic or return a valid state ID. We'll "just"
186	// wind up with panics or silent logic errors at some other point.
187	StateID::new_unchecked(index << self.stride2)
188	}
189	}
190
191	#[cfg(feature = "dfa-build")]
192	mod dense {
193	use crate::{dfa::dense::OwnedDFA, util::primitives::StateID};
194
195	use super::Remappable;
196
197	impl Remappable for OwnedDFA {
198	fn state_len(&self) -> usize {
199	OwnedDFA::state_len(self)
200	}
201
202	fn stride2(&self) -> usize {
203	OwnedDFA::stride2(self)
204	}
205
206	fn swap_states(&mut self, id1: StateID, id2: StateID) {
207	OwnedDFA::swap_states(self, id1, id2)
208	}
209
210	fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
211	OwnedDFA::remap(self, map)
212	}
213	}
214	}
215
216	#[cfg(feature = "dfa-onepass")]
217	mod onepass {
218	use crate::{dfa::onepass::DFA, util::primitives::StateID};
219
220	use super::Remappable;
221
222	impl Remappable for DFA {
223	fn state_len(&self) -> usize {
224	DFA::state_len(self)
225	}
226
227	fn stride2(&self) -> usize {
228	// We don't do pre-multiplication for the one-pass DFA, so
229	// returning 0 has the effect of making state IDs and state indices
230	// equivalent.
231	`0`
232	}
233
234	fn swap_states(&mut self, id1: StateID, id2: StateID) {
235	DFA::swap_states(self, id1, id2)
236	}
237
238	fn remap(&mut self, map: impl Fn(StateID) -> StateID) {
239	DFA::remap(self, map)
240	}
241	}
242	}
243