state_id.rs source code [crates/regex-automata-0.1.10/src/state_id.rs]

1	use core::fmt::Debug;
2	use core::hash::Hash;
3	use core::mem::size_of;
4
5	use byteorder::{ByteOrder, NativeEndian};
6
7	#[cfg(feature = "std")]
8	pub use self::std::*;
9
10	#[cfg(feature = "std")]
11	mod std {
12	use byteorder::ByteOrder;
13	use core::mem::size_of;
14	use error::{Error, Result};
15
16	use super::StateID;
17
18	/// Check that the premultiplication of the given state identifier can
19	/// fit into the representation indicated by `S`. If it cannot, or if it
20	/// overflows `usize` itself, then an error is returned.
21	pub fn premultiply_overflow_error<S: StateID>(
22	last_state: S,
23	alphabet_len: usize,
24	) -> Result<()> {
25	let requested = match last_state.to_usize().checked_mul(alphabet_len) {
26	Some(requested) => requested,
27	None => return Err(Error::premultiply_overflow(`0`, `0`)),
28	};
29	if requested > S::max_id() {
30	return Err(Error::premultiply_overflow(S::max_id(), requested));
31	}
32	Ok(())
33	}
34
35	/// Allocate the next sequential identifier for a fresh state given
36	/// the previously constructed state identified by `current`. If the
37	/// next sequential identifier would overflow `usize` or the chosen
38	/// representation indicated by `S`, then an error is returned.
39	pub fn next_state_id<S: StateID>(current: S) -> Result<S> {
40	let next = match current.to_usize().checked_add(`1`) {
41	Some(next) => next,
42	None => return Err(Error::state_id_overflow(::std::usize::MAX)),
43	};
44	if next > S::max_id() {
45	return Err(Error::state_id_overflow(S::max_id()));
46	}
47	Ok(S::from_usize(next))
48	}
49
50	/// Convert the given `usize` to the chosen state identifier
51	/// representation. If the given value cannot fit in the chosen
52	/// representation, then an error is returned.
53	pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
54	if value > S::max_id() {
55	Err(Error::state_id_overflow(S::max_id()))
56	} else {
57	Ok(S::from_usize(value))
58	}
59	}
60
61	/// Write the given identifier to the given slice of bytes using the
62	/// specified endianness. The given slice must have length at least
63	/// `size_of::<S>()`.
64	///
65	/// The given state identifier representation must have size 1, 2, 4 or 8.
66	pub fn write_state_id_bytes<E: ByteOrder, S: StateID>(
67	slice: &mut [u8],
68	id: S,
69	) {
70	assert!(
71	`1` == size_of::<S>()
72	\|\| `2` == size_of::<S>()
73	\|\| `4` == size_of::<S>()
74	\|\| `8` == size_of::<S>()
75	);
76
77	match size_of::<S>() {
78	`1` => slice[`0`] = id.to_usize() as u8,
79	`2` => E::write_u16(slice, id.to_usize() as u16),
80	`4` => E::write_u32(slice, id.to_usize() as u32),
81	`8` => E::write_u64(slice, id.to_usize() as u64),
82	_ => unreachable!(),
83	}
84	}
85	}
86
87	/// Return the unique identifier for a DFA's dead state in the chosen
88	/// representation indicated by `S`.
89	pub fn dead_id<S: StateID>() -> S {
90	S::from_usize(`0`)
91	}
92
93	/// A trait describing the representation of a DFA's state identifier.
94	///
95	/// The purpose of this trait is to safely express both the possible state
96	/// identifier representations that can be used in a DFA and to convert between
97	/// state identifier representations and types that can be used to efficiently
98	/// index memory (such as `usize`).
99	///
100	/// In general, one should not need to implement this trait explicitly. In
101	/// particular, this crate provides implementations for `u8`, `u16`, `u32`,
102	/// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can
103	/// represent all corresponding values in a `usize`.)
104	///
105	/// # Safety
106	///
107	/// This trait is unsafe because the correctness of its implementations may be
108	/// relied upon by other unsafe code. For example, one possible way to
109	/// implement this trait incorrectly would be to return a maximum identifier
110	/// in `max_id` that is greater than the real maximum identifier. This will
111	/// likely result in wrap-on-overflow semantics in release mode, which can in
112	/// turn produce incorrect state identifiers. Those state identifiers may then
113	/// in turn access out-of-bounds memory in a DFA's search routine, where bounds
114	/// checks are explicitly elided for performance reasons.
115	pub unsafe trait StateID:
116	Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
117	{
118	/// Convert from a `usize` to this implementation's representation.
119	///
120	/// Implementors may assume that `n <= Self::max_id`. That is, implementors
121	/// do not need to check whether `n` can fit inside this implementation's
122	/// representation.
123	fn from_usize(n: usize) -> Self;
124
125	/// Convert this implementation's representation to a `usize`.
126	///
127	/// Implementors must not return a `usize` value greater than
128	/// `Self::max_id` and must not permit overflow when converting between the
129	/// implementor's representation and `usize`. In general, the preferred
130	/// way for implementors to achieve this is to simply not provide
131	/// implementations of `StateID` that cannot fit into the target platform's
132	/// `usize`.
133	fn to_usize(self) -> usize;
134
135	/// Return the maximum state identifier supported by this representation.
136	///
137	/// Implementors must return a correct bound. Doing otherwise may result
138	/// in memory unsafety.
139	fn max_id() -> usize;
140
141	/// Read a single state identifier from the given slice of bytes in native
142	/// endian format.
143	///
144	/// Implementors may assume that the given slice has length at least
145	/// `size_of::<Self>()`.
146	fn read_bytes(slice: &[u8]) -> Self;
147
148	/// Write this state identifier to the given slice of bytes in native
149	/// endian format.
150	///
151	/// Implementors may assume that the given slice has length at least
152	/// `size_of::<Self>()`.
153	fn write_bytes(self, slice: &mut [u8]);
154	}
155
156	unsafe impl StateID for usize {
157	#[inline]
158	fn from_usize(n: usize) -> usize {
159	n
160	}
161
162	#[inline]
163	fn to_usize(self) -> usize {
164	self
165	}
166
167	#[inline]
168	fn max_id() -> usize {
169	::core::usize::MAX
170	}
171
172	#[inline]
173	fn read_bytes(slice: &[u8]) -> Self {
174	NativeEndian::read_uint(slice, size_of::<usize>()) as usize
175	}
176
177	#[inline]
178	fn write_bytes(self, slice: &mut [u8]) {
179	NativeEndian::write_uint(slice, self as u64, size_of::<usize>())
180	}
181	}
182
183	unsafe impl StateID for u8 {
184	#[inline]
185	fn from_usize(n: usize) -> u8 {
186	n as u8
187	}
188
189	#[inline]
190	fn to_usize(self) -> usize {
191	self as usize
192	}
193
194	#[inline]
195	fn max_id() -> usize {
196	::core::u8::MAX as usize
197	}
198
199	#[inline]
200	fn read_bytes(slice: &[u8]) -> Self {
201	slice[`0`]
202	}
203
204	#[inline]
205	fn write_bytes(self, slice: &mut [u8]) {
206	slice[`0`] = self;
207	}
208	}
209
210	unsafe impl StateID for u16 {
211	#[inline]
212	fn from_usize(n: usize) -> u16 {
213	n as u16
214	}
215
216	#[inline]
217	fn to_usize(self) -> usize {
218	self as usize
219	}
220
221	#[inline]
222	fn max_id() -> usize {
223	::core::u16::MAX as usize
224	}
225
226	#[inline]
227	fn read_bytes(slice: &[u8]) -> Self {
228	NativeEndian::read_u16(slice)
229	}
230
231	#[inline]
232	fn write_bytes(self, slice: &mut [u8]) {
233	NativeEndian::write_u16(slice, self)
234	}
235	}
236
237	#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
238	unsafe impl StateID for u32 {
239	#[inline]
240	fn from_usize(n: usize) -> u32 {
241	n as u32
242	}
243
244	#[inline]
245	fn to_usize(self) -> usize {
246	self as usize
247	}
248
249	#[inline]
250	fn max_id() -> usize {
251	::core::u32::MAX as usize
252	}
253
254	#[inline]
255	fn read_bytes(slice: &[u8]) -> Self {
256	NativeEndian::read_u32(slice)
257	}
258
259	#[inline]
260	fn write_bytes(self, slice: &mut [u8]) {
261	NativeEndian::write_u32(slice, self)
262	}
263	}
264
265	#[cfg(target_pointer_width = "64")]
266	unsafe impl StateID for u64 {
267	#[inline]
268	fn from_usize(n: usize) -> u64 {
269	n as u64
270	}
271
272	#[inline]
273	fn to_usize(self) -> usize {
274	self as usize
275	}
276
277	#[inline]
278	fn max_id() -> usize {
279	::core::u64::MAX as usize
280	}
281
282	#[inline]
283	fn read_bytes(slice: &[u8]) -> Self {
284	NativeEndian::read_u64(slice)
285	}
286
287	#[inline]
288	fn write_bytes(self, slice: &mut [u8]) {
289	NativeEndian::write_u64(slice, self)
290	}
291	}
292