1use core::fmt::Debug;
2use core::hash::Hash;
3use core::mem::size_of;
4
5use byteorder::{ByteOrder, NativeEndian};
6
7#[cfg(feature = "std")]
8pub use self::std::*;
9
10#[cfg(feature = "std")]
11mod std {
12 use byteorder::ByteOrder;
13 use core::mem::size_of;
14 use error::{Error, Result};
15
16 use super::StateID;
17
18 /// Check that the premultiplication of the given state identifier can
19 /// fit into the representation indicated by `S`. If it cannot, or if it
20 /// overflows `usize` itself, then an error is returned.
21 pub fn premultiply_overflow_error<S: StateID>(
22 last_state: S,
23 alphabet_len: usize,
24 ) -> Result<()> {
25 let requested = match last_state.to_usize().checked_mul(alphabet_len) {
26 Some(requested) => requested,
27 None => return Err(Error::premultiply_overflow(0, 0)),
28 };
29 if requested > S::max_id() {
30 return Err(Error::premultiply_overflow(S::max_id(), requested));
31 }
32 Ok(())
33 }
34
35 /// Allocate the next sequential identifier for a fresh state given
36 /// the previously constructed state identified by `current`. If the
37 /// next sequential identifier would overflow `usize` or the chosen
38 /// representation indicated by `S`, then an error is returned.
39 pub fn next_state_id<S: StateID>(current: S) -> Result<S> {
40 let next = match current.to_usize().checked_add(1) {
41 Some(next) => next,
42 None => return Err(Error::state_id_overflow(::std::usize::MAX)),
43 };
44 if next > S::max_id() {
45 return Err(Error::state_id_overflow(S::max_id()));
46 }
47 Ok(S::from_usize(next))
48 }
49
50 /// Convert the given `usize` to the chosen state identifier
51 /// representation. If the given value cannot fit in the chosen
52 /// representation, then an error is returned.
53 pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> {
54 if value > S::max_id() {
55 Err(Error::state_id_overflow(S::max_id()))
56 } else {
57 Ok(S::from_usize(value))
58 }
59 }
60
61 /// Write the given identifier to the given slice of bytes using the
62 /// specified endianness. The given slice must have length at least
63 /// `size_of::<S>()`.
64 ///
65 /// The given state identifier representation must have size 1, 2, 4 or 8.
66 pub fn write_state_id_bytes<E: ByteOrder, S: StateID>(
67 slice: &mut [u8],
68 id: S,
69 ) {
70 assert!(
71 1 == size_of::<S>()
72 || 2 == size_of::<S>()
73 || 4 == size_of::<S>()
74 || 8 == size_of::<S>()
75 );
76
77 match size_of::<S>() {
78 1 => slice[0] = id.to_usize() as u8,
79 2 => E::write_u16(slice, id.to_usize() as u16),
80 4 => E::write_u32(slice, id.to_usize() as u32),
81 8 => E::write_u64(slice, id.to_usize() as u64),
82 _ => unreachable!(),
83 }
84 }
85}
86
87/// Return the unique identifier for a DFA's dead state in the chosen
88/// representation indicated by `S`.
89pub fn dead_id<S: StateID>() -> S {
90 S::from_usize(0)
91}
92
93/// A trait describing the representation of a DFA's state identifier.
94///
95/// The purpose of this trait is to safely express both the possible state
96/// identifier representations that can be used in a DFA and to convert between
97/// state identifier representations and types that can be used to efficiently
98/// index memory (such as `usize`).
99///
100/// In general, one should not need to implement this trait explicitly. In
101/// particular, this crate provides implementations for `u8`, `u16`, `u32`,
102/// `u64` and `usize`. (`u32` and `u64` are only provided for targets that can
103/// represent all corresponding values in a `usize`.)
104///
105/// # Safety
106///
107/// This trait is unsafe because the correctness of its implementations may be
108/// relied upon by other unsafe code. For example, one possible way to
109/// implement this trait incorrectly would be to return a maximum identifier
110/// in `max_id` that is greater than the real maximum identifier. This will
111/// likely result in wrap-on-overflow semantics in release mode, which can in
112/// turn produce incorrect state identifiers. Those state identifiers may then
113/// in turn access out-of-bounds memory in a DFA's search routine, where bounds
114/// checks are explicitly elided for performance reasons.
115pub unsafe trait StateID:
116 Clone + Copy + Debug + Eq + Hash + PartialEq + PartialOrd + Ord
117{
118 /// Convert from a `usize` to this implementation's representation.
119 ///
120 /// Implementors may assume that `n <= Self::max_id`. That is, implementors
121 /// do not need to check whether `n` can fit inside this implementation's
122 /// representation.
123 fn from_usize(n: usize) -> Self;
124
125 /// Convert this implementation's representation to a `usize`.
126 ///
127 /// Implementors must not return a `usize` value greater than
128 /// `Self::max_id` and must not permit overflow when converting between the
129 /// implementor's representation and `usize`. In general, the preferred
130 /// way for implementors to achieve this is to simply not provide
131 /// implementations of `StateID` that cannot fit into the target platform's
132 /// `usize`.
133 fn to_usize(self) -> usize;
134
135 /// Return the maximum state identifier supported by this representation.
136 ///
137 /// Implementors must return a correct bound. Doing otherwise may result
138 /// in memory unsafety.
139 fn max_id() -> usize;
140
141 /// Read a single state identifier from the given slice of bytes in native
142 /// endian format.
143 ///
144 /// Implementors may assume that the given slice has length at least
145 /// `size_of::<Self>()`.
146 fn read_bytes(slice: &[u8]) -> Self;
147
148 /// Write this state identifier to the given slice of bytes in native
149 /// endian format.
150 ///
151 /// Implementors may assume that the given slice has length at least
152 /// `size_of::<Self>()`.
153 fn write_bytes(self, slice: &mut [u8]);
154}
155
156unsafe impl StateID for usize {
157 #[inline]
158 fn from_usize(n: usize) -> usize {
159 n
160 }
161
162 #[inline]
163 fn to_usize(self) -> usize {
164 self
165 }
166
167 #[inline]
168 fn max_id() -> usize {
169 ::core::usize::MAX
170 }
171
172 #[inline]
173 fn read_bytes(slice: &[u8]) -> Self {
174 NativeEndian::read_uint(slice, size_of::<usize>()) as usize
175 }
176
177 #[inline]
178 fn write_bytes(self, slice: &mut [u8]) {
179 NativeEndian::write_uint(slice, self as u64, size_of::<usize>())
180 }
181}
182
183unsafe impl StateID for u8 {
184 #[inline]
185 fn from_usize(n: usize) -> u8 {
186 n as u8
187 }
188
189 #[inline]
190 fn to_usize(self) -> usize {
191 self as usize
192 }
193
194 #[inline]
195 fn max_id() -> usize {
196 ::core::u8::MAX as usize
197 }
198
199 #[inline]
200 fn read_bytes(slice: &[u8]) -> Self {
201 slice[0]
202 }
203
204 #[inline]
205 fn write_bytes(self, slice: &mut [u8]) {
206 slice[0] = self;
207 }
208}
209
210unsafe impl StateID for u16 {
211 #[inline]
212 fn from_usize(n: usize) -> u16 {
213 n as u16
214 }
215
216 #[inline]
217 fn to_usize(self) -> usize {
218 self as usize
219 }
220
221 #[inline]
222 fn max_id() -> usize {
223 ::core::u16::MAX as usize
224 }
225
226 #[inline]
227 fn read_bytes(slice: &[u8]) -> Self {
228 NativeEndian::read_u16(slice)
229 }
230
231 #[inline]
232 fn write_bytes(self, slice: &mut [u8]) {
233 NativeEndian::write_u16(slice, self)
234 }
235}
236
237#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
238unsafe impl StateID for u32 {
239 #[inline]
240 fn from_usize(n: usize) -> u32 {
241 n as u32
242 }
243
244 #[inline]
245 fn to_usize(self) -> usize {
246 self as usize
247 }
248
249 #[inline]
250 fn max_id() -> usize {
251 ::core::u32::MAX as usize
252 }
253
254 #[inline]
255 fn read_bytes(slice: &[u8]) -> Self {
256 NativeEndian::read_u32(slice)
257 }
258
259 #[inline]
260 fn write_bytes(self, slice: &mut [u8]) {
261 NativeEndian::write_u32(slice, self)
262 }
263}
264
265#[cfg(target_pointer_width = "64")]
266unsafe impl StateID for u64 {
267 #[inline]
268 fn from_usize(n: usize) -> u64 {
269 n as u64
270 }
271
272 #[inline]
273 fn to_usize(self) -> usize {
274 self as usize
275 }
276
277 #[inline]
278 fn max_id() -> usize {
279 ::core::u64::MAX as usize
280 }
281
282 #[inline]
283 fn read_bytes(slice: &[u8]) -> Self {
284 NativeEndian::read_u64(slice)
285 }
286
287 #[inline]
288 fn write_bytes(self, slice: &mut [u8]) {
289 NativeEndian::write_u64(slice, self)
290 }
291}
292