1 | use std::fmt::Debug; |
2 | use std::hash::Hash; |
3 | |
4 | use crate::error::{Error, Result}; |
5 | |
6 | // NOTE: Most of this code was copied from regex-automata, but without the |
7 | // (de)serialization specific stuff. |
8 | |
9 | /// Check that the premultiplication of the given state identifier can |
10 | /// fit into the representation indicated by `S`. If it cannot, or if it |
11 | /// overflows `usize` itself, then an error is returned. |
12 | pub fn premultiply_overflow_error<S: StateID>( |
13 | last_state: S, |
14 | alphabet_len: usize, |
15 | ) -> Result<()> { |
16 | let requested: usize = match last_state.to_usize().checked_mul(alphabet_len) { |
17 | Some(requested: usize) => requested, |
18 | None => return Err(Error::premultiply_overflow(max:0, requested_max:0)), |
19 | }; |
20 | if requested > S::max_id() { |
21 | return Err(Error::premultiply_overflow(S::max_id(), requested_max:requested)); |
22 | } |
23 | Ok(()) |
24 | } |
25 | |
26 | /// Convert the given `usize` to the chosen state identifier |
27 | /// representation. If the given value cannot fit in the chosen |
28 | /// representation, then an error is returned. |
29 | pub fn usize_to_state_id<S: StateID>(value: usize) -> Result<S> { |
30 | if value > S::max_id() { |
31 | Err(Error::state_id_overflow(S::max_id())) |
32 | } else { |
33 | Ok(S::from_usize(value)) |
34 | } |
35 | } |
36 | |
37 | /// Return the unique identifier for an automaton's fail state in the chosen |
38 | /// representation indicated by `S`. |
39 | pub fn fail_id<S: StateID>() -> S { |
40 | S::from_usize(0) |
41 | } |
42 | |
43 | /// Return the unique identifier for an automaton's fail state in the chosen |
44 | /// representation indicated by `S`. |
45 | pub fn dead_id<S: StateID>() -> S { |
46 | S::from_usize(1) |
47 | } |
48 | |
49 | mod private { |
50 | /// Sealed stops crates other than aho-corasick from implementing any |
51 | /// traits that use it. |
52 | pub trait Sealed {} |
53 | impl Sealed for u8 {} |
54 | impl Sealed for u16 {} |
55 | impl Sealed for u32 {} |
56 | impl Sealed for u64 {} |
57 | impl Sealed for usize {} |
58 | } |
59 | |
60 | /// A trait describing the representation of an automaton's state identifier. |
61 | /// |
62 | /// The purpose of this trait is to safely express both the possible state |
63 | /// identifier representations that can be used in an automaton and to convert |
64 | /// between state identifier representations and types that can be used to |
65 | /// efficiently index memory (such as `usize`). |
66 | /// |
67 | /// In general, one should not need to implement this trait explicitly. Indeed, |
68 | /// for now, this trait is sealed such that it cannot be implemented by any |
69 | /// other type. In particular, this crate provides implementations for `u8`, |
70 | /// `u16`, `u32`, `u64` and `usize`. (`u32` and `u64` are only provided for |
71 | /// targets that can represent all corresponding values in a `usize`.) |
72 | pub trait StateID: |
73 | private::Sealed |
74 | + Clone |
75 | + Copy |
76 | + Debug |
77 | + Eq |
78 | + Hash |
79 | + PartialEq |
80 | + PartialOrd |
81 | + Ord |
82 | { |
83 | /// Convert from a `usize` to this implementation's representation. |
84 | /// |
85 | /// Implementors may assume that `n <= Self::max_id`. That is, implementors |
86 | /// do not need to check whether `n` can fit inside this implementation's |
87 | /// representation. |
88 | fn from_usize(n: usize) -> Self; |
89 | |
90 | /// Convert this implementation's representation to a `usize`. |
91 | /// |
92 | /// Implementors must not return a `usize` value greater than |
93 | /// `Self::max_id` and must not permit overflow when converting between the |
94 | /// implementor's representation and `usize`. In general, the preferred |
95 | /// way for implementors to achieve this is to simply not provide |
96 | /// implementations of `StateID` that cannot fit into the target platform's |
97 | /// `usize`. |
98 | fn to_usize(self) -> usize; |
99 | |
100 | /// Return the maximum state identifier supported by this representation. |
101 | /// |
102 | /// Implementors must return a correct bound. Doing otherwise may result |
103 | /// in unspecified behavior (but will not violate memory safety). |
104 | fn max_id() -> usize; |
105 | } |
106 | |
107 | impl StateID for usize { |
108 | #[inline ] |
109 | fn from_usize(n: usize) -> usize { |
110 | n |
111 | } |
112 | |
113 | #[inline ] |
114 | fn to_usize(self) -> usize { |
115 | self |
116 | } |
117 | |
118 | #[inline ] |
119 | fn max_id() -> usize { |
120 | ::std::usize::MAX |
121 | } |
122 | } |
123 | |
124 | impl StateID for u8 { |
125 | #[inline ] |
126 | fn from_usize(n: usize) -> u8 { |
127 | n as u8 |
128 | } |
129 | |
130 | #[inline ] |
131 | fn to_usize(self) -> usize { |
132 | self as usize |
133 | } |
134 | |
135 | #[inline ] |
136 | fn max_id() -> usize { |
137 | ::std::u8::MAX as usize |
138 | } |
139 | } |
140 | |
141 | impl StateID for u16 { |
142 | #[inline ] |
143 | fn from_usize(n: usize) -> u16 { |
144 | n as u16 |
145 | } |
146 | |
147 | #[inline ] |
148 | fn to_usize(self) -> usize { |
149 | self as usize |
150 | } |
151 | |
152 | #[inline ] |
153 | fn max_id() -> usize { |
154 | ::std::u16::MAX as usize |
155 | } |
156 | } |
157 | |
158 | #[cfg (any(target_pointer_width = "32" , target_pointer_width = "64" ))] |
159 | impl StateID for u32 { |
160 | #[inline ] |
161 | fn from_usize(n: usize) -> u32 { |
162 | n as u32 |
163 | } |
164 | |
165 | #[inline ] |
166 | fn to_usize(self) -> usize { |
167 | self as usize |
168 | } |
169 | |
170 | #[inline ] |
171 | fn max_id() -> usize { |
172 | ::std::u32::MAX as usize |
173 | } |
174 | } |
175 | |
176 | #[cfg (target_pointer_width = "64" )] |
177 | impl StateID for u64 { |
178 | #[inline ] |
179 | fn from_usize(n: usize) -> u64 { |
180 | n as u64 |
181 | } |
182 | |
183 | #[inline ] |
184 | fn to_usize(self) -> usize { |
185 | self as usize |
186 | } |
187 | |
188 | #[inline ] |
189 | fn max_id() -> usize { |
190 | ::std::u64::MAX as usize |
191 | } |
192 | } |
193 | |