1 | //! The foldhash implementation optimized for speed. |
2 | |
3 | use core::hash::{BuildHasher, Hasher}; |
4 | |
5 | use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; |
6 | use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3}; |
7 | |
/// The speed-optimized foldhash [`Hasher`].
///
/// A [`FoldHasher`] can be constructed by hand via [`FoldHasher::with_seed`],
/// but it is normally obtained from one of the builders in this module:
/// [`RandomState`], [`SeedableRandomState`] or [`FixedState`].
#[derive(Clone)]
pub struct FoldHasher {
    /// Running hash state; starts out as the per-hasher seed.
    accumulator: u64,
    /// Buffer into which integer writes are packed, up to 128 bits.
    sponge: u128,
    /// Number of bits of `sponge` currently occupied.
    sponge_len: u8,
    /// First shared seed; mixed in whenever a 128-bit chunk is folded.
    fold_seed: u64,
    /// Second shared seed; used when hashing byte slices.
    expand_seed: u64,
    /// Third shared seed; only used for byte slices of 256 bytes or more.
    expand_seed2: u64,
    /// Fourth shared seed; only used for byte slices of 256 bytes or more.
    expand_seed3: u64,
}
23 | |
24 | impl FoldHasher { |
25 | /// Initializes this [`FoldHasher`] with the given per-hasher seed and |
26 | /// [`SharedSeed`]. |
27 | #[inline ] |
28 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher { |
29 | FoldHasher { |
30 | accumulator: per_hasher_seed, |
31 | sponge: 0, |
32 | sponge_len: 0, |
33 | fold_seed: shared_seed.seeds[0], |
34 | expand_seed: shared_seed.seeds[1], |
35 | expand_seed2: shared_seed.seeds[2], |
36 | expand_seed3: shared_seed.seeds[3], |
37 | } |
38 | } |
39 | |
40 | #[inline (always)] |
41 | fn write_num<T: Into<u128>>(&mut self, x: T) { |
42 | let bits: usize = 8 * core::mem::size_of::<T>(); |
43 | if self.sponge_len as usize + bits > 128 { |
44 | let lo = self.sponge as u64; |
45 | let hi = (self.sponge >> 64) as u64; |
46 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); |
47 | self.sponge = x.into(); |
48 | self.sponge_len = bits as u8; |
49 | } else { |
50 | self.sponge |= x.into() << self.sponge_len; |
51 | self.sponge_len += bits as u8; |
52 | } |
53 | } |
54 | } |
55 | |
56 | impl Hasher for FoldHasher { |
57 | #[inline (always)] |
58 | fn write(&mut self, bytes: &[u8]) { |
59 | // We perform overlapping reads in the byte hash which could lead to |
60 | // trivial length-extension attacks. These should be defeated by |
61 | // adding a length-dependent rotation on our unpredictable seed |
62 | // which costs only a single cycle (or none if executed with |
63 | // instruction-level parallelism). |
64 | let len = bytes.len(); |
65 | let base_seed = rotate_right(self.accumulator, len as u32); |
66 | if len <= 16 { |
67 | let mut s0 = base_seed; |
68 | let mut s1 = self.expand_seed; |
69 | // XOR the input into s0, s1, then multiply and fold. |
70 | if len >= 8 { |
71 | s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap()); |
72 | s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap()); |
73 | } else if len >= 4 { |
74 | s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64; |
75 | s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64; |
76 | } else if len > 0 { |
77 | let lo = bytes[0]; |
78 | let mid = bytes[len / 2]; |
79 | let hi = bytes[len - 1]; |
80 | s0 ^= lo as u64; |
81 | s1 ^= ((hi as u64) << 8) | mid as u64; |
82 | } |
83 | self.accumulator = folded_multiply(s0, s1); |
84 | } else if len < 256 { |
85 | self.accumulator = hash_bytes_medium( |
86 | bytes, |
87 | base_seed, |
88 | base_seed.wrapping_add(self.expand_seed), |
89 | self.fold_seed, |
90 | ); |
91 | } else { |
92 | self.accumulator = hash_bytes_long( |
93 | bytes, |
94 | base_seed, |
95 | base_seed.wrapping_add(self.expand_seed), |
96 | base_seed.wrapping_add(self.expand_seed2), |
97 | base_seed.wrapping_add(self.expand_seed3), |
98 | self.fold_seed, |
99 | ); |
100 | } |
101 | } |
102 | |
103 | #[inline (always)] |
104 | fn write_u8(&mut self, i: u8) { |
105 | self.write_num(i); |
106 | } |
107 | |
108 | #[inline (always)] |
109 | fn write_u16(&mut self, i: u16) { |
110 | self.write_num(i); |
111 | } |
112 | |
113 | #[inline (always)] |
114 | fn write_u32(&mut self, i: u32) { |
115 | self.write_num(i); |
116 | } |
117 | |
118 | #[inline (always)] |
119 | fn write_u64(&mut self, i: u64) { |
120 | self.write_num(i); |
121 | } |
122 | |
123 | #[inline (always)] |
124 | fn write_u128(&mut self, i: u128) { |
125 | let lo = i as u64; |
126 | let hi = (i >> 64) as u64; |
127 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); |
128 | } |
129 | |
130 | #[inline (always)] |
131 | fn write_usize(&mut self, i: usize) { |
132 | // u128 doesn't implement From<usize>. |
133 | #[cfg (target_pointer_width = "32" )] |
134 | self.write_num(i as u32); |
135 | #[cfg (target_pointer_width = "64" )] |
136 | self.write_num(i as u64); |
137 | } |
138 | |
139 | #[inline (always)] |
140 | fn finish(&self) -> u64 { |
141 | if self.sponge_len > 0 { |
142 | let lo = self.sponge as u64; |
143 | let hi = (self.sponge >> 64) as u64; |
144 | folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed) |
145 | } else { |
146 | self.accumulator |
147 | } |
148 | } |
149 | } |
150 | |
151 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized. |
152 | #[derive (Copy, Clone, Debug)] |
153 | pub struct RandomState { |
154 | per_hasher_seed: u64, |
155 | global_seed: GlobalSeed, |
156 | } |
157 | |
158 | impl Default for RandomState { |
159 | #[inline (always)] |
160 | fn default() -> Self { |
161 | Self { |
162 | per_hasher_seed: gen_per_hasher_seed(), |
163 | global_seed: GlobalSeed::new(), |
164 | } |
165 | } |
166 | } |
167 | |
168 | impl BuildHasher for RandomState { |
169 | type Hasher = FoldHasher; |
170 | |
171 | #[inline (always)] |
172 | fn build_hasher(&self) -> FoldHasher { |
173 | FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) |
174 | } |
175 | } |
176 | |
177 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly |
178 | /// initialized by default, but can also be initialized with a specific seed. |
179 | /// |
180 | /// This can be useful for e.g. testing, but the downside is that this type |
181 | /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is. |
182 | #[derive (Copy, Clone, Debug)] |
183 | pub struct SeedableRandomState { |
184 | per_hasher_seed: u64, |
185 | shared_seed: &'static SharedSeed, |
186 | } |
187 | |
188 | impl Default for SeedableRandomState { |
189 | #[inline (always)] |
190 | fn default() -> Self { |
191 | Self::random() |
192 | } |
193 | } |
194 | |
195 | impl SeedableRandomState { |
196 | /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. |
197 | #[inline (always)] |
198 | pub fn random() -> Self { |
199 | Self { |
200 | per_hasher_seed: gen_per_hasher_seed(), |
201 | shared_seed: SharedSeed::global_random(), |
202 | } |
203 | } |
204 | |
205 | /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. |
206 | #[inline (always)] |
207 | pub fn fixed() -> Self { |
208 | Self { |
209 | per_hasher_seed: ARBITRARY3, |
210 | shared_seed: SharedSeed::global_fixed(), |
211 | } |
212 | } |
213 | |
214 | /// Generates a [`SeedableRandomState`] with the given per-hasher seed |
215 | /// and [`SharedSeed`]. |
216 | #[inline (always)] |
217 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { |
218 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
219 | Self { |
220 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
221 | shared_seed, |
222 | } |
223 | } |
224 | } |
225 | |
226 | impl BuildHasher for SeedableRandomState { |
227 | type Hasher = FoldHasher; |
228 | |
229 | #[inline (always)] |
230 | fn build_hasher(&self) -> FoldHasher { |
231 | FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) |
232 | } |
233 | } |
234 | |
/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) whose seed is always
/// the same.
///
/// Only use this when you absolutely need deterministic hashes.
#[derive(Copy, Clone, Debug)]
pub struct FixedState {
    /// Seed handed to every built hasher as its initial accumulator.
    per_hasher_seed: u64,
}
242 | |
243 | impl FixedState { |
244 | /// Creates a [`FixedState`] with the given per-hasher-seed. |
245 | #[inline (always)] |
246 | pub const fn with_seed(per_hasher_seed: u64) -> Self { |
247 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
248 | Self { |
249 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
250 | } |
251 | } |
252 | } |
253 | |
254 | impl Default for FixedState { |
255 | #[inline (always)] |
256 | fn default() -> Self { |
257 | Self { |
258 | per_hasher_seed: ARBITRARY3, |
259 | } |
260 | } |
261 | } |
262 | |
263 | impl BuildHasher for FixedState { |
264 | type Hasher = FoldHasher; |
265 | |
266 | #[inline (always)] |
267 | fn build_hasher(&self) -> FoldHasher { |
268 | FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) |
269 | } |
270 | } |
271 | |