| 1 | //! The foldhash implementation optimized for speed. |
| 2 | |
| 3 | use core::hash::{BuildHasher, Hasher}; |
| 4 | |
| 5 | use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; |
| 6 | use crate::{folded_multiply, hash_bytes_long, hash_bytes_medium, rotate_right, ARBITRARY3}; |
| 7 | |
/// A [`Hasher`] instance implementing foldhash, optimized for speed.
///
/// One can be constructed directly through [`FoldHasher::with_seed`], but in
/// most cases [`FoldHasher`]s should be obtained from a [`RandomState`],
/// [`SeedableRandomState`] or [`FixedState`].
#[derive(Clone)]
pub struct FoldHasher {
    /// Running hash state; starts out as the per-hasher seed.
    accumulator: u64,
    /// Buffer in which integer writes are packed until 128 bits are full.
    sponge: u128,
    /// Number of bits of `sponge` currently occupied.
    sponge_len: u8,
    /// Seed mixed in whenever the sponge is folded into the accumulator.
    fold_seed: u64,
    /// First expansion seed, used by all byte-slice hashing paths.
    expand_seed: u64,
    /// Second expansion seed, only used for long byte slices.
    expand_seed2: u64,
    /// Third expansion seed, only used for long byte slices.
    expand_seed3: u64,
}
| 23 | |
| 24 | impl FoldHasher { |
| 25 | /// Initializes this [`FoldHasher`] with the given per-hasher seed and |
| 26 | /// [`SharedSeed`]. |
| 27 | #[inline ] |
| 28 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &SharedSeed) -> FoldHasher { |
| 29 | FoldHasher { |
| 30 | accumulator: per_hasher_seed, |
| 31 | sponge: 0, |
| 32 | sponge_len: 0, |
| 33 | fold_seed: shared_seed.seeds[0], |
| 34 | expand_seed: shared_seed.seeds[1], |
| 35 | expand_seed2: shared_seed.seeds[2], |
| 36 | expand_seed3: shared_seed.seeds[3], |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | #[inline (always)] |
| 41 | fn write_num<T: Into<u128>>(&mut self, x: T) { |
| 42 | let bits: usize = 8 * core::mem::size_of::<T>(); |
| 43 | if self.sponge_len as usize + bits > 128 { |
| 44 | let lo = self.sponge as u64; |
| 45 | let hi = (self.sponge >> 64) as u64; |
| 46 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); |
| 47 | self.sponge = x.into(); |
| 48 | self.sponge_len = bits as u8; |
| 49 | } else { |
| 50 | self.sponge |= x.into() << self.sponge_len; |
| 51 | self.sponge_len += bits as u8; |
| 52 | } |
| 53 | } |
| 54 | } |
| 55 | |
| 56 | impl Hasher for FoldHasher { |
| 57 | #[inline (always)] |
| 58 | fn write(&mut self, bytes: &[u8]) { |
| 59 | // We perform overlapping reads in the byte hash which could lead to |
| 60 | // trivial length-extension attacks. These should be defeated by |
| 61 | // adding a length-dependent rotation on our unpredictable seed |
| 62 | // which costs only a single cycle (or none if executed with |
| 63 | // instruction-level parallelism). |
| 64 | let len = bytes.len(); |
| 65 | let base_seed = rotate_right(self.accumulator, len as u32); |
| 66 | if len <= 16 { |
| 67 | let mut s0 = base_seed; |
| 68 | let mut s1 = self.expand_seed; |
| 69 | // XOR the input into s0, s1, then multiply and fold. |
| 70 | if len >= 8 { |
| 71 | s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap()); |
| 72 | s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap()); |
| 73 | } else if len >= 4 { |
| 74 | s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64; |
| 75 | s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64; |
| 76 | } else if len > 0 { |
| 77 | let lo = bytes[0]; |
| 78 | let mid = bytes[len / 2]; |
| 79 | let hi = bytes[len - 1]; |
| 80 | s0 ^= lo as u64; |
| 81 | s1 ^= ((hi as u64) << 8) | mid as u64; |
| 82 | } |
| 83 | self.accumulator = folded_multiply(s0, s1); |
| 84 | } else if len < 256 { |
| 85 | self.accumulator = hash_bytes_medium( |
| 86 | bytes, |
| 87 | base_seed, |
| 88 | base_seed.wrapping_add(self.expand_seed), |
| 89 | self.fold_seed, |
| 90 | ); |
| 91 | } else { |
| 92 | self.accumulator = hash_bytes_long( |
| 93 | bytes, |
| 94 | base_seed, |
| 95 | base_seed.wrapping_add(self.expand_seed), |
| 96 | base_seed.wrapping_add(self.expand_seed2), |
| 97 | base_seed.wrapping_add(self.expand_seed3), |
| 98 | self.fold_seed, |
| 99 | ); |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | #[inline (always)] |
| 104 | fn write_u8(&mut self, i: u8) { |
| 105 | self.write_num(i); |
| 106 | } |
| 107 | |
| 108 | #[inline (always)] |
| 109 | fn write_u16(&mut self, i: u16) { |
| 110 | self.write_num(i); |
| 111 | } |
| 112 | |
| 113 | #[inline (always)] |
| 114 | fn write_u32(&mut self, i: u32) { |
| 115 | self.write_num(i); |
| 116 | } |
| 117 | |
| 118 | #[inline (always)] |
| 119 | fn write_u64(&mut self, i: u64) { |
| 120 | self.write_num(i); |
| 121 | } |
| 122 | |
| 123 | #[inline (always)] |
| 124 | fn write_u128(&mut self, i: u128) { |
| 125 | let lo = i as u64; |
| 126 | let hi = (i >> 64) as u64; |
| 127 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed); |
| 128 | } |
| 129 | |
| 130 | #[inline (always)] |
| 131 | fn write_usize(&mut self, i: usize) { |
| 132 | // u128 doesn't implement From<usize>. |
| 133 | #[cfg (target_pointer_width = "32" )] |
| 134 | self.write_num(i as u32); |
| 135 | #[cfg (target_pointer_width = "64" )] |
| 136 | self.write_num(i as u64); |
| 137 | } |
| 138 | |
| 139 | #[inline (always)] |
| 140 | fn finish(&self) -> u64 { |
| 141 | if self.sponge_len > 0 { |
| 142 | let lo = self.sponge as u64; |
| 143 | let hi = (self.sponge >> 64) as u64; |
| 144 | folded_multiply(lo ^ self.accumulator, hi ^ self.fold_seed) |
| 145 | } else { |
| 146 | self.accumulator |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized. |
| 152 | #[derive (Copy, Clone, Debug)] |
| 153 | pub struct RandomState { |
| 154 | per_hasher_seed: u64, |
| 155 | global_seed: GlobalSeed, |
| 156 | } |
| 157 | |
| 158 | impl Default for RandomState { |
| 159 | #[inline (always)] |
| 160 | fn default() -> Self { |
| 161 | Self { |
| 162 | per_hasher_seed: gen_per_hasher_seed(), |
| 163 | global_seed: GlobalSeed::new(), |
| 164 | } |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | impl BuildHasher for RandomState { |
| 169 | type Hasher = FoldHasher; |
| 170 | |
| 171 | #[inline (always)] |
| 172 | fn build_hasher(&self) -> FoldHasher { |
| 173 | FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly |
| 178 | /// initialized by default, but can also be initialized with a specific seed. |
| 179 | /// |
| 180 | /// This can be useful for e.g. testing, but the downside is that this type |
| 181 | /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is. |
| 182 | #[derive (Copy, Clone, Debug)] |
| 183 | pub struct SeedableRandomState { |
| 184 | per_hasher_seed: u64, |
| 185 | shared_seed: &'static SharedSeed, |
| 186 | } |
| 187 | |
| 188 | impl Default for SeedableRandomState { |
| 189 | #[inline (always)] |
| 190 | fn default() -> Self { |
| 191 | Self::random() |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | impl SeedableRandomState { |
| 196 | /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. |
| 197 | #[inline (always)] |
| 198 | pub fn random() -> Self { |
| 199 | Self { |
| 200 | per_hasher_seed: gen_per_hasher_seed(), |
| 201 | shared_seed: SharedSeed::global_random(), |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. |
| 206 | #[inline (always)] |
| 207 | pub fn fixed() -> Self { |
| 208 | Self { |
| 209 | per_hasher_seed: ARBITRARY3, |
| 210 | shared_seed: SharedSeed::global_fixed(), |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | /// Generates a [`SeedableRandomState`] with the given per-hasher seed |
| 215 | /// and [`SharedSeed`]. |
| 216 | #[inline (always)] |
| 217 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { |
| 218 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
| 219 | Self { |
| 220 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
| 221 | shared_seed, |
| 222 | } |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | impl BuildHasher for SeedableRandomState { |
| 227 | type Hasher = FoldHasher; |
| 228 | |
| 229 | #[inline (always)] |
| 230 | fn build_hasher(&self) -> FoldHasher { |
| 231 | FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) |
| 232 | } |
| 233 | } |
| 234 | |
/// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) whose seed is
/// always the same.
///
/// Only use this when determinism is an absolute requirement.
#[derive(Copy, Clone, Debug)]
pub struct FixedState {
    /// Seed handed to every hasher built from this state.
    per_hasher_seed: u64,
}
| 242 | |
| 243 | impl FixedState { |
| 244 | /// Creates a [`FixedState`] with the given per-hasher-seed. |
| 245 | #[inline (always)] |
| 246 | pub const fn with_seed(per_hasher_seed: u64) -> Self { |
| 247 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
| 248 | Self { |
| 249 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | impl Default for FixedState { |
| 255 | #[inline (always)] |
| 256 | fn default() -> Self { |
| 257 | Self { |
| 258 | per_hasher_seed: ARBITRARY3, |
| 259 | } |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | impl BuildHasher for FixedState { |
| 264 | type Hasher = FoldHasher; |
| 265 | |
| 266 | #[inline (always)] |
| 267 | fn build_hasher(&self) -> FoldHasher { |
| 268 | FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) |
| 269 | } |
| 270 | } |
| 271 | |