| 1 | //! The foldhash implementation optimized for speed. |
| 2 | |
| 3 | use core::hash::{BuildHasher, Hasher}; |
| 4 | |
| 5 | use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; |
| 6 | use crate::{folded_multiply, hash_bytes_long, hash_bytes_short, rotate_right, ARBITRARY3}; |
| 7 | |
| 8 | /// A [`Hasher`] instance implementing foldhash, optimized for speed. |
| 9 | /// |
| 10 | /// While you can create one directly with [`FoldHasher::with_seed`], you |
| 11 | /// most likely want to use [`RandomState`], [`SeedableRandomState`] or |
| 12 | /// [`FixedState`] to create [`FoldHasher`]s. |
| 13 | #[derive(Clone)] |
| 14 | pub struct FoldHasher<'a> { |
| 15 | accumulator: u64, |
| 16 | sponge: u128, |
| 17 | sponge_len: u8, |
| 18 | seeds: &'a [u64; 6], |
| 19 | } |
| 20 | |
| 21 | impl<'a> FoldHasher<'a> { |
| 22 | /// Initializes this [`FoldHasher`] with the given per-hasher seed and |
| 23 | /// [`SharedSeed`]. |
| 24 | #[inline ] |
| 25 | pub const fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> { |
| 26 | FoldHasher { |
| 27 | accumulator: per_hasher_seed, |
| 28 | sponge: 0, |
| 29 | sponge_len: 0, |
| 30 | seeds: &shared_seed.seeds, |
| 31 | } |
| 32 | } |
| 33 | |
| 34 | #[inline (always)] |
| 35 | fn write_num<T: Into<u128>>(&mut self, x: T) { |
| 36 | let bits: usize = 8 * core::mem::size_of::<T>(); |
| 37 | if self.sponge_len as usize + bits > 128 { |
| 38 | let lo = self.sponge as u64; |
| 39 | let hi = (self.sponge >> 64) as u64; |
| 40 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]); |
| 41 | self.sponge = x.into(); |
| 42 | self.sponge_len = bits as u8; |
| 43 | } else { |
| 44 | self.sponge |= x.into() << self.sponge_len; |
| 45 | self.sponge_len += bits as u8; |
| 46 | } |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | impl<'a> Hasher for FoldHasher<'a> { |
| 51 | #[inline (always)] |
| 52 | fn write(&mut self, bytes: &[u8]) { |
| 53 | // We perform overlapping reads in the byte hash which could lead to |
| 54 | // trivial length-extension attacks. These should be defeated by |
| 55 | // adding a length-dependent rotation on our unpredictable seed |
| 56 | // which costs only a single cycle (or none if executed with |
| 57 | // instruction-level parallelism). |
| 58 | let len = bytes.len(); |
| 59 | self.accumulator = rotate_right(self.accumulator, len as u32); |
| 60 | if len <= 16 { |
| 61 | self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds); |
| 62 | } else { |
| 63 | unsafe { |
| 64 | // SAFETY: we checked that the length is > 16 bytes. |
| 65 | self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds); |
| 66 | } |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | #[inline (always)] |
| 71 | fn write_u8(&mut self, i: u8) { |
| 72 | self.write_num(i); |
| 73 | } |
| 74 | |
| 75 | #[inline (always)] |
| 76 | fn write_u16(&mut self, i: u16) { |
| 77 | self.write_num(i); |
| 78 | } |
| 79 | |
| 80 | #[inline (always)] |
| 81 | fn write_u32(&mut self, i: u32) { |
| 82 | self.write_num(i); |
| 83 | } |
| 84 | |
| 85 | #[inline (always)] |
| 86 | fn write_u64(&mut self, i: u64) { |
| 87 | self.write_num(i); |
| 88 | } |
| 89 | |
| 90 | #[inline (always)] |
| 91 | fn write_u128(&mut self, i: u128) { |
| 92 | let lo = i as u64; |
| 93 | let hi = (i >> 64) as u64; |
| 94 | self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]); |
| 95 | } |
| 96 | |
| 97 | #[inline (always)] |
| 98 | fn write_usize(&mut self, i: usize) { |
| 99 | // u128 doesn't implement From<usize>. |
| 100 | #[cfg (target_pointer_width = "32" )] |
| 101 | self.write_num(i as u32); |
| 102 | #[cfg (target_pointer_width = "64" )] |
| 103 | self.write_num(i as u64); |
| 104 | } |
| 105 | |
| 106 | #[cfg (feature = "nightly" )] |
| 107 | #[inline (always)] |
| 108 | fn write_str(&mut self, s: &str) { |
| 109 | // Our write function already handles length differences. |
| 110 | self.write(s.as_bytes()) |
| 111 | } |
| 112 | |
| 113 | #[inline (always)] |
| 114 | fn finish(&self) -> u64 { |
| 115 | if self.sponge_len > 0 { |
| 116 | let lo = self.sponge as u64; |
| 117 | let hi = (self.sponge >> 64) as u64; |
| 118 | folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]) |
| 119 | } else { |
| 120 | self.accumulator |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized. |
| 126 | #[derive(Clone, Debug)] |
| 127 | pub struct RandomState { |
| 128 | per_hasher_seed: u64, |
| 129 | global_seed: GlobalSeed, |
| 130 | } |
| 131 | |
| 132 | impl Default for RandomState { |
| 133 | #[inline (always)] |
| 134 | fn default() -> Self { |
| 135 | Self { |
| 136 | per_hasher_seed: gen_per_hasher_seed(), |
| 137 | global_seed: GlobalSeed::new(), |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | impl BuildHasher for RandomState { |
| 143 | type Hasher = FoldHasher<'static>; |
| 144 | |
| 145 | #[inline (always)] |
| 146 | fn build_hasher(&self) -> FoldHasher<'static> { |
| 147 | FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly |
| 152 | /// initialized by default, but can also be initialized with a specific seed. |
| 153 | /// |
| 154 | /// This can be useful for e.g. testing, but the downside is that this type |
| 155 | /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is. |
| 156 | #[derive(Clone, Debug)] |
| 157 | pub struct SeedableRandomState { |
| 158 | per_hasher_seed: u64, |
| 159 | shared_seed: &'static SharedSeed, |
| 160 | } |
| 161 | |
| 162 | impl Default for SeedableRandomState { |
| 163 | #[inline (always)] |
| 164 | fn default() -> Self { |
| 165 | Self::random() |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | impl SeedableRandomState { |
| 170 | /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. |
| 171 | #[inline (always)] |
| 172 | pub fn random() -> Self { |
| 173 | Self { |
| 174 | per_hasher_seed: gen_per_hasher_seed(), |
| 175 | shared_seed: SharedSeed::global_random(), |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. |
| 180 | #[inline (always)] |
| 181 | pub fn fixed() -> Self { |
| 182 | Self { |
| 183 | per_hasher_seed: ARBITRARY3, |
| 184 | shared_seed: SharedSeed::global_fixed(), |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | /// Generates a [`SeedableRandomState`] with the given per-hasher seed |
| 189 | /// and [`SharedSeed`]. |
| 190 | #[inline (always)] |
| 191 | pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { |
| 192 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
| 193 | Self { |
| 194 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
| 195 | shared_seed, |
| 196 | } |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | impl BuildHasher for SeedableRandomState { |
| 201 | type Hasher = FoldHasher<'static>; |
| 202 | |
| 203 | #[inline (always)] |
| 204 | fn build_hasher(&self) -> FoldHasher<'static> { |
| 205 | FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed. |
| 210 | /// |
| 211 | /// Not recommended unless you absolutely need determinism. |
| 212 | #[derive(Clone, Debug)] |
| 213 | pub struct FixedState { |
| 214 | per_hasher_seed: u64, |
| 215 | } |
| 216 | |
| 217 | impl FixedState { |
| 218 | /// Creates a [`FixedState`] with the given per-hasher-seed. |
| 219 | #[inline (always)] |
| 220 | pub const fn with_seed(per_hasher_seed: u64) -> Self { |
| 221 | // XOR with ARBITRARY3 such that with_seed(0) matches default. |
| 222 | Self { |
| 223 | per_hasher_seed: per_hasher_seed ^ ARBITRARY3, |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | impl Default for FixedState { |
| 229 | #[inline (always)] |
| 230 | fn default() -> Self { |
| 231 | Self { |
| 232 | per_hasher_seed: ARBITRARY3, |
| 233 | } |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | impl BuildHasher for FixedState { |
| 238 | type Hasher = FoldHasher<'static>; |
| 239 | |
| 240 | #[inline (always)] |
| 241 | fn build_hasher(&self) -> FoldHasher<'static> { |
| 242 | FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) |
| 243 | } |
| 244 | } |
| 245 | |