| 1 | //! A speedy, non-cryptographic hashing algorithm used by `rustc`. |
| 2 | //! |
| 3 | //! # Example |
| 4 | //! |
| 5 | //! ```rust |
| 6 | //! # #[cfg (feature = "std" )] |
| 7 | //! # fn main() { |
| 8 | //! use rustc_hash::FxHashMap; |
| 9 | //! |
| 10 | //! let mut map: FxHashMap<u32, u32> = FxHashMap::default(); |
| 11 | //! map.insert(22, 44); |
| 12 | //! # } |
| 13 | //! # #[cfg (not(feature = "std" ))] |
| 14 | //! # fn main() { } |
| 15 | //! ``` |
| 16 | |
| 17 | #![no_std ] |
| 18 | #![cfg_attr (feature = "nightly" , feature(hasher_prefixfree_extras))] |
| 19 | |
| 20 | #[cfg (feature = "std" )] |
| 21 | extern crate std; |
| 22 | |
| 23 | #[cfg (feature = "rand" )] |
| 24 | extern crate rand; |
| 25 | |
| 26 | #[cfg (feature = "rand" )] |
| 27 | mod random_state; |
| 28 | |
| 29 | mod seeded_state; |
| 30 | |
| 31 | use core::default::Default; |
| 32 | use core::hash::{BuildHasher, Hasher}; |
| 33 | #[cfg (feature = "std" )] |
| 34 | use std::collections::{HashMap, HashSet}; |
| 35 | |
| 36 | /// Type alias for a hash map that uses the Fx hashing algorithm. |
| 37 | #[cfg (feature = "std" )] |
| 38 | pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>; |
| 39 | |
| 40 | /// Type alias for a hash set that uses the Fx hashing algorithm. |
| 41 | #[cfg (feature = "std" )] |
| 42 | pub type FxHashSet<V> = HashSet<V, FxBuildHasher>; |
| 43 | |
| 44 | #[cfg (feature = "rand" )] |
| 45 | pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState}; |
| 46 | |
| 47 | pub use seeded_state::FxSeededState; |
| 48 | #[cfg (feature = "std" )] |
| 49 | pub use seeded_state::{FxHashMapSeed, FxHashSetSeed}; |
| 50 | |
| 51 | /// A speedy hash algorithm for use within rustc. The hashmap in liballoc |
| 52 | /// by default uses SipHash which isn't quite as speedy as we want. In the |
| 53 | /// compiler we're not really worried about DOS attempts, so we use a fast |
| 54 | /// non-cryptographic hash. |
| 55 | /// |
| 56 | /// The current implementation is a fast polynomial hash with a single |
| 57 | /// bit rotation as a finishing step designed by Orson Peters. |
| 58 | #[derive (Clone)] |
| 59 | pub struct FxHasher { |
| 60 | hash: usize, |
| 61 | } |
| 62 | |
| 63 | // One might view a polynomial hash |
| 64 | // m[0] * k + m[1] * k^2 + m[2] * k^3 + ... |
| 65 | // as a multilinear hash with keystream k[..] |
| 66 | // m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ... |
| 67 | // where keystream k just happens to be generated using a multiplicative |
| 68 | // congruential pseudorandom number generator (MCG). For that reason we chose a |
| 69 | // constant that was found to be good for a MCG in: |
| 70 | // "Computationally Easy, Spectrally Good Multipliers for Congruential |
| 71 | // Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna. |
| 72 | #[cfg (target_pointer_width = "64" )] |
| 73 | const K: usize = 0xf1357aea2e62a9c5; |
| 74 | #[cfg (target_pointer_width = "32" )] |
| 75 | const K: usize = 0x93d765dd; |
| 76 | |
| 77 | impl FxHasher { |
| 78 | /// Creates a `fx` hasher with a given seed. |
| 79 | pub const fn with_seed(seed: usize) -> FxHasher { |
| 80 | FxHasher { hash: seed } |
| 81 | } |
| 82 | |
| 83 | /// Creates a default `fx` hasher. |
| 84 | pub const fn default() -> FxHasher { |
| 85 | FxHasher { hash: 0 } |
| 86 | } |
| 87 | } |
| 88 | |
| 89 | impl Default for FxHasher { |
| 90 | #[inline ] |
| 91 | fn default() -> FxHasher { |
| 92 | Self::default() |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | impl FxHasher { |
| 97 | #[inline ] |
| 98 | fn add_to_hash(&mut self, i: usize) { |
| 99 | self.hash = self.hash.wrapping_add(i).wrapping_mul(K); |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | impl Hasher for FxHasher { |
| 104 | #[inline ] |
| 105 | fn write(&mut self, bytes: &[u8]) { |
| 106 | // Compress the byte string to a single u64 and add to our hash. |
| 107 | self.write_u64(hash_bytes(bytes)); |
| 108 | } |
| 109 | |
| 110 | #[inline ] |
| 111 | fn write_u8(&mut self, i: u8) { |
| 112 | self.add_to_hash(i as usize); |
| 113 | } |
| 114 | |
| 115 | #[inline ] |
| 116 | fn write_u16(&mut self, i: u16) { |
| 117 | self.add_to_hash(i as usize); |
| 118 | } |
| 119 | |
| 120 | #[inline ] |
| 121 | fn write_u32(&mut self, i: u32) { |
| 122 | self.add_to_hash(i as usize); |
| 123 | } |
| 124 | |
| 125 | #[inline ] |
| 126 | fn write_u64(&mut self, i: u64) { |
| 127 | self.add_to_hash(i as usize); |
| 128 | #[cfg (target_pointer_width = "32" )] |
| 129 | self.add_to_hash((i >> 32) as usize); |
| 130 | } |
| 131 | |
| 132 | #[inline ] |
| 133 | fn write_u128(&mut self, i: u128) { |
| 134 | self.add_to_hash(i as usize); |
| 135 | #[cfg (target_pointer_width = "32" )] |
| 136 | self.add_to_hash((i >> 32) as usize); |
| 137 | self.add_to_hash((i >> 64) as usize); |
| 138 | #[cfg (target_pointer_width = "32" )] |
| 139 | self.add_to_hash((i >> 96) as usize); |
| 140 | } |
| 141 | |
| 142 | #[inline ] |
| 143 | fn write_usize(&mut self, i: usize) { |
| 144 | self.add_to_hash(i); |
| 145 | } |
| 146 | |
| 147 | #[cfg (feature = "nightly" )] |
| 148 | #[inline ] |
| 149 | fn write_length_prefix(&mut self, _len: usize) { |
| 150 | // Most cases will specialize hash_slice to call write(), which encodes |
| 151 | // the length already in a more efficient manner than we could here. For |
| 152 | // HashDoS-resistance you would still need to include this for the |
| 153 | // non-slice collection hashes, but for the purposes of rustc we do not |
| 154 | // care and do not wish to pay the performance penalty of mixing in len |
| 155 | // for those collections. |
| 156 | } |
| 157 | |
| 158 | #[cfg (feature = "nightly" )] |
| 159 | #[inline ] |
| 160 | fn write_str(&mut self, s: &str) { |
| 161 | // Similarly here, write already encodes the length, so nothing special |
| 162 | // is needed. |
| 163 | self.write(s.as_bytes()) |
| 164 | } |
| 165 | |
| 166 | #[inline ] |
| 167 | fn finish(&self) -> u64 { |
| 168 | // Since we used a multiplicative hash our top bits have the most |
| 169 | // entropy (with the top bit having the most, decreasing as you go). |
| 170 | // As most hash table implementations (including hashbrown) compute |
| 171 | // the bucket index from the bottom bits we want to move bits from the |
| 172 | // top to the bottom. Ideally we'd rotate left by exactly the hash table |
| 173 | // size, but as we don't know this we'll choose 26 bits, giving decent |
| 174 | // entropy up until 2^26 table sizes. On 32-bit hosts we'll dial it |
| 175 | // back down a bit to 15 bits. |
| 176 | |
| 177 | #[cfg (target_pointer_width = "64" )] |
| 178 | const ROTATE: u32 = 26; |
| 179 | #[cfg (target_pointer_width = "32" )] |
| 180 | const ROTATE: u32 = 15; |
| 181 | |
| 182 | self.hash.rotate_left(ROTATE) as u64 |
| 183 | |
| 184 | // A bit reversal would be even better, except hashbrown also expects |
| 185 | // good entropy in the top 7 bits and a bit reverse would fill those |
| 186 | // bits with low entropy. More importantly, bit reversals are very slow |
| 187 | // on x86-64. A byte reversal is relatively fast, but still has a 2 |
| 188 | // cycle latency on x86-64 compared to the 1 cycle latency of a rotate. |
| 189 | // It also suffers from the hashbrown-top-7-bit-issue. |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | // Nothing special, digits of pi. |
| 194 | const SEED1: u64 = 0x243f6a8885a308d3; |
| 195 | const SEED2: u64 = 0x13198a2e03707344; |
| 196 | const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0; |
| 197 | |
| 198 | #[inline ] |
| 199 | fn multiply_mix(x: u64, y: u64) -> u64 { |
| 200 | #[cfg (target_pointer_width = "64" )] |
| 201 | { |
| 202 | // We compute the full u64 x u64 -> u128 product, this is a single mul |
| 203 | // instruction on x86-64, one mul plus one mulhi on ARM64. |
| 204 | let full = (x as u128) * (y as u128); |
| 205 | let lo = full as u64; |
| 206 | let hi = (full >> 64) as u64; |
| 207 | |
| 208 | // The middle bits of the full product fluctuate the most with small |
| 209 | // changes in the input. This is the top bits of lo and the bottom bits |
| 210 | // of hi. We can thus make the entire output fluctuate with small |
| 211 | // changes to the input by XOR'ing these two halves. |
| 212 | lo ^ hi |
| 213 | |
| 214 | // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors, |
| 215 | // otherwise combining with + or - could result in a really strong hash, as: |
| 216 | // x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi, (mod 2^64 + 1) |
| 217 | // x * y = 2^64 * hi + lo = 1 * hi + lo = lo + hi, (mod 2^64 - 1) |
| 218 | // Multiplicative hashing is universal in a field (like mod p). |
| 219 | } |
| 220 | |
| 221 | #[cfg (target_pointer_width = "32" )] |
| 222 | { |
| 223 | // u64 x u64 -> u128 product is prohibitively expensive on 32-bit. |
| 224 | // Decompose into 32-bit parts. |
| 225 | let lx = x as u32; |
| 226 | let ly = y as u32; |
| 227 | let hx = (x >> 32) as u32; |
| 228 | let hy = (y >> 32) as u32; |
| 229 | |
| 230 | // u32 x u32 -> u64 the low bits of one with the high bits of the other. |
| 231 | let afull = (lx as u64) * (hy as u64); |
| 232 | let bfull = (hx as u64) * (ly as u64); |
| 233 | |
| 234 | // Combine, swapping low/high of one of them so the upper bits of the |
| 235 | // product of one combine with the lower bits of the other. |
| 236 | afull ^ bfull.rotate_right(32) |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | /// A wyhash-inspired non-collision-resistant hash for strings/slices designed |
| 241 | /// by Orson Peters, with a focus on small strings and small codesize. |
| 242 | /// |
| 243 | /// The 64-bit version of this hash passes the SMHasher3 test suite on the full |
| 244 | /// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching |
| 245 | /// permutation f() passed all tests with zero failures. When using the 32-bit |
| 246 | /// version of multiply_mix this hash has a few non-catastrophic failures where |
| 247 | /// there are a handful more collisions than an optimal hash would give. |
| 248 | /// |
| 249 | /// We don't bother avalanching here as we'll feed this hash into a |
| 250 | /// multiplication after which we take the high bits, which avalanches for us. |
| 251 | #[inline ] |
| 252 | fn hash_bytes(bytes: &[u8]) -> u64 { |
| 253 | let len = bytes.len(); |
| 254 | let mut s0 = SEED1; |
| 255 | let mut s1 = SEED2; |
| 256 | |
| 257 | if len <= 16 { |
| 258 | // XOR the input into s0, s1. |
| 259 | if len >= 8 { |
| 260 | s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap()); |
| 261 | s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap()); |
| 262 | } else if len >= 4 { |
| 263 | s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64; |
| 264 | s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64; |
| 265 | } else if len > 0 { |
| 266 | let lo = bytes[0]; |
| 267 | let mid = bytes[len / 2]; |
| 268 | let hi = bytes[len - 1]; |
| 269 | s0 ^= lo as u64; |
| 270 | s1 ^= ((hi as u64) << 8) | mid as u64; |
| 271 | } |
| 272 | } else { |
| 273 | // Handle bulk (can partially overlap with suffix). |
| 274 | let mut off = 0; |
| 275 | while off < len - 16 { |
| 276 | let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap()); |
| 277 | let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap()); |
| 278 | |
| 279 | // Replace s1 with a mix of s0, x, and y, and s0 with s1. |
| 280 | // This ensures the compiler can unroll this loop into two |
| 281 | // independent streams, one operating on s0, the other on s1. |
| 282 | // |
| 283 | // Since zeroes are a common input we prevent an immediate trivial |
| 284 | // collapse of the hash function by XOR'ing a constant with y. |
| 285 | let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y); |
| 286 | s0 = s1; |
| 287 | s1 = t; |
| 288 | off += 16; |
| 289 | } |
| 290 | |
| 291 | let suffix = &bytes[len - 16..]; |
| 292 | s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap()); |
| 293 | s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap()); |
| 294 | } |
| 295 | |
| 296 | multiply_mix(s0, s1) ^ (len as u64) |
| 297 | } |
| 298 | |
| 299 | /// An implementation of [`BuildHasher`] that produces [`FxHasher`]s. |
| 300 | /// |
| 301 | /// ``` |
| 302 | /// use std::hash::BuildHasher; |
| 303 | /// use rustc_hash::FxBuildHasher; |
| 304 | /// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2)); |
| 305 | /// ``` |
| 306 | #[derive (Copy, Clone, Default)] |
| 307 | pub struct FxBuildHasher; |
| 308 | |
| 309 | impl BuildHasher for FxBuildHasher { |
| 310 | type Hasher = FxHasher; |
| 311 | fn build_hasher(&self) -> FxHasher { |
| 312 | FxHasher::default() |
| 313 | } |
| 314 | } |
| 315 | |
| 316 | #[cfg (test)] |
| 317 | mod tests { |
| 318 | #[cfg (not(any(target_pointer_width = "64" , target_pointer_width = "32" )))] |
| 319 | compile_error!("The test suite only supports 64 bit and 32 bit usize" ); |
| 320 | |
| 321 | use crate::{FxBuildHasher, FxHasher}; |
| 322 | use core::hash::{BuildHasher, Hash, Hasher}; |
| 323 | |
| 324 | macro_rules! test_hash { |
| 325 | ( |
| 326 | $( |
| 327 | hash($value:expr) == $result:expr, |
| 328 | )* |
| 329 | ) => { |
| 330 | $( |
| 331 | assert_eq!(FxBuildHasher.hash_one($value), $result); |
| 332 | )* |
| 333 | }; |
| 334 | } |
| 335 | |
| 336 | const B32: bool = cfg!(target_pointer_width = "32" ); |
| 337 | |
| 338 | #[test ] |
| 339 | fn unsigned() { |
| 340 | test_hash! { |
| 341 | hash(0_u8) == 0, |
| 342 | hash(1_u8) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 343 | hash(100_u8) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 344 | hash(u8::MAX) == if B32 { 999399879 } else { 1211781028898739645 }, |
| 345 | |
| 346 | hash(0_u16) == 0, |
| 347 | hash(1_u16) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 348 | hash(100_u16) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 349 | hash(u16::MAX) == if B32 { 3440503042 } else { 16279819243059860173 }, |
| 350 | |
| 351 | hash(0_u32) == 0, |
| 352 | hash(1_u32) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 353 | hash(100_u32) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 354 | hash(u32::MAX) == if B32 { 1293006356 } else { 7729994835221066939 }, |
| 355 | |
| 356 | hash(0_u64) == 0, |
| 357 | hash(1_u64) == if B32 { 275023839 } else { 12157901119326311915 }, |
| 358 | hash(100_u64) == if B32 { 1732383522 } else { 16751747135202103309 }, |
| 359 | hash(u64::MAX) == if B32 { 1017982517 } else { 6288842954450348564 }, |
| 360 | |
| 361 | hash(0_u128) == 0, |
| 362 | hash(1_u128) == if B32 { 1860738631 } else { 13032756267696824044 }, |
| 363 | hash(100_u128) == if B32 { 1389515751 } else { 12003541609544029302 }, |
| 364 | hash(u128::MAX) == if B32 { 2156022013 } else { 11702830760530184999 }, |
| 365 | |
| 366 | hash(0_usize) == 0, |
| 367 | hash(1_usize) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 368 | hash(100_usize) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 369 | hash(usize::MAX) == if B32 { 1293006356 } else { 6288842954450348564 }, |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | #[test ] |
| 374 | fn signed() { |
| 375 | test_hash! { |
| 376 | hash(i8::MIN) == if B32 { 2000713177 } else { 6684841074112525780 }, |
| 377 | hash(0_i8) == 0, |
| 378 | hash(1_i8) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 379 | hash(100_i8) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 380 | hash(i8::MAX) == if B32 { 3293686765 } else { 12973684028562874344 }, |
| 381 | |
| 382 | hash(i16::MIN) == if B32 { 1073764727 } else { 14218860181193086044 }, |
| 383 | hash(0_i16) == 0, |
| 384 | hash(1_i16) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 385 | hash(100_i16) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 386 | hash(i16::MAX) == if B32 { 2366738315 } else { 2060959061933882993 }, |
| 387 | |
| 388 | hash(i32::MIN) == if B32 { 16384 } else { 9943947977240134995 }, |
| 389 | hash(0_i32) == 0, |
| 390 | hash(1_i32) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 391 | hash(100_i32) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 392 | hash(i32::MAX) == if B32 { 1293022740 } else { 16232790931690483559 }, |
| 393 | |
| 394 | hash(i64::MIN) == if B32 { 16384 } else { 33554432 }, |
| 395 | hash(0_i64) == 0, |
| 396 | hash(1_i64) == if B32 { 275023839 } else { 12157901119326311915 }, |
| 397 | hash(100_i64) == if B32 { 1732383522 } else { 16751747135202103309 }, |
| 398 | hash(i64::MAX) == if B32 { 1017998901 } else { 6288842954483902996 }, |
| 399 | |
| 400 | hash(i128::MIN) == if B32 { 16384 } else { 33554432 }, |
| 401 | hash(0_i128) == 0, |
| 402 | hash(1_i128) == if B32 { 1860738631 } else { 13032756267696824044 }, |
| 403 | hash(100_i128) == if B32 { 1389515751 } else { 12003541609544029302 }, |
| 404 | hash(i128::MAX) == if B32 { 2156005629 } else { 11702830760496630567 }, |
| 405 | |
| 406 | hash(isize::MIN) == if B32 { 16384 } else { 33554432 }, |
| 407 | hash(0_isize) == 0, |
| 408 | hash(1_isize) == if B32 { 3001993707 } else { 12157901119326311915 }, |
| 409 | hash(100_isize) == if B32 { 3844759569 } else { 16751747135202103309 }, |
| 410 | hash(isize::MAX) == if B32 { 1293022740 } else { 6288842954483902996 }, |
| 411 | } |
| 412 | } |
| 413 | |
| 414 | // Avoid relying on any `Hash` implementations in the standard library. |
| 415 | struct HashBytes(&'static [u8]); |
| 416 | impl Hash for HashBytes { |
| 417 | fn hash<H: core::hash::Hasher>(&self, state: &mut H) { |
| 418 | state.write(self.0); |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | #[test ] |
| 423 | fn bytes() { |
| 424 | test_hash! { |
| 425 | hash(HashBytes(&[])) == if B32 { 2673204745 } else { 17606491139363777937 }, |
| 426 | hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 5448590020104574886 }, |
| 427 | hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 16766921560080789783 }, |
| 428 | hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 5922447956811044110 }, |
| 429 | hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 5229781508510959783 }, |
| 430 | hash(HashBytes(b"uwu" )) == if B32 { 2699662140 } else { 7168164714682931527 }, |
| 431 | hash(HashBytes(b"These are some bytes for testing rustc_hash." )) == if B32 { 2303640537 } else { 2349210501944688211 }, |
| 432 | } |
| 433 | } |
| 434 | |
| 435 | #[test ] |
| 436 | fn with_seed_actually_different() { |
| 437 | let seeds = [ |
| 438 | [1, 2], |
| 439 | [42, 17], |
| 440 | [124436707, 99237], |
| 441 | [usize::MIN, usize::MAX], |
| 442 | ]; |
| 443 | |
| 444 | for [a_seed, b_seed] in seeds { |
| 445 | let a = || FxHasher::with_seed(a_seed); |
| 446 | let b = || FxHasher::with_seed(b_seed); |
| 447 | |
| 448 | for x in u8::MIN..=u8::MAX { |
| 449 | let mut a = a(); |
| 450 | let mut b = b(); |
| 451 | |
| 452 | x.hash(&mut a); |
| 453 | x.hash(&mut b); |
| 454 | |
| 455 | assert_ne!(a.finish(), b.finish()) |
| 456 | } |
| 457 | } |
| 458 | } |
| 459 | } |
| 460 | |