1//! A speedy, non-cryptographic hashing algorithm used by `rustc`.
2//!
3//! # Example
4//!
5//! ```rust
6//! # #[cfg(feature = "std")]
7//! # fn main() {
8//! use rustc_hash::FxHashMap;
9//!
10//! let mut map: FxHashMap<u32, u32> = FxHashMap::default();
11//! map.insert(22, 44);
12//! # }
13//! # #[cfg(not(feature = "std"))]
14//! # fn main() { }
15//! ```
16
17#![no_std]
18#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))]
19
20#[cfg(feature = "std")]
21extern crate std;
22
23#[cfg(feature = "rand")]
24extern crate rand;
25
26#[cfg(feature = "rand")]
27mod random_state;
28
29mod seeded_state;
30
31use core::default::Default;
32use core::hash::{BuildHasher, Hasher};
33#[cfg(feature = "std")]
34use std::collections::{HashMap, HashSet};
35
/// Type alias for a hash map that uses the Fx hashing algorithm.
#[cfg(feature = "std")]
pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;

/// Type alias for a hash set that uses the Fx hashing algorithm.
#[cfg(feature = "std")]
pub type FxHashSet<V> = HashSet<V, FxBuildHasher>;

// Randomized-seed map/set variants, only available with the `rand` feature.
#[cfg(feature = "rand")]
pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState};

// Explicitly-seeded hasher state; the state type itself works without std,
// while the map/set aliases built on it require std collections.
pub use seeded_state::FxSeededState;
#[cfg(feature = "std")]
pub use seeded_state::{FxHashMapSeed, FxHashSetSeed};
50
/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
/// non-cryptographic hash.
///
/// The current implementation is a fast polynomial hash with a single
/// bit rotation as a finishing step designed by Orson Peters.
#[derive(Clone)]
pub struct FxHasher {
    // Running polynomial-hash state. Before any input is written this holds
    // the seed (0 for the default hasher), so seeding is just initialization.
    hash: usize,
}
62
// One might view a polynomial hash
//    m[0] * k + m[1] * k^2 + m[2] * k^3 + ...
// as a multilinear hash with keystream k[..]
//    m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ...
// where keystream k just happens to be generated using a multiplicative
// congruential pseudorandom number generator (MCG). For that reason we chose a
// constant that was found to be good for a MCG in:
//     "Computationally Easy, Spectrally Good Multipliers for Congruential
//     Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna.
//
// The multiplier is pointer-width sized, so each target width gets its own
// constant from that paper.
#[cfg(target_pointer_width = "64")]
const K: usize = 0xf1357aea2e62a9c5;
#[cfg(target_pointer_width = "32")]
const K: usize = 0x93d765dd;
76
impl FxHasher {
    /// Creates a `fx` hasher with a given seed.
    ///
    /// The seed becomes the initial hash state, so two hashers built with
    /// different seeds produce different (but still deterministic) hashes
    /// for the same input.
    pub const fn with_seed(seed: usize) -> FxHasher {
        FxHasher { hash: seed }
    }

    /// Creates a default `fx` hasher.
    ///
    /// This is a `const` counterpart of the [`Default`] impl below (an
    /// inherent method may be `const fn`; a trait method may not), and
    /// always starts from a zero state.
    pub const fn default() -> FxHasher {
        FxHasher { hash: 0 }
    }
}
88
89impl Default for FxHasher {
90 #[inline]
91 fn default() -> FxHasher {
92 Self::default()
93 }
94}
95
96impl FxHasher {
97 #[inline]
98 fn add_to_hash(&mut self, i: usize) {
99 self.hash = self.hash.wrapping_add(i).wrapping_mul(K);
100 }
101}
102
impl Hasher for FxHasher {
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        // Compress the byte string to a single u64 and add to our hash.
        // `hash_bytes` mixes the slice length into that u64 itself, so
        // no separate length prefix is needed here.
        self.write_u64(hash_bytes(bytes));
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        // On 64-bit hosts the cast keeps all 64 bits; on 32-bit hosts it
        // truncates, so the high half is mixed in as a second word.
        self.add_to_hash(i as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        // Mix the value one machine word at a time: two words on 64-bit
        // hosts, four on 32-bit hosts (the cfg-gated lines fill the gaps).
        self.add_to_hash(i as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
        self.add_to_hash((i >> 64) as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 96) as usize);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i);
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_length_prefix(&mut self, _len: usize) {
        // Most cases will specialize hash_slice to call write(), which encodes
        // the length already in a more efficient manner than we could here. For
        // HashDoS-resistance you would still need to include this for the
        // non-slice collection hashes, but for the purposes of rustc we do not
        // care and do not wish to pay the performance penalty of mixing in len
        // for those collections.
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_str(&mut self, s: &str) {
        // Similarly here, write already encodes the length, so nothing special
        // is needed.
        self.write(s.as_bytes())
    }

    #[inline]
    fn finish(&self) -> u64 {
        // Since we used a multiplicative hash our top bits have the most
        // entropy (with the top bit having the most, decreasing as you go).
        // As most hash table implementations (including hashbrown) compute
        // the bucket index from the bottom bits we want to move bits from the
        // top to the bottom. Ideally we'd rotate left by exactly the hash table
        // size, but as we don't know this we'll choose 26 bits, giving decent
        // entropy up until 2^26 table sizes. On 32-bit hosts we'll dial it
        // back down a bit to 15 bits.

        #[cfg(target_pointer_width = "64")]
        const ROTATE: u32 = 26;
        #[cfg(target_pointer_width = "32")]
        const ROTATE: u32 = 15;

        self.hash.rotate_left(ROTATE) as u64

        // A bit reversal would be even better, except hashbrown also expects
        // good entropy in the top 7 bits and a bit reverse would fill those
        // bits with low entropy. More importantly, bit reversals are very slow
        // on x86-64. A byte reversal is relatively fast, but still has a 2
        // cycle latency on x86-64 compared to the 1 cycle latency of a rotate.
        // It also suffers from the hashbrown-top-7-bit-issue.
    }
}
192
// Seeds and the anti-collapse constant used by `hash_bytes` below.
// Nothing special, digits of pi.
const SEED1: u64 = 0x243f6a8885a308d3;
const SEED2: u64 = 0x13198a2e03707344;
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;
197
/// Mixes two 64-bit values into one by multiplying them and folding the
/// wide product, so small changes to either input perturb the whole output.
#[inline]
fn multiply_mix(x: u64, y: u64) -> u64 {
    #[cfg(target_pointer_width = "64")]
    {
        // One widening multiply: a single mul instruction on x86-64, a
        // mul plus a mulhi on ARM64.
        let product = u128::from(x) * u128::from(y);

        // The middle bits of the full product fluctuate the most with small
        // changes in the input: the top bits of the low word and the bottom
        // bits of the high word. XOR'ing the halves spreads that fluctuation
        // across the whole output.
        (product as u64) ^ ((product >> 64) as u64)

        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
        // otherwise combining with + or - could result in a really strong hash, as:
        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
        //     x * y = 2^64 * hi + lo = 1 * hi + lo = lo + hi,      (mod 2^64 - 1)
        // Multiplicative hashing is universal in a field (like mod p).
    }

    #[cfg(target_pointer_width = "32")]
    {
        // A full u64 x u64 -> u128 product is prohibitively expensive on
        // 32-bit hosts, so split each operand into 32-bit halves.
        let (x_lo, x_hi) = (x as u32, (x >> 32) as u32);
        let (y_lo, y_hi) = (y as u32, (y >> 32) as u32);

        // Cross-multiply the low half of each operand with the high half of
        // the other; each is a cheap u32 x u32 -> u64.
        let cross_a = u64::from(x_lo) * u64::from(y_hi);
        let cross_b = u64::from(x_hi) * u64::from(y_lo);

        // Rotate one product by a half-word before folding, so the upper
        // bits of one product combine with the lower bits of the other.
        cross_a ^ cross_b.rotate_right(32)
    }
}
239
/// A wyhash-inspired non-collision-resistant hash for strings/slices designed
/// by Orson Peters, with a focus on small strings and small codesize.
///
/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
/// 64-bit output, that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
/// permutation f() passed all tests with zero failures. When using the 32-bit
/// version of multiply_mix this hash has a few non-catastrophic failures where
/// there are a handful more collisions than an optimal hash would give.
///
/// We don't bother avalanching here as we'll feed this hash into a
/// multiplication after which we take the high bits, which avalanches for us.
#[inline]
fn hash_bytes(bytes: &[u8]) -> u64 {
    let len = bytes.len();
    let mut s0 = SEED1;
    let mut s1 = SEED2;

    if len <= 16 {
        // XOR the input into s0, s1. The two reads may overlap (e.g. for
        // 8 <= len < 16 both cover the middle); that is fine, every byte
        // still influences the result and the length is mixed in at the end.
        if len >= 8 {
            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
        } else if len >= 4 {
            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
        } else if len > 0 {
            // 1..=3 bytes: first, middle and last byte (possibly the same
            // byte more than once) packed into the two states.
            let lo = bytes[0];
            let mid = bytes[len / 2];
            let hi = bytes[len - 1];
            s0 ^= lo as u64;
            s1 ^= ((hi as u64) << 8) | mid as u64;
        }
    } else {
        // Handle bulk (can partially overlap with suffix): 16 bytes per
        // iteration, with the final (possibly partial) 16 bytes always
        // re-read as a suffix below.
        let mut off = 0;
        while off < len - 16 {
            let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap());
            let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap());

            // Replace s1 with a mix of s0, x, and y, and s0 with s1.
            // This ensures the compiler can unroll this loop into two
            // independent streams, one operating on s0, the other on s1.
            //
            // Since zeroes are a common input we prevent an immediate trivial
            // collapse of the hash function by XOR'ing a constant with y.
            let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y);
            s0 = s1;
            s1 = t;
            off += 16;
        }

        // Always mix in the last 16 bytes, even if they overlap the bulk
        // loop's final iteration.
        let suffix = &bytes[len - 16..];
        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
    }

    // Fold the two states together and mix in the length so that inputs of
    // different lengths (e.g. "" vs "\0") hash differently.
    multiply_mix(s0, s1) ^ (len as u64)
}
298
/// An implementation of [`BuildHasher`] that produces [`FxHasher`]s.
///
/// A zero-sized type: constructing it is free, and it always yields
/// zero-seeded hashers (use `FxSeededState` or the `rand` feature for
/// seeded variants).
///
/// ```
/// use std::hash::BuildHasher;
/// use rustc_hash::FxBuildHasher;
/// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2));
/// ```
#[derive(Copy, Clone, Default)]
pub struct FxBuildHasher;
308
309impl BuildHasher for FxBuildHasher {
310 type Hasher = FxHasher;
311 fn build_hasher(&self) -> FxHasher {
312 FxHasher::default()
313 }
314}
315
#[cfg(test)]
mod tests {
    // The expected values below are pointer-width dependent, so the suite
    // only knows the answers for 64-bit and 32-bit targets.
    #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
    compile_error!("The test suite only supports 64 bit and 32 bit usize");

    use crate::{FxBuildHasher, FxHasher};
    use core::hash::{BuildHasher, Hash, Hasher};

    // Asserts that hashing `$value` with a default (zero-seeded) hasher
    // yields exactly `$result`.
    macro_rules! test_hash {
        (
            $(
                hash($value:expr) == $result:expr,
            )*
        ) => {
            $(
                assert_eq!(FxBuildHasher.hash_one($value), $result);
            )*
        };
    }

    // True on 32-bit targets; selects which expected value applies.
    const B32: bool = cfg!(target_pointer_width = "32");

    // Pins exact hashes of unsigned integers, guarding against accidental
    // changes to the algorithm or its constants.
    #[test]
    fn unsigned() {
        test_hash! {
            hash(0_u8) == 0,
            hash(1_u8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u8::MAX) == if B32 { 999399879 } else { 1211781028898739645 },

            hash(0_u16) == 0,
            hash(1_u16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u16::MAX) == if B32 { 3440503042 } else { 16279819243059860173 },

            hash(0_u32) == 0,
            hash(1_u32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u32::MAX) == if B32 { 1293006356 } else { 7729994835221066939 },

            hash(0_u64) == 0,
            hash(1_u64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_u64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(u64::MAX) == if B32 { 1017982517 } else { 6288842954450348564 },

            hash(0_u128) == 0,
            hash(1_u128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_u128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(u128::MAX) == if B32 { 2156022013 } else { 11702830760530184999 },

            hash(0_usize) == 0,
            hash(1_usize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_usize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(usize::MAX) == if B32 { 1293006356 } else { 6288842954450348564 },
        }
    }

    // Same as `unsigned`, for the signed integer types.
    #[test]
    fn signed() {
        test_hash! {
            hash(i8::MIN) == if B32 { 2000713177 } else { 6684841074112525780 },
            hash(0_i8) == 0,
            hash(1_i8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i8::MAX) == if B32 { 3293686765 } else { 12973684028562874344 },

            hash(i16::MIN) == if B32 { 1073764727 } else { 14218860181193086044 },
            hash(0_i16) == 0,
            hash(1_i16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i16::MAX) == if B32 { 2366738315 } else { 2060959061933882993 },

            hash(i32::MIN) == if B32 { 16384 } else { 9943947977240134995 },
            hash(0_i32) == 0,
            hash(1_i32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i32::MAX) == if B32 { 1293022740 } else { 16232790931690483559 },

            hash(i64::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i64) == 0,
            hash(1_i64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_i64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(i64::MAX) == if B32 { 1017998901 } else { 6288842954483902996 },

            hash(i128::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i128) == 0,
            hash(1_i128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_i128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(i128::MAX) == if B32 { 2156005629 } else { 11702830760496630567 },

            hash(isize::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_isize) == 0,
            hash(1_isize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_isize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(isize::MAX) == if B32 { 1293022740 } else { 6288842954483902996 },
        }
    }

    // Avoid relying on any `Hash` implementations in the standard library:
    // this wrapper feeds the raw bytes straight into `Hasher::write`.
    struct HashBytes(&'static [u8]);
    impl Hash for HashBytes {
        fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
            state.write(self.0);
        }
    }

    // Pins exact hashes of byte slices, exercising all `hash_bytes` size
    // classes (empty, <4, 4..=16, and >16 bytes).
    #[test]
    fn bytes() {
        test_hash! {
            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 17606491139363777937 },
            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 5448590020104574886 },
            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 16766921560080789783 },
            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 5922447956811044110 },
            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 5229781508510959783 },
            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 7168164714682931527 },
            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 2349210501944688211 },
        }
    }

    // Checks that distinct seeds actually change the output for every
    // single-byte input, i.e. the seed is not silently ignored.
    #[test]
    fn with_seed_actually_different() {
        let seeds = [
            [1, 2],
            [42, 17],
            [124436707, 99237],
            [usize::MIN, usize::MAX],
        ];

        for [a_seed, b_seed] in seeds {
            let a = || FxHasher::with_seed(a_seed);
            let b = || FxHasher::with_seed(b_seed);

            for x in u8::MIN..=u8::MAX {
                let mut a = a();
                let mut b = b();

                x.hash(&mut a);
                x.hash(&mut b);

                assert_ne!(a.finish(), b.finish())
            }
        }
    }
}
460