//! A speedy, non-cryptographic hashing algorithm used by `rustc`.
//!
//! # Example
//!
//! ```rust
//! # #[cfg(feature = "std")]
//! # fn main() {
//! use rustc_hash::FxHashMap;
//!
//! let mut map: FxHashMap<u32, u32> = FxHashMap::default();
//! map.insert(22, 44);
//! # }
//! # #[cfg(not(feature = "std"))]
//! # fn main() { }
//! ```

#![no_std]
#![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))]

#[cfg(feature = "std")]
extern crate std;

#[cfg(feature = "rand")]
extern crate rand;

#[cfg(feature = "rand")]
mod random_state;

mod seeded_state;

use core::default::Default;
use core::hash::{BuildHasher, Hasher};
#[cfg(feature = "std")]
use std::collections::{HashMap, HashSet};

/// Type alias for a hash map that uses the Fx hashing algorithm.
#[cfg(feature = "std")]
pub type FxHashMap<K, V> = HashMap<K, V, FxBuildHasher>;

/// Type alias for a hash set that uses the Fx hashing algorithm.
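///
/// A minimal usage sketch (mirroring the crate-level example):
///
/// ```
/// use rustc_hash::FxHashSet;
///
/// let mut set: FxHashSet<u32> = FxHashSet::default();
/// set.insert(7);
/// assert!(set.contains(&7));
/// ```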
#[cfg(feature = "std")]
pub type FxHashSet<V> = HashSet<V, FxBuildHasher>;

#[cfg(feature = "rand")]
pub use random_state::{FxHashMapRand, FxHashSetRand, FxRandomState};

pub use seeded_state::FxSeededState;
#[cfg(feature = "std")]
pub use seeded_state::{FxHashMapSeed, FxHashSetSeed};

/// A speedy hash algorithm for use within rustc. The standard library's
/// hashmap by default uses SipHash, which isn't quite as speedy as we want.
/// In the compiler we're not really worried about DoS attacks, so we use
/// a fast non-cryptographic hash.
///
/// The current implementation, designed by Orson Peters, is a fast polynomial
/// hash with a single bit rotation as a finishing step.
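///
/// # Example
///
/// A minimal sketch of hashing data with an `FxHasher` driven directly
/// through the `core::hash::Hasher` trait:
///
/// ```
/// use core::hash::Hasher;
/// use rustc_hash::FxHasher;
///
/// let mut hasher = FxHasher::default();
/// hasher.write_u32(42);
/// hasher.write(b"some bytes");
/// let _hash: u64 = hasher.finish();
/// ```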
#[derive(Clone)]
pub struct FxHasher {
    hash: usize,
}

// One might view a polynomial hash
//    m[0] * k + m[1] * k^2 + m[2] * k^3 + ...
// as a multilinear hash with keystream k[..]
//    m[0] * k[0] + m[1] * k[1] + m[2] * k[2] + ...
// where the keystream k just happens to be generated using a multiplicative
// congruential pseudorandom number generator (MCG). For that reason we chose a
// constant that was found to be good for an MCG in:
// "Computationally Easy, Spectrally Good Multipliers for Congruential
// Pseudorandom Number Generators" by Guy Steele and Sebastiano Vigna.
#[cfg(target_pointer_width = "64")]
const K: usize = 0xf1357aea2e62a9c5;
#[cfg(target_pointer_width = "32")]
const K: usize = 0x93d765dd;
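
// Concretely, starting from hash = 0 and feeding three words m[0], m[1], m[2]
// through `add_to_hash` below unfolds (with wrapping arithmetic) to
//    ((m[0] * K + m[1]) * K + m[2]) * K
//        = m[0] * K^3 + m[1] * K^2 + m[2] * K,
// i.e. a polynomial hash in K, matching the form above up to the ordering of
// the coefficients (earlier words end up attached to higher powers of K).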

impl FxHasher {
    /// Creates an `FxHasher` with a given seed.
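    ///
    /// ```
    /// use core::hash::Hasher;
    /// use rustc_hash::FxHasher;
    ///
    /// // Sanity sketch: the same input hashed under two different seeds
    /// // will generally produce different outputs.
    /// let mut a = FxHasher::with_seed(1);
    /// let mut b = FxHasher::with_seed(2);
    /// a.write_u32(42);
    /// b.write_u32(42);
    /// assert_ne!(a.finish(), b.finish());
    /// ```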
    pub const fn with_seed(seed: usize) -> FxHasher {
        FxHasher { hash: seed }
    }

    /// Creates a default `FxHasher`.
    pub const fn default() -> FxHasher {
        FxHasher { hash: 0 }
    }
}

impl Default for FxHasher {
    #[inline]
    fn default() -> FxHasher {
        Self::default()
    }
}

impl FxHasher {
    #[inline]
    fn add_to_hash(&mut self, i: usize) {
        self.hash = self.hash.wrapping_add(i).wrapping_mul(K);
    }
}

impl Hasher for FxHasher {
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        // Compress the byte string to a single u64 and add to our hash.
        self.write_u64(hash_bytes(bytes));
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        self.add_to_hash(i as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 32) as usize);
        self.add_to_hash((i >> 64) as usize);
        #[cfg(target_pointer_width = "32")]
        self.add_to_hash((i >> 96) as usize);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i);
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_length_prefix(&mut self, _len: usize) {
        // Most cases will specialize hash_slice to call write(), which encodes
        // the length already in a more efficient manner than we could here. For
        // HashDoS-resistance you would still need to include this for the
        // non-slice collection hashes, but for the purposes of rustc we do not
        // care and do not wish to pay the performance penalty of mixing in len
        // for those collections.
    }

    #[cfg(feature = "nightly")]
    #[inline]
    fn write_str(&mut self, s: &str) {
        // Similarly here, write already encodes the length, so nothing special
        // is needed.
        self.write(s.as_bytes())
    }

    #[inline]
    fn finish(&self) -> u64 {
        // Since we used a multiplicative hash our top bits have the most
        // entropy (with the top bit having the most, decreasing as you go).
        // As most hash table implementations (including hashbrown) compute
        // the bucket index from the bottom bits, we want to move bits from the
        // top to the bottom. Ideally we'd rotate left by exactly log2 of the
        // hash table size, but as we don't know this we'll choose 26 bits,
        // giving decent entropy up until 2^26 table sizes. On 32-bit hosts
        // we'll dial it back down a bit to 15 bits.

        #[cfg(target_pointer_width = "64")]
        const ROTATE: u32 = 26;
        #[cfg(target_pointer_width = "32")]
        const ROTATE: u32 = 15;

        self.hash.rotate_left(ROTATE) as u64

        // A bit reversal would be even better, except hashbrown also expects
        // good entropy in the top 7 bits and a bit reverse would fill those
        // bits with low entropy. More importantly, bit reversals are very slow
        // on x86-64. A byte reversal is relatively fast, but still has a 2
        // cycle latency on x86-64 compared to the 1 cycle latency of a rotate.
        // It also suffers from the hashbrown-top-7-bit issue.
    }
}

// Nothing special, digits of pi.
const SEED1: u64 = 0x243f6a8885a308d3;
const SEED2: u64 = 0x13198a2e03707344;
const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0;

#[inline]
fn multiply_mix(x: u64, y: u64) -> u64 {
    #[cfg(target_pointer_width = "64")]
    {
        // We compute the full u64 x u64 -> u128 product; this is a single mul
        // instruction on x86-64 and one mul plus one umulh on ARM64.
        let full = (x as u128) * (y as u128);
        let lo = full as u64;
        let hi = (full >> 64) as u64;

        // The middle bits of the full product fluctuate the most with small
        // changes in the input. These are the top bits of lo and the bottom
        // bits of hi. We can thus make the entire output fluctuate with small
        // changes to the input by XOR'ing these two halves.
        lo ^ hi

        // Unfortunately both 2^64 + 1 and 2^64 - 1 have small prime factors,
        // otherwise combining with + or - could result in a really strong hash, as:
        //     x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi,   (mod 2^64 + 1)
        //     x * y = 2^64 * hi + lo =    1 * hi + lo = lo + hi,   (mod 2^64 - 1)
        // Multiplicative hashing is universal in a field (like mod p).
    }

    #[cfg(target_pointer_width = "32")]
    {
        // The full u64 x u64 -> u128 product is prohibitively expensive on
        // 32-bit, so decompose into 32-bit parts.
        let lx = x as u32;
        let ly = y as u32;
        let hx = (x >> 32) as u32;
        let hy = (y >> 32) as u32;

        // Multiply (u32 x u32 -> u64) the low bits of one input with the high
        // bits of the other.
        let afull = (lx as u64) * (hy as u64);
        let bfull = (hx as u64) * (ly as u64);

        // Combine, swapping low/high of one of them so the upper bits of the
        // product of one combine with the lower bits of the other.
        afull ^ bfull.rotate_right(32)
    }
}

/// A wyhash-inspired non-collision-resistant hash for strings/slices designed
/// by Orson Peters, with a focus on small strings and small codesize.
///
/// The 64-bit version of this hash passes the SMHasher3 test suite on the full
/// 64-bit output; that is, f(hash_bytes(b) ^ f(seed)) for some good avalanching
/// permutation f() passed all tests with zero failures. When using the 32-bit
/// version of multiply_mix this hash has a few non-catastrophic failures where
/// there are a handful more collisions than an optimal hash would give.
///
/// We don't bother avalanching here as we'll feed this hash into a
/// multiplication after which we take the high bits, which avalanches for us.
#[inline]
fn hash_bytes(bytes: &[u8]) -> u64 {
    let len = bytes.len();
    let mut s0 = SEED1;
    let mut s1 = SEED2;

    if len <= 16 {
        // XOR the input into s0, s1.
        if len >= 8 {
            s0 ^= u64::from_le_bytes(bytes[0..8].try_into().unwrap());
            s1 ^= u64::from_le_bytes(bytes[len - 8..].try_into().unwrap());
        } else if len >= 4 {
            s0 ^= u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as u64;
            s1 ^= u32::from_le_bytes(bytes[len - 4..].try_into().unwrap()) as u64;
        } else if len > 0 {
            let lo = bytes[0];
            let mid = bytes[len / 2];
            let hi = bytes[len - 1];
            s0 ^= lo as u64;
            s1 ^= ((hi as u64) << 8) | mid as u64;
        }
    } else {
        // Handle bulk (can partially overlap with suffix).
        let mut off = 0;
        while off < len - 16 {
            let x = u64::from_le_bytes(bytes[off..off + 8].try_into().unwrap());
            let y = u64::from_le_bytes(bytes[off + 8..off + 16].try_into().unwrap());

            // Replace s1 with a mix of s0, x, and y, and s0 with s1.
            // This ensures the compiler can unroll this loop into two
            // independent streams, one operating on s0, the other on s1.
            //
            // Since zeroes are a common input we prevent an immediate trivial
            // collapse of the hash function by XOR'ing a constant with y.
            let t = multiply_mix(s0 ^ x, PREVENT_TRIVIAL_ZERO_COLLAPSE ^ y);
            s0 = s1;
            s1 = t;
            off += 16;
        }

        let suffix = &bytes[len - 16..];
        s0 ^= u64::from_le_bytes(suffix[0..8].try_into().unwrap());
        s1 ^= u64::from_le_bytes(suffix[8..16].try_into().unwrap());
    }

    multiply_mix(s0, s1) ^ (len as u64)
}

/// An implementation of [`BuildHasher`] that produces [`FxHasher`]s.
///
/// ```
/// use std::hash::BuildHasher;
/// use rustc_hash::FxBuildHasher;
/// assert_ne!(FxBuildHasher.hash_one(1), FxBuildHasher.hash_one(2));
/// ```
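///
/// It can also be plugged into a standard `HashMap` explicitly; a minimal
/// sketch (the `FxHashMap` alias is the more convenient route when the `std`
/// feature is enabled):
///
/// ```
/// use std::collections::HashMap;
/// use rustc_hash::FxBuildHasher;
///
/// let mut map: HashMap<u32, u32, FxBuildHasher> = HashMap::with_hasher(FxBuildHasher);
/// map.insert(1, 2);
/// assert_eq!(map.get(&1), Some(&2));
/// ```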
#[derive(Copy, Clone, Default)]
pub struct FxBuildHasher;

impl BuildHasher for FxBuildHasher {
    type Hasher = FxHasher;
    fn build_hasher(&self) -> FxHasher {
        FxHasher::default()
    }
}

#[cfg(test)]
mod tests {
    #[cfg(not(any(target_pointer_width = "64", target_pointer_width = "32")))]
    compile_error!("The test suite only supports 64-bit and 32-bit usize");

    use crate::{FxBuildHasher, FxHasher};
    use core::hash::{BuildHasher, Hash, Hasher};

    macro_rules! test_hash {
        (
            $(
                hash($value:expr) == $result:expr,
            )*
        ) => {
            $(
                assert_eq!(FxBuildHasher.hash_one($value), $result);
            )*
        };
    }

    const B32: bool = cfg!(target_pointer_width = "32");

    #[test]
    fn unsigned() {
        test_hash! {
            hash(0_u8) == 0,
            hash(1_u8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u8::MAX) == if B32 { 999399879 } else { 1211781028898739645 },

            hash(0_u16) == 0,
            hash(1_u16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u16::MAX) == if B32 { 3440503042 } else { 16279819243059860173 },

            hash(0_u32) == 0,
            hash(1_u32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_u32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(u32::MAX) == if B32 { 1293006356 } else { 7729994835221066939 },

            hash(0_u64) == 0,
            hash(1_u64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_u64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(u64::MAX) == if B32 { 1017982517 } else { 6288842954450348564 },

            hash(0_u128) == 0,
            hash(1_u128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_u128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(u128::MAX) == if B32 { 2156022013 } else { 11702830760530184999 },

            hash(0_usize) == 0,
            hash(1_usize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_usize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(usize::MAX) == if B32 { 1293006356 } else { 6288842954450348564 },
        }
    }

    #[test]
    fn signed() {
        test_hash! {
            hash(i8::MIN) == if B32 { 2000713177 } else { 6684841074112525780 },
            hash(0_i8) == 0,
            hash(1_i8) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i8) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i8::MAX) == if B32 { 3293686765 } else { 12973684028562874344 },

            hash(i16::MIN) == if B32 { 1073764727 } else { 14218860181193086044 },
            hash(0_i16) == 0,
            hash(1_i16) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i16) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i16::MAX) == if B32 { 2366738315 } else { 2060959061933882993 },

            hash(i32::MIN) == if B32 { 16384 } else { 9943947977240134995 },
            hash(0_i32) == 0,
            hash(1_i32) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_i32) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(i32::MAX) == if B32 { 1293022740 } else { 16232790931690483559 },

            hash(i64::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i64) == 0,
            hash(1_i64) == if B32 { 275023839 } else { 12157901119326311915 },
            hash(100_i64) == if B32 { 1732383522 } else { 16751747135202103309 },
            hash(i64::MAX) == if B32 { 1017998901 } else { 6288842954483902996 },

            hash(i128::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_i128) == 0,
            hash(1_i128) == if B32 { 1860738631 } else { 13032756267696824044 },
            hash(100_i128) == if B32 { 1389515751 } else { 12003541609544029302 },
            hash(i128::MAX) == if B32 { 2156005629 } else { 11702830760496630567 },

            hash(isize::MIN) == if B32 { 16384 } else { 33554432 },
            hash(0_isize) == 0,
            hash(1_isize) == if B32 { 3001993707 } else { 12157901119326311915 },
            hash(100_isize) == if B32 { 3844759569 } else { 16751747135202103309 },
            hash(isize::MAX) == if B32 { 1293022740 } else { 6288842954483902996 },
        }
    }

    // Avoid relying on any `Hash` implementations in the standard library.
    struct HashBytes(&'static [u8]);
    impl Hash for HashBytes {
        fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
            state.write(self.0);
        }
    }

    #[test]
    fn bytes() {
        test_hash! {
            hash(HashBytes(&[])) == if B32 { 2673204745 } else { 17606491139363777937 },
            hash(HashBytes(&[0])) == if B32 { 2948228584 } else { 5448590020104574886 },
            hash(HashBytes(&[0, 0, 0, 0, 0, 0])) == if B32 { 3223252423 } else { 16766921560080789783 },
            hash(HashBytes(&[1])) == if B32 { 2943445104 } else { 5922447956811044110 },
            hash(HashBytes(&[2])) == if B32 { 1055423297 } else { 5229781508510959783 },
            hash(HashBytes(b"uwu")) == if B32 { 2699662140 } else { 7168164714682931527 },
            hash(HashBytes(b"These are some bytes for testing rustc_hash.")) == if B32 { 2303640537 } else { 2349210501944688211 },
        }
    }

    #[test]
    fn with_seed_actually_different() {
        let seeds = [
            [1, 2],
            [42, 17],
            [124436707, 99237],
            [usize::MIN, usize::MAX],
        ];

        for [a_seed, b_seed] in seeds {
            let a = || FxHasher::with_seed(a_seed);
            let b = || FxHasher::with_seed(b_seed);

            for x in u8::MIN..=u8::MAX {
                let mut a = a();
                let mut b = b();

                x.hash(&mut a);
                x.hash(&mut b);

                assert_ne!(a.finish(), b.finish());
            }
        }
    }
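
    // Supplementary sanity check (a minimal sketch): `FxHasher` is fully
    // deterministic, so hashing the same value twice through the default
    // `FxBuildHasher` must yield identical results.
    #[test]
    fn determinism() {
        assert_eq!(
            FxBuildHasher.hash_one(HashBytes(b"rustc_hash")),
            FxBuildHasher.hash_one(HashBytes(b"rustc_hash"))
        );
        assert_eq!(
            FxBuildHasher.hash_one(12345_u64),
            FxBuildHasher.hash_one(12345_u64)
        );
    }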
}