use core::hash::BuildHasher;

// These constants may end up unused depending on platform support.
#[allow(unused)]
use crate::{ARBITRARY1, ARBITRARY9};

use super::{
    folded_multiply, ARBITRARY2, ARBITRARY3, ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7,
    ARBITRARY8,
};

/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
const FIXED_GLOBAL_SEED: [u64; 4] = [ARBITRARY4, ARBITRARY5, ARBITRARY6, ARBITRARY7];

pub mod fast {
    use super::*;
    use crate::fast::FoldHasher;

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that are randomly initialized.
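    ///
    /// A minimal usage sketch (illustrative; it assumes this module is exposed by
    /// the crate as `foldhash::fast` and that the `std` feature is enabled):
    ///
    /// ```ignore
    /// use std::collections::HashMap;
    /// use foldhash::fast::RandomState;
    ///
    /// // HashMap::default() builds a fresh RandomState, so every map gets its
    /// // own per-hasher seed on top of the lazily initialized global seed.
    /// let mut map: HashMap<String, u32, RandomState> = HashMap::default();
    /// map.insert("hello".to_string(), 1);
    /// assert_eq!(map.get("hello"), Some(&1));
    /// ```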
    #[derive(Copy, Clone, Debug)]
    pub struct RandomState {
        per_hasher_seed: u64,
        global_seed: global::GlobalSeed,
    }

    impl Default for RandomState {
        fn default() -> Self {
            // We initialize the per-hasher seed with the stack pointer to ensure
            // different threads have different seeds, with the side benefit that
            // stack address randomization gives us further non-determinism.
            let mut per_hasher_seed = 0;
            let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64;
            per_hasher_seed = stack_ptr;

            // If we have the standard library available we use a thread-local
            // state to ensure RandomStates are different with high probability,
            // even if the call stack is the same.
            #[cfg(feature = "std")]
            {
                use std::cell::Cell;
                thread_local! {
                    static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) };
                }

                let nondeterminism = PER_HASHER_NONDETERMINISM.get();
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.set(per_hasher_seed);
            };

            // If we don't have the standard library we use a global atomic
            // instead of a thread-local state.
            //
            // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner,
            // but this doesn't matter in practice - it is impossible for two
            // different threads to have the same stack location, so they'll almost
            // surely generate different seeds, and provide a different possible
            // update for PER_HASHER_NONDETERMINISM. If we were to use a proper
            // fetch_add atomic update there would be a larger chance of
            // problematic contention.
            //
            // We use usize instead of 64-bit atomics for best platform support.
            #[cfg(not(feature = "std"))]
            {
                use core::sync::atomic::{AtomicUsize, Ordering};
                static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0);

                let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64;
                per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism);
                PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed);
            }

            // One extra mixing step to ensure good random bits.
            per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY2);

            Self {
                per_hasher_seed,
                global_seed: global::GlobalSeed::new(),
            }
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get())
        }
    }

    /// A [`BuildHasher`] for [`fast::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
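    ///
    /// A minimal usage sketch (illustrative; it assumes this module is exposed by
    /// the crate as `foldhash::fast`):
    ///
    /// ```ignore
    /// use core::hash::{BuildHasher, Hasher};
    /// use foldhash::fast::FixedState;
    ///
    /// // The same fixed seed yields the same hash for the same input, run after run.
    /// let build = FixedState::with_seed(0x1234);
    /// let mut a = build.build_hasher();
    /// let mut b = build.build_hasher();
    /// a.write_u64(42);
    /// b.write_u64(42);
    /// assert_eq!(a.finish(), b.finish());
    /// ```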
    #[derive(Copy, Clone, Debug)]
    pub struct FixedState {
        per_hasher_seed: u64,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            // XOR with ARBITRARY3 such that with_seed(0) matches default.
            Self {
                per_hasher_seed: seed ^ ARBITRARY3,
            }
        }
    }

    impl Default for FixedState {
        #[inline(always)]
        fn default() -> Self {
            Self {
                per_hasher_seed: ARBITRARY3,
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher::with_seed(self.per_hasher_seed, &FIXED_GLOBAL_SEED)
        }
    }
}

pub mod quality {
    use super::*;
    use crate::quality::FoldHasher;

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that are randomly initialized.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct RandomState {
        inner: fast::RandomState,
    }

    impl BuildHasher for RandomState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }

    /// A [`BuildHasher`] for [`quality::FoldHasher`]s that all have the same fixed seed.
    ///
    /// Not recommended unless you absolutely need determinism.
    #[derive(Copy, Clone, Default, Debug)]
    pub struct FixedState {
        inner: fast::FixedState,
    }

    impl FixedState {
        /// Creates a [`FixedState`] with the given seed.
        #[inline(always)]
        pub const fn with_seed(seed: u64) -> Self {
            Self {
                // We do an additional folded multiply with the seed here for
                // the quality hash to ensure better independence between seed
                // and hash. If the seed is zero the folded multiply is zero,
                // preserving with_seed(0) == default().
                inner: fast::FixedState::with_seed(folded_multiply(seed, ARBITRARY8)),
            }
        }
    }

    impl BuildHasher for FixedState {
        type Hasher = FoldHasher;

        #[inline(always)]
        fn build_hasher(&self) -> FoldHasher {
            FoldHasher {
                inner: self.inner.build_hasher(),
            }
        }
    }
}

#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    fn generate_global_seed() -> [u64; 4] {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY9);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the first
        // place. This is a whole lot better than nothing, at near zero cost with
        // no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        let seed_a = mix(seed, 0);
        let seed_b = mix(mix(mix(seed_a, 0), 0), 0);
        let seed_c = mix(mix(mix(seed_b, 0), 0), 0);
        let seed_d = mix(mix(mix(seed_c, 0), 0), 0);

        // Zeroes form a weak point for the multiply-mix, and zeroes tend to be
        // a common input, so we want the global seeds that get XOR'ed with the
        // input to always be non-zero. To also ensure there is always a good
        // spread of bits, we give up 3 bits of entropy and simply force some bits on.
        const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1;
        [
            seed_a | FORCED_ONES,
            seed_b | FORCED_ONES,
            seed_c | FORCED_ONES,
            seed_d | FORCED_ONES,
        ]
    }

    // All the code below exists purely to cache the above seed as efficiently
    // as possible. Even if we weren't a no_std crate and had access to OnceLock,
    // we don't want to check whether the global is set each time we hash an
    // object, so we hand-roll global storage whose type safety lets us assume
    // the seed is initialized once a GlobalSeed has been constructed.
    struct GlobalSeedStorage {
        state: AtomicU8,
        seed: UnsafeCell<[u64; 4]>,
    }

    const UNINIT: u8 = 0;
    const LOCKED: u8 = 1;
    const INIT: u8 = 2;

    // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
    // LOCKED state, and only read the UnsafeCells when state is in the
    // once-achieved-eternally-preserved state INIT.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        seed: UnsafeCell::new([0; 4]),
    };

    /// An object representing an initialized global seed.
    ///
    /// It does not actually store the seed inside itself; it is a zero-sized type.
    /// This prevents inflating the RandomState size and, in turn, HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally write GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate seed outside of critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Relaxed,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so no easy
                    // access to proper locks), this is a one-time-per-program
                    // initialization, and the critical section is only a few
                    // store instructions, so it'll be fine.
                    _ => core::hint::spin_loop(),
                }
            }
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}

#[cfg(not(target_has_atomic = "8"))]
mod global {
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        #[inline(always)]
        pub fn new() -> Self {
            Self {}
        }

        #[inline(always)]
        pub fn get(self) -> &'static [u64; 4] {
            &super::FIXED_GLOBAL_SEED
        }
    }
}
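
// Minimal sanity-check sketch for the determinism promises documented above:
// a FixedState with the same seed always produces the same hash, and
// with_seed(0) matches default() for both the fast and quality variants.
// Test names and helpers below are illustrative, not part of the public API.
#[cfg(test)]
mod tests {
    use super::{fast, quality};
    use core::hash::{BuildHasher, Hasher};

    // Hash a single u64 with the given fast::FixedState.
    fn hash_fast(build: &fast::FixedState, x: u64) -> u64 {
        let mut h = build.build_hasher();
        h.write_u64(x);
        h.finish()
    }

    // Hash a single u64 with the given quality::FixedState.
    fn hash_quality(build: &quality::FixedState, x: u64) -> u64 {
        let mut h = build.build_hasher();
        h.write_u64(x);
        h.finish()
    }

    #[test]
    fn fixed_state_is_deterministic() {
        let a = fast::FixedState::with_seed(123);
        let b = fast::FixedState::with_seed(123);
        assert_eq!(hash_fast(&a, 42), hash_fast(&b, 42));
    }

    #[test]
    fn with_seed_zero_matches_default() {
        assert_eq!(
            hash_fast(&fast::FixedState::with_seed(0), 7),
            hash_fast(&fast::FixedState::default(), 7)
        );
        assert_eq!(
            hash_quality(&quality::FixedState::with_seed(0), 7),
            hash_quality(&quality::FixedState::default(), 7)
        );
    }
}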