| 1 | // These constants may end up unused depending on platform support. |
| 2 | #[allow (unused)] |
| 3 | use crate::{ARBITRARY1, ARBITRARY5}; |
| 4 | |
| 5 | use super::{ |
| 6 | folded_multiply, ARBITRARY10, ARBITRARY11, ARBITRARY2, ARBITRARY6, ARBITRARY7, ARBITRARY8, |
| 7 | ARBITRARY9, |
| 8 | }; |
| 9 | |
| 10 | /// Used for FixedState, and RandomState if atomics for dynamic init are unavailable. |
/// Used for FixedState, and RandomState if atomics for dynamic init are unavailable.
///
/// The values are compile-time constants imported from the parent module;
/// see their definitions for the actual seed material.
const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed {
    seeds: [
        ARBITRARY6,
        ARBITRARY7,
        ARBITRARY8,
        ARBITRARY9,
        ARBITRARY10,
        ARBITRARY11,
    ],
};
| 21 | |
| 22 | pub(crate) fn gen_per_hasher_seed() -> u64 { |
| 23 | // We initialize the per-hasher seed with the stack pointer to ensure |
| 24 | // different threads have different seeds, with as side benefit that |
| 25 | // stack address randomization gives us further non-determinism. |
| 26 | let mut per_hasher_seed = 0; |
| 27 | let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64; |
| 28 | per_hasher_seed = stack_ptr; |
| 29 | |
| 30 | // If we have the standard library available we use a thread-local |
| 31 | // state to ensure RandomStates are different with high probability, |
| 32 | // even if the call stack is the same. |
| 33 | #[cfg (feature = "std" )] |
| 34 | { |
| 35 | use std::cell::Cell; |
| 36 | thread_local! { |
| 37 | static PER_HASHER_NONDETERMINISM: Cell<u64> = const { Cell::new(0) }; |
| 38 | } |
| 39 | |
| 40 | PER_HASHER_NONDETERMINISM.with(|cell| { |
| 41 | let nondeterminism = cell.get(); |
| 42 | per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); |
| 43 | cell.set(per_hasher_seed); |
| 44 | }) |
| 45 | }; |
| 46 | |
| 47 | // If we don't have the standard library we instead use a global |
| 48 | // atomic instead of a thread-local state. |
| 49 | // |
| 50 | // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, |
| 51 | // but this doesn't matter in practice - it is impossible that two |
| 52 | // different threads have the same stack location, so they'll almost |
| 53 | // surely generate different seeds, and provide a different possible |
| 54 | // update for PER_HASHER_NONDETERMINISM. If we would use a proper |
| 55 | // fetch_add atomic update then there is a larger chance of |
| 56 | // problematic contention. |
| 57 | // |
| 58 | // We use usize instead of 64-bit atomics for best platform support. |
| 59 | #[cfg (not(feature = "std" ))] |
| 60 | { |
| 61 | use core::sync::atomic::{AtomicUsize, Ordering}; |
| 62 | static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0); |
| 63 | |
| 64 | let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64; |
| 65 | per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); |
| 66 | PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed); |
| 67 | } |
| 68 | |
| 69 | // One extra mixing step to ensure good random bits. |
| 70 | folded_multiply(per_hasher_seed, ARBITRARY2) |
| 71 | } |
| 72 | |
/// A random seed intended to be shared by many different foldhash instances.
///
/// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed),
/// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed).
#[derive(Clone, Debug)]
pub struct SharedSeed {
    // The six 64-bit words of seed material consumed by the hashers.
    pub(crate) seeds: [u64; 6],
}
| 81 | |
| 82 | impl SharedSeed { |
| 83 | /// Returns the globally shared randomly initialized [`SharedSeed`] as used |
| 84 | /// by [`RandomState`](crate::fast::RandomState). |
| 85 | #[inline (always)] |
| 86 | pub fn global_random() -> &'static SharedSeed { |
| 87 | global::GlobalSeed::new().get() |
| 88 | } |
| 89 | |
| 90 | /// Returns the globally shared fixed [`SharedSeed`] as used |
| 91 | /// by [`FixedState`](crate::fast::FixedState). |
| 92 | #[inline (always)] |
| 93 | pub const fn global_fixed() -> &'static SharedSeed { |
| 94 | &FIXED_GLOBAL_SEED |
| 95 | } |
| 96 | |
| 97 | /// Generates a new [`SharedSeed`] from a single 64-bit seed. |
| 98 | /// |
| 99 | /// Note that this is somewhat expensive so it is suggested to re-use the |
| 100 | /// [`SharedSeed`] as much as possible, using the per-hasher seed to |
| 101 | /// differentiate between hash instances. |
| 102 | pub const fn from_u64(seed: u64) -> Self { |
| 103 | macro_rules! mix { |
| 104 | ($x: expr) => { |
| 105 | folded_multiply($x, ARBITRARY5) |
| 106 | }; |
| 107 | } |
| 108 | |
| 109 | let seed_a = mix!(mix!(mix!(seed))); |
| 110 | let seed_b = mix!(mix!(mix!(seed_a))); |
| 111 | let seed_c = mix!(mix!(mix!(seed_b))); |
| 112 | let seed_d = mix!(mix!(mix!(seed_c))); |
| 113 | let seed_e = mix!(mix!(mix!(seed_d))); |
| 114 | let seed_f = mix!(mix!(mix!(seed_e))); |
| 115 | |
| 116 | // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be |
| 117 | // a common input. So we want our global seeds that are XOR'ed with the |
| 118 | // input to always be non-zero. To also ensure there is always a good spread |
| 119 | // of bits, we give up 3 bits of entropy and simply force some bits on. |
| 120 | const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1; |
| 121 | Self { |
| 122 | seeds: [ |
| 123 | seed_a | FORCED_ONES, |
| 124 | seed_b | FORCED_ONES, |
| 125 | seed_c | FORCED_ONES, |
| 126 | seed_d | FORCED_ONES, |
| 127 | seed_e | FORCED_ONES, |
| 128 | seed_f | FORCED_ONES, |
| 129 | ], |
| 130 | } |
| 131 | } |
| 132 | } |
| 133 | |
#[cfg(target_has_atomic = "8")]
mod global {
    use super::*;
    use core::cell::UnsafeCell;
    use core::sync::atomic::{AtomicU8, Ordering};

    /// Builds a fresh [`SharedSeed`] from cheap entropy sources that need no
    /// extra dependencies: various addresses, plus (with std) the clock and
    /// an allocator-provided address.
    fn generate_global_seed() -> SharedSeed {
        let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY5);

        // Use address space layout randomization as our main randomness source.
        // This isn't great, but we don't advertise HashDoS resistance in the first
        // place. This is a whole lot better than nothing, at near zero cost with
        // no dependencies.
        let mut seed = 0;
        let stack_ptr = &seed as *const _;
        let func_ptr = generate_global_seed;
        let static_ptr = &GLOBAL_SEED_STORAGE as *const _;
        seed = mix(seed, stack_ptr as usize as u64);
        seed = mix(seed, func_ptr as usize as u64);
        seed = mix(seed, static_ptr as usize as u64);

        // If we have the standard library available, augment entropy with the
        // current time and an address from the allocator.
        #[cfg(feature = "std")]
        {
            // Skipped on targets where wall-clock time is unavailable or
            // deterministic (miri, wasm-unknown, zkVM).
            #[cfg(not(any(
                miri,
                all(target_family = "wasm", target_os = "unknown"),
                target_os = "zkvm"
            )))]
            if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() {
                seed = mix(seed, duration.subsec_nanos() as u64);
                seed = mix(seed, duration.as_secs());
            }

            let box_ptr = &*Box::new(0u8) as *const _;
            seed = mix(seed, box_ptr as usize as u64);
        }

        SharedSeed::from_u64(seed)
    }

    // Now all the below code purely exists to cache the above seed as
    // efficiently as possible. Even if we weren't a no_std crate and had access to
    // OnceLock, we don't want to check whether the global is set each time we
    // hash an object, so we hand-roll a global storage where type safety allows us
    // to assume the storage is initialized after construction.
    struct GlobalSeedStorage {
        // One of UNINIT, LOCKED or INIT; see the constants below.
        state: AtomicU8,
        // Only written while `state` is LOCKED, only read once `state` is INIT.
        seed: UnsafeCell<SharedSeed>,
    }

    const UNINIT: u8 = 0; // Seed not generated yet.
    const LOCKED: u8 = 1; // Some thread is currently writing the seed.
    const INIT: u8 = 2; // Seed is written and may be read freely, forever.

    // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive
    // LOCKED state, and only read the UnsafeCells when state is in the
    // once-achieved-eternally-preserved state INIT.
    unsafe impl Sync for GlobalSeedStorage {}

    static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage {
        state: AtomicU8::new(UNINIT),
        seed: UnsafeCell::new(SharedSeed { seeds: [0; 6] }),
    };

    /// An object representing an initialized global seed.
    ///
    /// Does not actually store the seed inside itself, it is a zero-sized type.
    /// This prevents inflating the RandomState size and in turn HashMap's size.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {
        // So we can't accidentally type GlobalSeed { } within this crate.
        _no_accidental_unsafe_init: (),
    }

    impl GlobalSeed {
        /// Returns a handle that proves the global storage is initialized,
        /// running the one-time initialization first if needed.
        #[inline(always)]
        pub fn new() -> Self {
            if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT {
                Self::init_slow()
            }
            Self {
                _no_accidental_unsafe_init: (),
            }
        }

        /// Cold path: generate a seed and race with other threads to install it.
        #[cold]
        #[inline(never)]
        fn init_slow() {
            // Generate seed outside of critical section.
            let seed = generate_global_seed();

            loop {
                match GLOBAL_SEED_STORAGE.state.compare_exchange_weak(
                    UNINIT,
                    LOCKED,
                    Ordering::Acquire,
                    Ordering::Acquire,
                ) {
                    Ok(_) => unsafe {
                        // SAFETY: we just acquired an exclusive lock.
                        *GLOBAL_SEED_STORAGE.seed.get() = seed;
                        GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release);
                        return;
                    },

                    // Another thread finished initialization first; its seed wins
                    // and ours is discarded.
                    Err(INIT) => return,

                    // Yes, it's a spin loop. We need to support no_std (so no easy
                    // access to proper locks), this is a one-time-per-program
                    // initialization, and the critical section is only a few
                    // store instructions, so it'll be fine.
                    // (This arm also covers Err(LOCKED) and spurious
                    // compare_exchange_weak failures.)
                    _ => core::hint::spin_loop(),
                }
            }
        }

        /// Returns the cached global seed.
        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            // SAFETY: our constructor ensured we are in the INIT state and thus
            // this raw read does not race with any write.
            unsafe { &*GLOBAL_SEED_STORAGE.seed.get() }
        }
    }
}
| 260 | |
#[cfg(not(target_has_atomic = "8"))]
mod global {
    use super::*;

    /// Fallback zero-sized seed handle for targets without byte-sized
    /// atomics, where dynamic one-time initialization is impossible; the
    /// fixed global seed is used instead.
    #[derive(Copy, Clone, Debug)]
    pub struct GlobalSeed {}

    impl GlobalSeed {
        /// Creates the trivial handle; nothing needs initializing here.
        #[inline(always)]
        pub fn new() -> Self {
            GlobalSeed {}
        }

        /// Returns the fixed global seed, the only option on this target.
        #[inline(always)]
        pub fn get(self) -> &'static SharedSeed {
            &FIXED_GLOBAL_SEED
        }
    }
}
| 280 | |
| 281 | pub(crate) use global::GlobalSeed; |
| 282 | |