| 1 | //! Stable hasher adapted for cross-platform independent hash. | 
| 2 |  | 
|---|
| 3 | use std::fmt; | 
|---|
| 4 | use std::hash::Hasher; | 
|---|
| 5 |  | 
|---|
| 6 | #[ cfg(test)] | 
|---|
| 7 | mod tests; | 
|---|
| 8 |  | 
|---|
| 9 | /// Extended [`Hasher`] trait for use with [`StableHasher`]. | 
|---|
| 10 | /// | 
|---|
| 11 | /// It permits returning an arbitrary type as the [`Self::Hash`] type | 
|---|
| 12 | /// contrary to the [`Hasher`] trait which can only return `u64`. This | 
|---|
| 13 | /// is useful when the hasher uses a different representation. | 
|---|
| 14 | /// | 
|---|
| 15 | /// # Example | 
|---|
| 16 | /// | 
|---|
| 17 | /// ``` | 
|---|
| 18 | /// use std::hash::Hasher; | 
|---|
| 19 | /// use rustc_stable_hash::ExtendedHasher; | 
|---|
| 20 | /// | 
|---|
| 21 | /// struct BogusHasher(u128); | 
|---|
| 22 | /// | 
|---|
| 23 | /// impl Hasher for BogusHasher { | 
|---|
| 24 | ///     fn write(&mut self, a: &[u8]) { | 
|---|
| 25 | ///         # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0; | 
|---|
| 26 | ///         // ... | 
|---|
| 27 | ///     } | 
|---|
| 28 | /// | 
|---|
| 29 | ///     fn finish(&self) -> u64 { | 
|---|
| 30 | ///         self.0 as u64 // really bogus | 
|---|
| 31 | ///     } | 
|---|
| 32 | /// } | 
|---|
| 33 | /// | 
|---|
| 34 | /// impl ExtendedHasher for BogusHasher { | 
|---|
| 35 | ///     type Hash = u128; | 
|---|
| 36 | /// | 
|---|
| 37 | ///     fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) { | 
|---|
| 38 | ///         self.write(&bytes) | 
|---|
| 39 | ///     } | 
|---|
| 40 | /// | 
|---|
| 41 | ///     fn finish(self) -> Self::Hash { | 
|---|
| 42 | ///         self.0 | 
|---|
| 43 | ///     } | 
|---|
| 44 | /// } | 
|---|
| 45 | /// ``` | 
|---|
| 46 | pub trait ExtendedHasher: Hasher { | 
|---|
| 47 | /// Type returned by the hasher. | 
|---|
| 48 | type Hash; | 
|---|
| 49 |  | 
|---|
| 50 | /// Optimized version of [`Hasher::write`] but for small write. | 
|---|
| 51 | fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) { | 
|---|
| 52 | self.write(&bytes); | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | /// Finalization method of the hasher to return the [`Hash`]. | 
|---|
| 56 | fn finish(self) -> Self::Hash; | 
|---|
| 57 | } | 
|---|
| 58 |  | 
|---|
| 59 | /// A Stable Hasher adapted for cross-platform independent hash. | 
|---|
| 60 | /// | 
|---|
| 61 | /// When hashing something that ends up affecting properties like symbol names, | 
|---|
| 62 | /// we want these symbol names to be calculated independently of other factors | 
|---|
| 63 | /// like what architecture you're compiling *from*. | 
|---|
| 64 | /// | 
|---|
| 65 | /// To that end we always convert integers to little-endian format before | 
|---|
| 66 | /// hashing and the architecture dependent `isize` and `usize` types are | 
|---|
| 67 | /// extended to 64 bits if needed. | 
|---|
| 68 | /// | 
|---|
| 69 | /// # Example | 
|---|
| 70 | /// | 
|---|
| 71 | /// ``` | 
|---|
| 72 | /// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash}; | 
|---|
| 73 | /// use rustc_stable_hash::{StableHasher, FromStableHash}; | 
|---|
| 74 | /// use std::hash::Hasher; | 
|---|
| 75 | /// | 
|---|
| 76 | /// struct Hash128([u64; 2]); | 
|---|
| 77 | /// impl FromStableHash for Hash128 { | 
|---|
| 78 | ///     type Hash = SipHasher128Hash; | 
|---|
| 79 | /// | 
|---|
| 80 | ///     fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 { | 
|---|
| 81 | ///         Hash128(hash) | 
|---|
| 82 | ///     } | 
|---|
| 83 | /// } | 
|---|
| 84 | /// | 
|---|
| 85 | /// let mut hasher = StableSipHasher128::new(); | 
|---|
| 86 | /// hasher.write_usize(0xFA); | 
|---|
| 87 | /// | 
|---|
| 88 | /// let hash: Hash128 = hasher.finish(); | 
|---|
| 89 | /// ``` | 
|---|
| 90 | #[ must_use] | 
|---|
| 91 | #[ derive(Clone)] | 
|---|
| 92 | pub struct StableHasher<H: ExtendedHasher> { | 
|---|
| 93 | state: H, | 
|---|
| 94 | } | 
|---|
| 95 |  | 
|---|
| 96 | /// Trait for processing the result of the stable hashing operation. | 
|---|
| 97 | /// | 
|---|
| 98 | /// # Example | 
|---|
| 99 | /// | 
|---|
| 100 | /// ``` | 
|---|
| 101 | /// use rustc_stable_hash::{StableHasher, FromStableHash}; | 
|---|
| 102 | /// | 
|---|
| 103 | /// struct Hash128(u128); | 
|---|
| 104 | /// | 
|---|
| 105 | /// impl FromStableHash for Hash128 { | 
|---|
| 106 | ///     type Hash = [u64; 2]; | 
|---|
| 107 | /// | 
|---|
| 108 | ///     fn from(hash: [u64; 2]) -> Hash128 { | 
|---|
| 109 | ///         let upper: u128 = hash[0] as u128; | 
|---|
| 110 | ///         let lower: u128 = hash[1] as u128; | 
|---|
| 111 | /// | 
|---|
| 112 | ///         Hash128((upper << 64) | lower) | 
|---|
| 113 | ///     } | 
|---|
| 114 | /// } | 
|---|
| 115 | /// ``` | 
|---|
| 116 | pub trait FromStableHash: Sized { | 
|---|
| 117 | type Hash; | 
|---|
| 118 |  | 
|---|
| 119 | /// Convert the finalized state of a [`StableHasher`] and construct | 
|---|
| 120 | /// an [`Self`] containing the processed hash. | 
|---|
| 121 | fn from(hash: Self::Hash) -> Self; | 
|---|
| 122 | } | 
|---|
| 123 |  | 
|---|
| 124 | impl<H: ExtendedHasher + Default> StableHasher<H> { | 
|---|
| 125 | /// Creates a new [`StableHasher`]. | 
|---|
| 126 | /// | 
|---|
| 127 | /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. | 
|---|
| 128 | #[ inline] | 
|---|
| 129 | pub fn new() -> Self { | 
|---|
| 130 | Default::default() | 
|---|
| 131 | } | 
|---|
| 132 | } | 
|---|
| 133 |  | 
|---|
| 134 | impl<H: ExtendedHasher + Default> Default for StableHasher<H> { | 
|---|
| 135 | /// Creates a new [`StableHasher`]. | 
|---|
| 136 | /// | 
|---|
| 137 | /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. | 
|---|
| 138 | #[ inline] | 
|---|
| 139 | fn default() -> Self { | 
|---|
| 140 | StableHasher { | 
|---|
| 141 | state: Default::default(), | 
|---|
| 142 | } | 
|---|
| 143 | } | 
|---|
| 144 | } | 
|---|
| 145 |  | 
|---|
| 146 | impl<H: ExtendedHasher> StableHasher<H> { | 
|---|
| 147 | /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`]. | 
|---|
| 148 | /// | 
|---|
| 149 | /// Useful when wanting to initialize a hasher with different parameters/keys. | 
|---|
| 150 | /// | 
|---|
| 151 | /// **Important**: Any use of the hasher before being given to a [`StableHasher`] | 
|---|
| 152 | /// is not covered by this crate guarentees and will make the resulting hash | 
|---|
| 153 | /// NOT platform independent. | 
|---|
| 154 | #[ inline] | 
|---|
| 155 | pub fn with_hasher(state: H) -> Self { | 
|---|
| 156 | StableHasher { state } | 
|---|
| 157 | } | 
|---|
| 158 |  | 
|---|
| 159 | /// Returns the typed-hash value for the values written. | 
|---|
| 160 | /// | 
|---|
| 161 | /// The resulting typed-hash value is constructed from an | 
|---|
| 162 | /// [`FromStableHash`] implemenation. | 
|---|
| 163 | /// | 
|---|
| 164 | /// To be used in-place of [`Hasher::finish`]. | 
|---|
| 165 | #[ inline] | 
|---|
| 166 | #[ must_use] | 
|---|
| 167 | pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W { | 
|---|
| 168 | W::from(self.state.finish()) | 
|---|
| 169 | } | 
|---|
| 170 | } | 
|---|
| 171 |  | 
|---|
| 172 | impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> { | 
|---|
| 173 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | 
|---|
| 174 | write!(f, "{:?} ", self.state) | 
|---|
| 175 | } | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | impl<H: ExtendedHasher> Hasher for StableHasher<H> { | 
|---|
| 179 | /// Returns a combined hash. | 
|---|
| 180 | /// | 
|---|
| 181 | /// For greater precision use instead [`StableHasher::finish`]. | 
|---|
| 182 | fn finish(&self) -> u64 { | 
|---|
| 183 | Hasher::finish(&self.state) | 
|---|
| 184 | } | 
|---|
| 185 |  | 
|---|
| 186 | #[ inline] | 
|---|
| 187 | fn write(&mut self, bytes: &[u8]) { | 
|---|
| 188 | self.state.write(bytes); | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | #[ cfg(feature = "nightly")] | 
|---|
| 192 | #[ inline] | 
|---|
| 193 | fn write_str(&mut self, s: &str) { | 
|---|
| 194 | self.state.write_str(s); | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | #[ cfg(feature = "nightly")] | 
|---|
| 198 | #[ inline] | 
|---|
| 199 | fn write_length_prefix(&mut self, len: usize) { | 
|---|
| 200 | // Our impl for `usize` will extend it if needed. | 
|---|
| 201 | self.write_usize(len); | 
|---|
| 202 | } | 
|---|
| 203 |  | 
|---|
| 204 | #[ inline] | 
|---|
| 205 | fn write_u8(&mut self, i: u8) { | 
|---|
| 206 | self.state.write_u8(i); | 
|---|
| 207 | } | 
|---|
| 208 |  | 
|---|
| 209 | #[ inline] | 
|---|
| 210 | fn write_u16(&mut self, i: u16) { | 
|---|
| 211 | self.state.short_write(i.to_le_bytes()); | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | #[ inline] | 
|---|
| 215 | fn write_u32(&mut self, i: u32) { | 
|---|
| 216 | self.state.short_write(i.to_le_bytes()); | 
|---|
| 217 | } | 
|---|
| 218 |  | 
|---|
| 219 | #[ inline] | 
|---|
| 220 | fn write_u64(&mut self, i: u64) { | 
|---|
| 221 | self.state.short_write(i.to_le_bytes()); | 
|---|
| 222 | } | 
|---|
| 223 |  | 
|---|
| 224 | #[ inline] | 
|---|
| 225 | fn write_u128(&mut self, i: u128) { | 
|---|
| 226 | self.write_u64(i as u64); | 
|---|
| 227 | self.write_u64((i >> 64) as u64); | 
|---|
| 228 | } | 
|---|
| 229 |  | 
|---|
| 230 | #[ inline] | 
|---|
| 231 | fn write_usize(&mut self, i: usize) { | 
|---|
| 232 | // Always treat usize as u64 so we get the same results on 32 and 64 bit | 
|---|
| 233 | // platforms. This is important for symbol hashes when cross compiling, | 
|---|
| 234 | // for example. | 
|---|
| 235 | self.state.short_write((i as u64).to_le_bytes()); | 
|---|
| 236 | } | 
|---|
| 237 |  | 
|---|
| 238 | #[ inline] | 
|---|
| 239 | fn write_i8(&mut self, i: i8) { | 
|---|
| 240 | self.state.write_i8(i); | 
|---|
| 241 | } | 
|---|
| 242 |  | 
|---|
| 243 | #[ inline] | 
|---|
| 244 | fn write_i16(&mut self, i: i16) { | 
|---|
| 245 | self.state.short_write((i as u16).to_le_bytes()); | 
|---|
| 246 | } | 
|---|
| 247 |  | 
|---|
| 248 | #[ inline] | 
|---|
| 249 | fn write_i32(&mut self, i: i32) { | 
|---|
| 250 | self.state.short_write((i as u32).to_le_bytes()); | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | #[ inline] | 
|---|
| 254 | fn write_i64(&mut self, i: i64) { | 
|---|
| 255 | self.state.short_write((i as u64).to_le_bytes()); | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | #[ inline] | 
|---|
| 259 | fn write_i128(&mut self, i: i128) { | 
|---|
| 260 | self.state.write(&(i as u128).to_le_bytes()); | 
|---|
| 261 | } | 
|---|
| 262 |  | 
|---|
| 263 | #[ inline] | 
|---|
| 264 | fn write_isize(&mut self, i: isize) { | 
|---|
| 265 | // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit | 
|---|
| 266 | // platforms. This is important for symbol hashes when cross compiling, | 
|---|
| 267 | // for example. Sign extending here is preferable as it means that the | 
|---|
| 268 | // same negative number hashes the same on both 32 and 64 bit platforms. | 
|---|
| 269 | let value = i as u64; | 
|---|
| 270 |  | 
|---|
| 271 | // Cold path | 
|---|
| 272 | #[ cold] | 
|---|
| 273 | #[ inline(never)] | 
|---|
| 274 | fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) { | 
|---|
| 275 | state.write_u8(0xFF); | 
|---|
| 276 | state.short_write(value.to_le_bytes()); | 
|---|
| 277 | } | 
|---|
| 278 |  | 
|---|
| 279 | // `isize` values often seem to have a small (positive) numeric value in practice. | 
|---|
| 280 | // To exploit this, if the value is small, we will hash a smaller amount of bytes. | 
|---|
| 281 | // However, we cannot just skip the leading zero bytes, as that would produce the same hash | 
|---|
| 282 | // e.g. if you hash two values that have the same bit pattern when they are swapped. | 
|---|
| 283 | // See https://github.com/rust-lang/rust/pull/93014 for context. | 
|---|
| 284 | // | 
|---|
| 285 | // Therefore, we employ the following strategy: | 
|---|
| 286 | // 1) When we encounter a value that fits within a single byte (the most common case), we | 
|---|
| 287 | // hash just that byte. This is the most common case that is being optimized. However, we do | 
|---|
| 288 | // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8). | 
|---|
| 289 | // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding | 
|---|
| 290 | // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two | 
|---|
| 291 | // `isize`s that fit within a different amount of bytes, they should always produce a different | 
|---|
| 292 | // byte stream for the hasher. | 
|---|
| 293 | if value < 0xFF { | 
|---|
| 294 | self.state.write_u8(value as u8); | 
|---|
| 295 | } else { | 
|---|
| 296 | hash_value(&mut self.state, value); | 
|---|
| 297 | } | 
|---|
| 298 | } | 
|---|
| 299 | } | 
|---|
| 300 |  | 
|---|