1 | //! Stable hasher adapted for cross-platform independent hash. |
2 | |
3 | use std::fmt; |
4 | use std::hash::Hasher; |
5 | |
6 | #[cfg (test)] |
7 | mod tests; |
8 | |
9 | /// Extended [`Hasher`] trait for use with [`StableHasher`]. |
10 | /// |
11 | /// It permits returning an arbitrary type as the [`Self::Hash`] type |
12 | /// contrary to the [`Hasher`] trait which can only return `u64`. This |
13 | /// is useful when the hasher uses a different representation. |
14 | /// |
15 | /// # Example |
16 | /// |
17 | /// ``` |
18 | /// use std::hash::Hasher; |
19 | /// use rustc_stable_hash::ExtendedHasher; |
20 | /// |
21 | /// struct BogusHasher(u128); |
22 | /// |
23 | /// impl Hasher for BogusHasher { |
24 | /// fn write(&mut self, a: &[u8]) { |
25 | /// # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0; |
26 | /// // ... |
27 | /// } |
28 | /// |
29 | /// fn finish(&self) -> u64 { |
30 | /// self.0 as u64 // really bogus |
31 | /// } |
32 | /// } |
33 | /// |
34 | /// impl ExtendedHasher for BogusHasher { |
35 | /// type Hash = u128; |
36 | /// |
37 | /// fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) { |
38 | /// self.write(&bytes) |
39 | /// } |
40 | /// |
41 | /// fn finish(self) -> Self::Hash { |
42 | /// self.0 |
43 | /// } |
44 | /// } |
45 | /// ``` |
46 | pub trait ExtendedHasher: Hasher { |
47 | /// Type returned by the hasher. |
48 | type Hash; |
49 | |
50 | /// Optimized version of [`Hasher::write`] but for small write. |
51 | fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) { |
52 | self.write(&bytes); |
53 | } |
54 | |
55 | /// Finalization method of the hasher to return the [`Hash`]. |
56 | fn finish(self) -> Self::Hash; |
57 | } |
58 | |
59 | /// A Stable Hasher adapted for cross-platform independent hash. |
60 | /// |
61 | /// When hashing something that ends up affecting properties like symbol names, |
62 | /// we want these symbol names to be calculated independently of other factors |
63 | /// like what architecture you're compiling *from*. |
64 | /// |
65 | /// To that end we always convert integers to little-endian format before |
66 | /// hashing and the architecture dependent `isize` and `usize` types are |
67 | /// extended to 64 bits if needed. |
68 | /// |
69 | /// # Example |
70 | /// |
71 | /// ``` |
72 | /// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash}; |
73 | /// use rustc_stable_hash::{StableHasher, FromStableHash}; |
74 | /// use std::hash::Hasher; |
75 | /// |
76 | /// struct Hash128([u64; 2]); |
77 | /// impl FromStableHash for Hash128 { |
78 | /// type Hash = SipHasher128Hash; |
79 | /// |
80 | /// fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 { |
81 | /// Hash128(hash) |
82 | /// } |
83 | /// } |
84 | /// |
85 | /// let mut hasher = StableSipHasher128::new(); |
86 | /// hasher.write_usize(0xFA); |
87 | /// |
88 | /// let hash: Hash128 = hasher.finish(); |
89 | /// ``` |
90 | #[must_use ] |
91 | #[derive (Clone)] |
92 | pub struct StableHasher<H: ExtendedHasher> { |
93 | state: H, |
94 | } |
95 | |
96 | /// Trait for processing the result of the stable hashing operation. |
97 | /// |
98 | /// # Example |
99 | /// |
100 | /// ``` |
101 | /// use rustc_stable_hash::{StableHasher, FromStableHash}; |
102 | /// |
103 | /// struct Hash128(u128); |
104 | /// |
105 | /// impl FromStableHash for Hash128 { |
106 | /// type Hash = [u64; 2]; |
107 | /// |
108 | /// fn from(hash: [u64; 2]) -> Hash128 { |
109 | /// let upper: u128 = hash[0] as u128; |
110 | /// let lower: u128 = hash[1] as u128; |
111 | /// |
112 | /// Hash128((upper << 64) | lower) |
113 | /// } |
114 | /// } |
115 | /// ``` |
116 | pub trait FromStableHash: Sized { |
117 | type Hash; |
118 | |
119 | /// Convert the finalized state of a [`StableHasher`] and construct |
120 | /// an [`Self`] containing the processed hash. |
121 | fn from(hash: Self::Hash) -> Self; |
122 | } |
123 | |
124 | impl<H: ExtendedHasher + Default> StableHasher<H> { |
125 | /// Creates a new [`StableHasher`]. |
126 | /// |
127 | /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. |
128 | #[inline ] |
129 | pub fn new() -> Self { |
130 | Default::default() |
131 | } |
132 | } |
133 | |
134 | impl<H: ExtendedHasher + Default> Default for StableHasher<H> { |
135 | /// Creates a new [`StableHasher`]. |
136 | /// |
137 | /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`]. |
138 | #[inline ] |
139 | fn default() -> Self { |
140 | StableHasher { |
141 | state: Default::default(), |
142 | } |
143 | } |
144 | } |
145 | |
146 | impl<H: ExtendedHasher> StableHasher<H> { |
147 | /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`]. |
148 | /// |
149 | /// Useful when wanting to initialize a hasher with different parameters/keys. |
150 | /// |
151 | /// **Important**: Any use of the hasher before being given to a [`StableHasher`] |
152 | /// is not covered by this crate guarentees and will make the resulting hash |
153 | /// NOT platform independent. |
154 | #[inline ] |
155 | pub fn with_hasher(state: H) -> Self { |
156 | StableHasher { state } |
157 | } |
158 | |
159 | /// Returns the typed-hash value for the values written. |
160 | /// |
161 | /// The resulting typed-hash value is constructed from an |
162 | /// [`FromStableHash`] implemenation. |
163 | /// |
164 | /// To be used in-place of [`Hasher::finish`]. |
165 | #[inline ] |
166 | #[must_use ] |
167 | pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W { |
168 | W::from(self.state.finish()) |
169 | } |
170 | } |
171 | |
172 | impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> { |
173 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
174 | write!(f, " {:?}" , self.state) |
175 | } |
176 | } |
177 | |
178 | impl<H: ExtendedHasher> Hasher for StableHasher<H> { |
179 | /// Returns a combined hash. |
180 | /// |
181 | /// For greater precision use instead [`StableHasher::finish`]. |
182 | fn finish(&self) -> u64 { |
183 | Hasher::finish(&self.state) |
184 | } |
185 | |
186 | #[inline ] |
187 | fn write(&mut self, bytes: &[u8]) { |
188 | self.state.write(bytes); |
189 | } |
190 | |
191 | #[cfg (feature = "nightly" )] |
192 | #[inline ] |
193 | fn write_str(&mut self, s: &str) { |
194 | self.state.write_str(s); |
195 | } |
196 | |
197 | #[cfg (feature = "nightly" )] |
198 | #[inline ] |
199 | fn write_length_prefix(&mut self, len: usize) { |
200 | // Our impl for `usize` will extend it if needed. |
201 | self.write_usize(len); |
202 | } |
203 | |
204 | #[inline ] |
205 | fn write_u8(&mut self, i: u8) { |
206 | self.state.write_u8(i); |
207 | } |
208 | |
209 | #[inline ] |
210 | fn write_u16(&mut self, i: u16) { |
211 | self.state.short_write(i.to_le_bytes()); |
212 | } |
213 | |
214 | #[inline ] |
215 | fn write_u32(&mut self, i: u32) { |
216 | self.state.short_write(i.to_le_bytes()); |
217 | } |
218 | |
219 | #[inline ] |
220 | fn write_u64(&mut self, i: u64) { |
221 | self.state.short_write(i.to_le_bytes()); |
222 | } |
223 | |
224 | #[inline ] |
225 | fn write_u128(&mut self, i: u128) { |
226 | self.write_u64(i as u64); |
227 | self.write_u64((i >> 64) as u64); |
228 | } |
229 | |
230 | #[inline ] |
231 | fn write_usize(&mut self, i: usize) { |
232 | // Always treat usize as u64 so we get the same results on 32 and 64 bit |
233 | // platforms. This is important for symbol hashes when cross compiling, |
234 | // for example. |
235 | self.state.short_write((i as u64).to_le_bytes()); |
236 | } |
237 | |
238 | #[inline ] |
239 | fn write_i8(&mut self, i: i8) { |
240 | self.state.write_i8(i); |
241 | } |
242 | |
243 | #[inline ] |
244 | fn write_i16(&mut self, i: i16) { |
245 | self.state.short_write((i as u16).to_le_bytes()); |
246 | } |
247 | |
248 | #[inline ] |
249 | fn write_i32(&mut self, i: i32) { |
250 | self.state.short_write((i as u32).to_le_bytes()); |
251 | } |
252 | |
253 | #[inline ] |
254 | fn write_i64(&mut self, i: i64) { |
255 | self.state.short_write((i as u64).to_le_bytes()); |
256 | } |
257 | |
258 | #[inline ] |
259 | fn write_i128(&mut self, i: i128) { |
260 | self.state.write(&(i as u128).to_le_bytes()); |
261 | } |
262 | |
263 | #[inline ] |
264 | fn write_isize(&mut self, i: isize) { |
265 | // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit |
266 | // platforms. This is important for symbol hashes when cross compiling, |
267 | // for example. Sign extending here is preferable as it means that the |
268 | // same negative number hashes the same on both 32 and 64 bit platforms. |
269 | let value = i as u64; |
270 | |
271 | // Cold path |
272 | #[cold ] |
273 | #[inline (never)] |
274 | fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) { |
275 | state.write_u8(0xFF); |
276 | state.short_write(value.to_le_bytes()); |
277 | } |
278 | |
279 | // `isize` values often seem to have a small (positive) numeric value in practice. |
280 | // To exploit this, if the value is small, we will hash a smaller amount of bytes. |
281 | // However, we cannot just skip the leading zero bytes, as that would produce the same hash |
282 | // e.g. if you hash two values that have the same bit pattern when they are swapped. |
283 | // See https://github.com/rust-lang/rust/pull/93014 for context. |
284 | // |
285 | // Therefore, we employ the following strategy: |
286 | // 1) When we encounter a value that fits within a single byte (the most common case), we |
287 | // hash just that byte. This is the most common case that is being optimized. However, we do |
288 | // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8). |
289 | // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding |
290 | // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two |
291 | // `isize`s that fit within a different amount of bytes, they should always produce a different |
292 | // byte stream for the hasher. |
293 | if value < 0xFF { |
294 | self.state.write_u8(value as u8); |
295 | } else { |
296 | hash_value(&mut self.state, value); |
297 | } |
298 | } |
299 | } |
300 | |