//! Stable hasher adapted for cross-platform independent hashing.
2
use std::fmt;
use std::hash::Hasher;

#[cfg(test)]
mod tests;
8
/// Extended [`Hasher`] trait for use with [`StableHasher`].
///
/// It permits returning an arbitrary type as the [`Self::Hash`] type
/// contrary to the [`Hasher`] trait which can only return `u64`. This
/// is useful when the hasher uses a different representation.
///
/// # Example
///
/// ```
/// use std::hash::Hasher;
/// use rustc_stable_hash::ExtendedHasher;
///
/// struct BogusHasher(u128);
///
/// impl Hasher for BogusHasher {
///     fn write(&mut self, a: &[u8]) {
///         # self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
///         // ...
///     }
///
///     fn finish(&self) -> u64 {
///         self.0 as u64 // really bogus
///     }
/// }
///
/// impl ExtendedHasher for BogusHasher {
///     type Hash = u128;
///
///     fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
///         self.write(&bytes)
///     }
///
///     fn finish(self) -> Self::Hash {
///         self.0
///     }
/// }
/// ```
pub trait ExtendedHasher: Hasher {
    /// Type returned by the hasher.
    type Hash;

    /// Optimized version of [`Hasher::write`] but for small writes.
    ///
    /// `bytes` is a fixed-size array whose length `LEN` is known at compile
    /// time. The default implementation simply forwards the array to
    /// [`Hasher::write`]; implementors may override it with a specialized path.
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        self.write(&bytes);
    }

    /// Finalization method of the hasher to return the [`Self::Hash`].
    ///
    /// Unlike [`Hasher::finish`], which borrows the hasher and returns a `u64`,
    /// this method consumes the hasher.
    fn finish(self) -> Self::Hash;
}
58
59/// A Stable Hasher adapted for cross-platform independent hash.
60///
61/// When hashing something that ends up affecting properties like symbol names,
62/// we want these symbol names to be calculated independently of other factors
63/// like what architecture you're compiling *from*.
64///
65/// To that end we always convert integers to little-endian format before
66/// hashing and the architecture dependent `isize` and `usize` types are
67/// extended to 64 bits if needed.
68///
69/// # Example
70///
71/// ```
72/// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash};
73/// use rustc_stable_hash::{StableHasher, FromStableHash};
74/// use std::hash::Hasher;
75///
76/// struct Hash128([u64; 2]);
77/// impl FromStableHash for Hash128 {
78/// type Hash = SipHasher128Hash;
79///
80/// fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 {
81/// Hash128(hash)
82/// }
83/// }
84///
85/// let mut hasher = StableSipHasher128::new();
86/// hasher.write_usize(0xFA);
87///
88/// let hash: Hash128 = hasher.finish();
89/// ```
90#[must_use]
91#[derive(Clone)]
92pub struct StableHasher<H: ExtendedHasher> {
93 state: H,
94}
95
/// Trait for processing the result of the stable hashing operation.
///
/// # Example
///
/// ```
/// use rustc_stable_hash::{StableHasher, FromStableHash};
///
/// struct Hash128(u128);
///
/// impl FromStableHash for Hash128 {
///     type Hash = [u64; 2];
///
///     fn from(hash: [u64; 2]) -> Hash128 {
///         let upper: u128 = hash[0] as u128;
///         let lower: u128 = hash[1] as u128;
///
///         Hash128((upper << 64) | lower)
///     }
/// }
/// ```
pub trait FromStableHash: Sized {
    /// Raw hash type this value is built from.
    ///
    /// [`StableHasher::finish`] requires it to be the same type as the
    /// `Hash` type of the underlying hasher.
    type Hash;

    /// Convert the finalized state of a [`StableHasher`] and construct
    /// a [`Self`] containing the processed hash.
    fn from(hash: Self::Hash) -> Self;
}
123
124impl<H: ExtendedHasher + Default> StableHasher<H> {
125 /// Creates a new [`StableHasher`].
126 ///
127 /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
128 #[inline]
129 pub fn new() -> Self {
130 Default::default()
131 }
132}
133
134impl<H: ExtendedHasher + Default> Default for StableHasher<H> {
135 /// Creates a new [`StableHasher`].
136 ///
137 /// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
138 #[inline]
139 fn default() -> Self {
140 StableHasher {
141 state: Default::default(),
142 }
143 }
144}
145
146impl<H: ExtendedHasher> StableHasher<H> {
147 /// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`].
148 ///
149 /// Useful when wanting to initialize a hasher with different parameters/keys.
150 ///
151 /// **Important**: Any use of the hasher before being given to a [`StableHasher`]
152 /// is not covered by this crate guarentees and will make the resulting hash
153 /// NOT platform independent.
154 #[inline]
155 pub fn with_hasher(state: H) -> Self {
156 StableHasher { state }
157 }
158
159 /// Returns the typed-hash value for the values written.
160 ///
161 /// The resulting typed-hash value is constructed from an
162 /// [`FromStableHash`] implemenation.
163 ///
164 /// To be used in-place of [`Hasher::finish`].
165 #[inline]
166 #[must_use]
167 pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W {
168 W::from(self.state.finish())
169 }
170}
171
172impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
173 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174 write!(f, "{:?}", self.state)
175 }
176}
177
178impl<H: ExtendedHasher> Hasher for StableHasher<H> {
179 /// Returns a combined hash.
180 ///
181 /// For greater precision use instead [`StableHasher::finish`].
182 fn finish(&self) -> u64 {
183 Hasher::finish(&self.state)
184 }
185
186 #[inline]
187 fn write(&mut self, bytes: &[u8]) {
188 self.state.write(bytes);
189 }
190
191 #[cfg(feature = "nightly")]
192 #[inline]
193 fn write_str(&mut self, s: &str) {
194 self.state.write_str(s);
195 }
196
197 #[cfg(feature = "nightly")]
198 #[inline]
199 fn write_length_prefix(&mut self, len: usize) {
200 // Our impl for `usize` will extend it if needed.
201 self.write_usize(len);
202 }
203
204 #[inline]
205 fn write_u8(&mut self, i: u8) {
206 self.state.write_u8(i);
207 }
208
209 #[inline]
210 fn write_u16(&mut self, i: u16) {
211 self.state.short_write(i.to_le_bytes());
212 }
213
214 #[inline]
215 fn write_u32(&mut self, i: u32) {
216 self.state.short_write(i.to_le_bytes());
217 }
218
219 #[inline]
220 fn write_u64(&mut self, i: u64) {
221 self.state.short_write(i.to_le_bytes());
222 }
223
224 #[inline]
225 fn write_u128(&mut self, i: u128) {
226 self.write_u64(i as u64);
227 self.write_u64((i >> 64) as u64);
228 }
229
230 #[inline]
231 fn write_usize(&mut self, i: usize) {
232 // Always treat usize as u64 so we get the same results on 32 and 64 bit
233 // platforms. This is important for symbol hashes when cross compiling,
234 // for example.
235 self.state.short_write((i as u64).to_le_bytes());
236 }
237
238 #[inline]
239 fn write_i8(&mut self, i: i8) {
240 self.state.write_i8(i);
241 }
242
243 #[inline]
244 fn write_i16(&mut self, i: i16) {
245 self.state.short_write((i as u16).to_le_bytes());
246 }
247
248 #[inline]
249 fn write_i32(&mut self, i: i32) {
250 self.state.short_write((i as u32).to_le_bytes());
251 }
252
253 #[inline]
254 fn write_i64(&mut self, i: i64) {
255 self.state.short_write((i as u64).to_le_bytes());
256 }
257
258 #[inline]
259 fn write_i128(&mut self, i: i128) {
260 self.state.write(&(i as u128).to_le_bytes());
261 }
262
263 #[inline]
264 fn write_isize(&mut self, i: isize) {
265 // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit
266 // platforms. This is important for symbol hashes when cross compiling,
267 // for example. Sign extending here is preferable as it means that the
268 // same negative number hashes the same on both 32 and 64 bit platforms.
269 let value = i as u64;
270
271 // Cold path
272 #[cold]
273 #[inline(never)]
274 fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) {
275 state.write_u8(0xFF);
276 state.short_write(value.to_le_bytes());
277 }
278
279 // `isize` values often seem to have a small (positive) numeric value in practice.
280 // To exploit this, if the value is small, we will hash a smaller amount of bytes.
281 // However, we cannot just skip the leading zero bytes, as that would produce the same hash
282 // e.g. if you hash two values that have the same bit pattern when they are swapped.
283 // See https://github.com/rust-lang/rust/pull/93014 for context.
284 //
285 // Therefore, we employ the following strategy:
286 // 1) When we encounter a value that fits within a single byte (the most common case), we
287 // hash just that byte. This is the most common case that is being optimized. However, we do
288 // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8).
289 // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding
290 // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
291 // `isize`s that fit within a different amount of bytes, they should always produce a different
292 // byte stream for the hasher.
293 if value < 0xFF {
294 self.state.write_u8(value as u8);
295 } else {
296 hash_value(&mut self.state, value);
297 }
298 }
299}
300