stable_hasher.rs source code [crates/rustc_stable_hash/src/stable_hasher.rs]

1	//! Stable hasher adapted for cross-platform independent hash.
2
3	use std::fmt;
4	use std::hash::Hasher;
5
6	#[cfg(test)]
7	mod tests;
8
/// Extended [`Hasher`] trait for use with [`StableHasher`].
///
/// It permits returning an arbitrary type as the [`Self::Hash`] type
/// contrary to the [`Hasher`] trait which can only return `u64`. This
/// is useful when the hasher uses a different representation.
///
/// # Example
///
/// ```
/// use std::hash::Hasher;
/// use rustc_stable_hash::ExtendedHasher;
///
/// struct BogusHasher(u128);
///
/// impl Hasher for BogusHasher {
///     fn write(&mut self, a: &[u8]) {
/// #         self.0 = a.iter().fold(0u128, |acc, a| acc + (*a as u128)) + self.0;
///         // ...
///     }
///
///     fn finish(&self) -> u64 {
///         self.0 as u64 // really bogus
///     }
/// }
///
/// impl ExtendedHasher for BogusHasher {
///     type Hash = u128;
///
///     fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
///         self.write(&bytes)
///     }
///
///     fn finish(self) -> Self::Hash {
///         self.0
///     }
/// }
/// ```
pub trait ExtendedHasher: Hasher {
    /// Type returned by the hasher.
    type Hash;

    /// Optimized version of [`Hasher::write`] but for small write.
    ///
    /// The default implementation forwards the fixed-size array to
    /// [`Hasher::write`] as a slice; implementors may override it.
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        self.write(&bytes);
    }

    /// Finalization method of the hasher to return the [`Self::Hash`].
    ///
    /// Consumes the hasher, unlike [`Hasher::finish`] which borrows it and
    /// can only return a `u64`.
    fn finish(self) -> Self::Hash;
}
58
59	/// A Stable Hasher adapted for cross-platform independent hash.
60	///
61	/// When hashing something that ends up affecting properties like symbol names,
62	/// we want these symbol names to be calculated independently of other factors
63	/// like what architecture you're compiling from.
64	///
65	/// To that end we always convert integers to little-endian format before
66	/// hashing and the architecture dependent `isize` and `usize` types are
67	/// extended to 64 bits if needed.
68	///
69	/// # Example
70	///
71	/// ```
72	/// use rustc_stable_hash::hashers::{StableSipHasher128, SipHasher128Hash};
73	/// use rustc_stable_hash::{StableHasher, FromStableHash};
74	/// use std::hash::Hasher;
75	///
76	/// struct Hash128([u64; `2`]);
77	/// impl FromStableHash for Hash128 {
78	/// type Hash = SipHasher128Hash;
79	///
80	/// fn from(SipHasher128Hash(hash): SipHasher128Hash) -> Hash128 {
81	/// Hash128(hash)
82	/// }
83	/// }
84	///
85	/// let mut hasher = StableSipHasher128::new();
86	/// hasher.write_usize(`0xFA`);
87	///
88	/// let hash: Hash128 = hasher.finish();
89	/// ```
90	#[must_use]
91	#[derive(Clone)]
92	pub struct StableHasher<H: ExtendedHasher> {
93	state: H,
94	}
95
/// Trait for processing the result of the stable hashing operation.
///
/// # Example
///
/// ```
/// use rustc_stable_hash::{StableHasher, FromStableHash};
///
/// struct Hash128(u128);
///
/// impl FromStableHash for Hash128 {
///     type Hash = [u64; 2];
///
///     fn from(hash: [u64; 2]) -> Hash128 {
///         let upper: u128 = hash[0] as u128;
///         let lower: u128 = hash[1] as u128;
///
///         Hash128((upper << 64) | lower)
///     }
/// }
/// ```
pub trait FromStableHash: Sized {
    /// Type of the finalized hash consumed by [`FromStableHash::from`].
    type Hash;

    /// Convert the finalized state of a [`StableHasher`] and construct
    /// a [`Self`] containing the processed hash.
    fn from(hash: Self::Hash) -> Self;
}
123
124	impl<H: ExtendedHasher + Default> StableHasher<H> {
125	/// Creates a new [`StableHasher`].
126	///
127	/// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
128	#[inline]
129	pub fn new() -> Self {
130	Default::default()
131	}
132	}
133
134	impl<H: ExtendedHasher + Default> Default for StableHasher<H> {
135	/// Creates a new [`StableHasher`].
136	///
137	/// To be used with the [`Hasher`] implementation and [`StableHasher::finish`].
138	#[inline]
139	fn default() -> Self {
140	StableHasher {
141	state: Default::default(),
142	}
143	}
144	}
145
146	impl<H: ExtendedHasher> StableHasher<H> {
147	/// Creates a new [`StableHasher`] from an already created [`ExtendedHasher`].
148	///
149	/// Useful when wanting to initialize a hasher with different parameters/keys.
150	///
151	/// Important: Any use of the hasher before being given to a [`StableHasher`]
152	/// is not covered by this crate guarentees and will make the resulting hash
153	/// NOT platform independent.
154	#[inline]
155	pub fn with_hasher(state: H) -> Self {
156	StableHasher { state }
157	}
158
159	/// Returns the typed-hash value for the values written.
160	///
161	/// The resulting typed-hash value is constructed from an
162	/// [`FromStableHash`] implemenation.
163	///
164	/// To be used in-place of [`Hasher::finish`].
165	#[inline]
166	#[must_use]
167	pub fn finish<W: FromStableHash<Hash = H::Hash>>(self) -> W {
168	W::from(self.state.finish())
169	}
170	}
171
172	impl<H: ExtendedHasher + fmt::Debug> fmt::Debug for StableHasher<H> {
173	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174	write!(f, "{:?}", self.state)
175	}
176	}
177
178	impl<H: ExtendedHasher> Hasher for StableHasher<H> {
179	/// Returns a combined hash.
180	///
181	/// For greater precision use instead [`StableHasher::finish`].
182	fn finish(&self) -> u64 {
183	Hasher::finish(&self.state)
184	}
185
186	#[inline]
187	fn write(&mut self, bytes: &[u8]) {
188	self.state.write(bytes);
189	}
190
191	#[cfg(feature = "nightly")]
192	#[inline]
193	fn write_str(&mut self, s: &str) {
194	self.state.write_str(s);
195	}
196
197	#[cfg(feature = "nightly")]
198	#[inline]
199	fn write_length_prefix(&mut self, len: usize) {
200	// Our impl for `usize` will extend it if needed.
201	self.write_usize(len);
202	}
203
204	#[inline]
205	fn write_u8(&mut self, i: u8) {
206	self.state.write_u8(i);
207	}
208
209	#[inline]
210	fn write_u16(&mut self, i: u16) {
211	self.state.short_write(i.to_le_bytes());
212	}
213
214	#[inline]
215	fn write_u32(&mut self, i: u32) {
216	self.state.short_write(i.to_le_bytes());
217	}
218
219	#[inline]
220	fn write_u64(&mut self, i: u64) {
221	self.state.short_write(i.to_le_bytes());
222	}
223
224	#[inline]
225	fn write_u128(&mut self, i: u128) {
226	self.write_u64(i as u64);
227	self.write_u64((i >> `64`) as u64);
228	}
229
230	#[inline]
231	fn write_usize(&mut self, i: usize) {
232	// Always treat usize as u64 so we get the same results on 32 and 64 bit
233	// platforms. This is important for symbol hashes when cross compiling,
234	// for example.
235	self.state.short_write((i as u64).to_le_bytes());
236	}
237
238	#[inline]
239	fn write_i8(&mut self, i: i8) {
240	self.state.write_i8(i);
241	}
242
243	#[inline]
244	fn write_i16(&mut self, i: i16) {
245	self.state.short_write((i as u16).to_le_bytes());
246	}
247
248	#[inline]
249	fn write_i32(&mut self, i: i32) {
250	self.state.short_write((i as u32).to_le_bytes());
251	}
252
253	#[inline]
254	fn write_i64(&mut self, i: i64) {
255	self.state.short_write((i as u64).to_le_bytes());
256	}
257
258	#[inline]
259	fn write_i128(&mut self, i: i128) {
260	self.state.write(&(i as u128).to_le_bytes());
261	}
262
263	#[inline]
264	fn write_isize(&mut self, i: isize) {
265	// Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit
266	// platforms. This is important for symbol hashes when cross compiling,
267	// for example. Sign extending here is preferable as it means that the
268	// same negative number hashes the same on both 32 and 64 bit platforms.
269	let value = i as u64;
270
271	// Cold path
272	#[cold]
273	#[inline(never)]
274	fn hash_value<H: ExtendedHasher>(state: &mut H, value: u64) {
275	state.write_u8(`0xFF`);
276	state.short_write(value.to_le_bytes());
277	}
278
279	// `isize` values often seem to have a small (positive) numeric value in practice.
280	// To exploit this, if the value is small, we will hash a smaller amount of bytes.
281	// However, we cannot just skip the leading zero bytes, as that would produce the same hash
282	// e.g. if you hash two values that have the same bit pattern when they are swapped.
283	// See https://github.com/rust-lang/rust/pull/93014 for context.
284	//
285	// Therefore, we employ the following strategy:
286	// 1) When we encounter a value that fits within a single byte (the most common case), we
287	// hash just that byte. This is the most common case that is being optimized. However, we do
288	// not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8).
289	// 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding
290	// 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
291	// `isize`s that fit within a different amount of bytes, they should always produce a different
292	// byte stream for the hasher.
293	if value < `0xFF` {
294	self.state.write_u8(value as u8);
295	} else {
296	hash_value(&mut self.state, value);
297	}
298	}
299	}
300