1use crate::UnalignedBuffer;
2use core::{cmp, hash::Hasher};
3
4#[cfg(feature = "serialize")]
5use serde::{Deserialize, Serialize};
6
7const CHUNK_SIZE: usize = 32;
8
9pub const PRIME_1: u64 = 11_400_714_785_074_694_791;
10pub const PRIME_2: u64 = 14_029_467_366_897_019_727;
11pub const PRIME_3: u64 = 1_609_587_929_392_839_161;
12pub const PRIME_4: u64 = 9_650_029_242_287_828_579;
13pub const PRIME_5: u64 = 2_870_177_450_012_600_261;
14
15#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
16#[derive(Copy, Clone, PartialEq)]
17struct XxCore {
18 v1: u64,
19 v2: u64,
20 v3: u64,
21 v4: u64,
22}
23
24/// Calculates the 64-bit hash.
25#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))]
26#[derive(Debug, Copy, Clone, PartialEq)]
27pub struct XxHash64 {
28 total_len: u64,
29 seed: u64,
30 core: XxCore,
31 #[cfg_attr(feature = "serialize", serde(flatten))]
32 buffer: Buffer,
33}
34
35impl XxCore {
36 fn with_seed(seed: u64) -> XxCore {
37 XxCore {
38 v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2),
39 v2: seed.wrapping_add(PRIME_2),
40 v3: seed,
41 v4: seed.wrapping_sub(PRIME_1),
42 }
43 }
44
45 #[inline(always)]
46 fn ingest_chunks<I>(&mut self, values: I)
47 where
48 I: IntoIterator<Item = [u64; 4]>,
49 {
50 #[inline(always)]
51 fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 {
52 value = value.wrapping_mul(PRIME_2);
53 current_value = current_value.wrapping_add(value);
54 current_value = current_value.rotate_left(31);
55 current_value.wrapping_mul(PRIME_1)
56 }
57
58 // By drawing these out, we can avoid going back and forth to
59 // memory. It only really helps for large files, when we need
60 // to iterate multiple times here.
61
62 let mut v1 = self.v1;
63 let mut v2 = self.v2;
64 let mut v3 = self.v3;
65 let mut v4 = self.v4;
66
67 for [n1, n2, n3, n4] in values {
68 v1 = ingest_one_number(v1, n1.to_le());
69 v2 = ingest_one_number(v2, n2.to_le());
70 v3 = ingest_one_number(v3, n3.to_le());
71 v4 = ingest_one_number(v4, n4.to_le());
72 }
73
74 self.v1 = v1;
75 self.v2 = v2;
76 self.v3 = v3;
77 self.v4 = v4;
78 }
79
80 #[inline(always)]
81 fn finish(&self) -> u64 {
82 // The original code pulls out local vars for v[1234]
83 // here. Performance tests did not show that to be effective
84 // here, presumably because this method is not called in a
85 // tight loop.
86
87 #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel
88 let mut hash;
89
90 hash = self.v1.rotate_left(1);
91 hash = hash.wrapping_add(self.v2.rotate_left(7));
92 hash = hash.wrapping_add(self.v3.rotate_left(12));
93 hash = hash.wrapping_add(self.v4.rotate_left(18));
94
95 #[inline(always)]
96 fn mix_one(mut hash: u64, mut value: u64) -> u64 {
97 value = value.wrapping_mul(PRIME_2);
98 value = value.rotate_left(31);
99 value = value.wrapping_mul(PRIME_1);
100 hash ^= value;
101 hash = hash.wrapping_mul(PRIME_1);
102 hash.wrapping_add(PRIME_4)
103 }
104
105 hash = mix_one(hash, self.v1);
106 hash = mix_one(hash, self.v2);
107 hash = mix_one(hash, self.v3);
108 hash = mix_one(hash, self.v4);
109
110 hash
111 }
112}
113
114impl core::fmt::Debug for XxCore {
115 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> {
116 write!(
117 f,
118 "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}",
119 self.v1, self.v2, self.v3, self.v4
120 )
121 }
122}
123
124#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
125#[derive(Debug, Copy, Clone, Default, PartialEq)]
126#[repr(align(8))]
127#[cfg_attr(feature = "serialize", serde(transparent))]
128struct AlignToU64<T>(T);
129
130#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))]
131#[derive(Debug, Copy, Clone, Default, PartialEq)]
132struct Buffer {
133 #[cfg_attr(feature = "serialize", serde(rename = "buffer"))]
134 data: AlignToU64<[u8; CHUNK_SIZE]>,
135 #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))]
136 len: usize,
137}
138
139impl Buffer {
140 fn data(&self) -> &[u8] {
141 &self.data.0[..self.len]
142 }
143
144 /// Consumes as much of the parameter as it can, returning the unused part.
145 fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
146 let to_use = cmp::min(self.available(), data.len());
147 let (data, remaining) = data.split_at(to_use);
148 self.data.0[self.len..][..to_use].copy_from_slice(data);
149 self.len += to_use;
150 remaining
151 }
152
153 fn set_data(&mut self, data: &[u8]) {
154 debug_assert!(self.is_empty());
155 debug_assert!(data.len() < CHUNK_SIZE);
156 self.data.0[..data.len()].copy_from_slice(data);
157 self.len = data.len();
158 }
159
160 fn available(&self) -> usize {
161 CHUNK_SIZE - self.len
162 }
163
164 fn is_empty(&self) -> bool {
165 self.len == 0
166 }
167
168 fn is_full(&self) -> bool {
169 self.len == CHUNK_SIZE
170 }
171}
172
173impl XxHash64 {
174 /// Constructs the hash with an initial seed
175 pub fn with_seed(seed: u64) -> XxHash64 {
176 XxHash64 {
177 total_len: 0,
178 seed,
179 core: XxCore::with_seed(seed),
180 buffer: Buffer::default(),
181 }
182 }
183
184 pub(crate) fn write(&mut self, bytes: &[u8]) {
185 let remaining = self.maybe_consume_bytes(bytes);
186 if !remaining.is_empty() {
187 let mut remaining = UnalignedBuffer::new(remaining);
188 self.core.ingest_chunks(&mut remaining);
189 self.buffer.set_data(remaining.remaining());
190 }
191 self.total_len += bytes.len() as u64;
192 }
193
194 // Consume bytes and try to make `self.buffer` empty.
195 // If there are not enough bytes, `self.buffer` can be non-empty, and this
196 // function returns an empty slice.
197 fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] {
198 if self.buffer.is_empty() {
199 data
200 } else {
201 let data = self.buffer.consume(data);
202 if self.buffer.is_full() {
203 let mut u64s = UnalignedBuffer::new(self.buffer.data());
204 self.core.ingest_chunks(&mut u64s);
205 debug_assert!(u64s.remaining().is_empty());
206 self.buffer.len = 0;
207 }
208 data
209 }
210 }
211
212 pub(crate) fn finish(&self) -> u64 {
213 let mut hash = if self.total_len >= CHUNK_SIZE as u64 {
214 // We have processed at least one full chunk
215 self.core.finish()
216 } else {
217 self.seed.wrapping_add(PRIME_5)
218 };
219
220 hash = hash.wrapping_add(self.total_len);
221
222 let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data());
223 for buffered_u64 in &mut buffered_u64s {
224 let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2);
225 k1 = k1.rotate_left(31);
226 k1 = k1.wrapping_mul(PRIME_1);
227 hash ^= k1;
228 hash = hash.rotate_left(27);
229 hash = hash.wrapping_mul(PRIME_1);
230 hash = hash.wrapping_add(PRIME_4);
231 }
232
233 let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining());
234 for buffered_u32 in &mut buffered_u32s {
235 let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1);
236 hash ^= k1;
237 hash = hash.rotate_left(23);
238 hash = hash.wrapping_mul(PRIME_2);
239 hash = hash.wrapping_add(PRIME_3);
240 }
241
242 let buffered_u8s = buffered_u32s.remaining();
243 for &buffered_u8 in buffered_u8s {
244 let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5);
245 hash ^= k1;
246 hash = hash.rotate_left(11);
247 hash = hash.wrapping_mul(PRIME_1);
248 }
249
250 // The final intermixing
251 hash ^= hash >> 33;
252 hash = hash.wrapping_mul(PRIME_2);
253 hash ^= hash >> 29;
254 hash = hash.wrapping_mul(PRIME_3);
255 hash ^= hash >> 32;
256
257 hash
258 }
259
260 pub fn seed(&self) -> u64 {
261 self.seed
262 }
263
264 pub fn total_len(&self) -> u64 {
265 self.total_len
266 }
267}
268
269impl Default for XxHash64 {
270 fn default() -> XxHash64 {
271 XxHash64::with_seed(0)
272 }
273}
274
275impl Hasher for XxHash64 {
276 fn finish(&self) -> u64 {
277 XxHash64::finish(self)
278 }
279
280 fn write(&mut self, bytes: &[u8]) {
281 XxHash64::write(self, bytes)
282 }
283}
284
285#[cfg(feature = "std")]
286pub use crate::std_support::sixty_four::RandomXxHashBuilder64;
287
288#[cfg(test)]
289mod test {
290 use super::{RandomXxHashBuilder64, XxHash64};
291 use std::collections::HashMap;
292 use std::hash::BuildHasherDefault;
293 use std::prelude::v1::*;
294
295 #[test]
296 fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
297 let bytes: Vec<_> = (0..32).map(|_| 0).collect();
298
299 let mut byte_by_byte = XxHash64::with_seed(0);
300 for byte in bytes.chunks(1) {
301 byte_by_byte.write(byte);
302 }
303
304 let mut one_chunk = XxHash64::with_seed(0);
305 one_chunk.write(&bytes);
306
307 assert_eq!(byte_by_byte.core, one_chunk.core);
308 }
309
310 #[test]
311 fn hash_of_nothing_matches_c_implementation() {
312 let mut hasher = XxHash64::with_seed(0);
313 hasher.write(&[]);
314 assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
315 }
316
317 #[test]
318 fn hash_of_single_byte_matches_c_implementation() {
319 let mut hasher = XxHash64::with_seed(0);
320 hasher.write(&[42]);
321 assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
322 }
323
324 #[test]
325 fn hash_of_multiple_bytes_matches_c_implementation() {
326 let mut hasher = XxHash64::with_seed(0);
327 hasher.write(b"Hello, world!\0");
328 assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
329 }
330
331 #[test]
332 fn hash_of_multiple_chunks_matches_c_implementation() {
333 let bytes: Vec<_> = (0..100).collect();
334 let mut hasher = XxHash64::with_seed(0);
335 hasher.write(&bytes);
336 assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
337 }
338
339 #[test]
340 fn hash_with_different_seed_matches_c_implementation() {
341 let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
342 hasher.write(&[]);
343 assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
344 }
345
346 #[test]
347 fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
348 let bytes: Vec<_> = (0..100).collect();
349 let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91);
350 hasher.write(&bytes);
351 assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
352 }
353
354 #[test]
355 fn can_be_used_in_a_hashmap_with_a_default_seed() {
356 let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default();
357 hash.insert(42, "the answer");
358 assert_eq!(hash.get(&42), Some(&"the answer"));
359 }
360
361 #[test]
362 fn can_be_used_in_a_hashmap_with_a_random_seed() {
363 let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default();
364 hash.insert(42, "the answer");
365 assert_eq!(hash.get(&42), Some(&"the answer"));
366 }
367
368 #[cfg(feature = "serialize")]
369 type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>;
370
371 #[cfg(feature = "serialize")]
372 #[test]
373 fn test_serialization_cycle() -> TestResult {
374 let mut hasher = XxHash64::with_seed(0);
375 hasher.write(b"Hello, world!\0");
376 hasher.finish();
377
378 let serialized = serde_json::to_string(&hasher)?;
379 let unserialized: XxHash64 = serde_json::from_str(&serialized)?;
380 assert_eq!(hasher, unserialized);
381 Ok(())
382 }
383
384 #[cfg(feature = "serialize")]
385 #[test]
386 fn test_serialization_stability() -> TestResult {
387 let mut hasher = XxHash64::with_seed(0);
388 hasher.write(b"Hello, world!\0");
389 hasher.finish();
390
391 let serialized = r#"{
392 "total_len": 14,
393 "seed": 0,
394 "core": {
395 "v1": 6983438078262162902,
396 "v2": 14029467366897019727,
397 "v3": 0,
398 "v4": 7046029288634856825
399 },
400 "buffer": [
401 72, 101, 108, 108, 111, 44, 32, 119,
402 111, 114, 108, 100, 33, 0, 0, 0,
403 0, 0, 0, 0, 0, 0, 0, 0,
404 0, 0, 0, 0, 0, 0, 0, 0
405 ],
406 "buffer_usage": 14
407 }"#;
408
409 let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap();
410 assert_eq!(hasher, unserialized);
411 Ok(())
412 }
413}
414