1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash with a 128-bit output.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18use core::ptr;
19use core::u64;
20
21/// A 128-bit (2x64) hash output
22#[derive(Debug, Clone, Copy, Default)]
23#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24pub struct Hash128 {
25 pub h1: u64,
26 pub h2: u64,
27}
28
29impl From<u128> for Hash128 {
30 fn from(v: u128) -> Self {
31 Hash128 {
32 h1: v as u64,
33 h2: (v >> 64) as u64,
34 }
35 }
36}
37
38impl From<Hash128> for u128 {
39 fn from(h: Hash128) -> u128 {
40 (h.h1 as u128) | ((h.h2 as u128) << 64)
41 }
42}
43
44/// An implementation of SipHash128 1-3.
45#[derive(Debug, Clone, Copy, Default)]
46#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47pub struct SipHasher13 {
48 hasher: Hasher<Sip13Rounds>,
49}
50
51/// An implementation of SipHash128 2-4.
52#[derive(Debug, Clone, Copy, Default)]
53#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54pub struct SipHasher24 {
55 hasher: Hasher<Sip24Rounds>,
56}
57
58/// An implementation of SipHash128 2-4.
59///
60/// SipHash is a general-purpose hashing function: it runs at a good
61/// speed (competitive with Spooky and City) and permits strong _keyed_
62/// hashing. This lets you key your hashtables from a strong RNG, such as
63/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
64///
65/// Although the SipHash algorithm is considered to be generally strong,
66/// it is not intended for cryptographic purposes. As such, all
67/// cryptographic uses of this implementation are _strongly discouraged_.
68#[derive(Debug, Clone, Copy, Default)]
69pub struct SipHasher(SipHasher24);
70
71#[derive(Debug, Copy)]
72#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
73struct Hasher<S: Sip> {
74 k0: u64,
75 k1: u64,
76 length: usize, // how many bytes we've processed
77 state: State, // hash State
78 tail: u64, // unprocessed bytes le
79 ntail: usize, // how many bytes in tail are valid
80 _marker: PhantomData<S>,
81}
82
83#[derive(Debug, Clone, Copy)]
84#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
85struct State {
86 // v0, v2 and v1, v3 show up in pairs in the algorithm,
87 // and simd implementations of SipHash will use vectors
88 // of v02 and v13. By placing them in this order in the struct,
89 // the compiler can pick up on just a few simd optimizations by itself.
90 v0: u64,
91 v2: u64,
92 v1: u64,
93 v3: u64,
94}
95
96macro_rules! compress {
97 ($state:expr) => {{
98 compress!($state.v0, $state.v1, $state.v2, $state.v3)
99 }};
100 ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
101 $v0 = $v0.wrapping_add($v1);
102 $v1 = $v1.rotate_left(13);
103 $v1 ^= $v0;
104 $v0 = $v0.rotate_left(32);
105 $v2 = $v2.wrapping_add($v3);
106 $v3 = $v3.rotate_left(16);
107 $v3 ^= $v2;
108 $v0 = $v0.wrapping_add($v3);
109 $v3 = $v3.rotate_left(21);
110 $v3 ^= $v0;
111 $v2 = $v2.wrapping_add($v1);
112 $v1 = $v1.rotate_left(17);
113 $v1 ^= $v2;
114 $v2 = $v2.rotate_left(32);
115 }};
116}
117
118/// Loads an integer of the desired type from a byte stream, in LE order. Uses
119/// `copy_nonoverlapping` to let the compiler generate the most efficient way
120/// to load it from a possibly unaligned address.
121///
122/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
123macro_rules! load_int_le {
124 ($buf:expr, $i:expr, $int_ty:ident) => {{
125 debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
126 let mut data = 0 as $int_ty;
127 ptr::copy_nonoverlapping(
128 $buf.as_ptr().add($i),
129 &mut data as *mut _ as *mut u8,
130 mem::size_of::<$int_ty>(),
131 );
132 data.to_le()
133 }};
134}
135
136/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
137/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
138/// sizes and avoid calling `memcpy`, which is good for speed.
139///
140/// Unsafe because: unchecked indexing at start..start+len
141#[inline]
142unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
143 debug_assert!(len < 8);
144 let mut i: usize = 0; // current byte index (from LSB) in the output u64
145 let mut out: u64 = 0;
146 if i + 3 < len {
147 out = load_int_le!(buf, start + i, u32) as u64;
148 i += 4;
149 }
150 if i + 1 < len {
151 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
152 i += 2
153 }
154 if i < len {
155 out |= (*buf.get_unchecked(index:start + i) as u64) << (i * 8);
156 i += 1;
157 }
158 debug_assert_eq!(i, len);
159 out
160}
161
162pub trait Hasher128 {
163 /// Return a 128-bit hash
164 fn finish128(&self) -> Hash128;
165}
166
167impl SipHasher {
168 /// Creates a new `SipHasher` with the two initial keys set to 0.
169 #[inline]
170 pub fn new() -> SipHasher {
171 SipHasher::new_with_keys(0, 0)
172 }
173
174 /// Creates a `SipHasher` that is keyed off the provided keys.
175 #[inline]
176 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
177 SipHasher(SipHasher24::new_with_keys(key0, key1))
178 }
179
180 /// Creates a `SipHasher` from a 16 byte key.
181 pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
182 let mut b0 = [0u8; 8];
183 let mut b1 = [0u8; 8];
184 b0.copy_from_slice(&key[0..8]);
185 b1.copy_from_slice(&key[8..16]);
186 let key0 = u64::from_le_bytes(b0);
187 let key1 = u64::from_le_bytes(b1);
188 Self::new_with_keys(key0, key1)
189 }
190
191 /// Get the keys used by this hasher
192 pub fn keys(&self) -> (u64, u64) {
193 (self.0.hasher.k0, self.0.hasher.k1)
194 }
195
196 /// Get the key used by this hasher as a 16 byte vector
197 pub fn key(&self) -> [u8; 16] {
198 let mut bytes = [0u8; 16];
199 bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
200 bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
201 bytes
202 }
203
204 /// Hash a byte array - This is the easiest and safest way to use SipHash.
205 #[inline]
206 pub fn hash(&self, bytes: &[u8]) -> Hash128 {
207 let mut hasher = self.0.hasher;
208 hasher.write(bytes);
209 hasher.finish128()
210 }
211}
212
213impl Hasher128 for SipHasher {
214 /// Return a 128-bit hash
215 #[inline]
216 fn finish128(&self) -> Hash128 {
217 self.0.finish128()
218 }
219}
220
221impl SipHasher13 {
222 /// Creates a new `SipHasher13` with the two initial keys set to 0.
223 #[inline]
224 pub fn new() -> SipHasher13 {
225 SipHasher13::new_with_keys(0, 0)
226 }
227
228 /// Creates a `SipHasher13` that is keyed off the provided keys.
229 #[inline]
230 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
231 SipHasher13 {
232 hasher: Hasher::new_with_keys(key0, key1),
233 }
234 }
235
236 /// Creates a `SipHasher13` from a 16 byte key.
237 pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
238 let mut b0 = [0u8; 8];
239 let mut b1 = [0u8; 8];
240 b0.copy_from_slice(&key[0..8]);
241 b1.copy_from_slice(&key[8..16]);
242 let key0 = u64::from_le_bytes(b0);
243 let key1 = u64::from_le_bytes(b1);
244 Self::new_with_keys(key0, key1)
245 }
246
247 /// Get the keys used by this hasher
248 pub fn keys(&self) -> (u64, u64) {
249 (self.hasher.k0, self.hasher.k1)
250 }
251
252 /// Get the key used by this hasher as a 16 byte vector
253 pub fn key(&self) -> [u8; 16] {
254 let mut bytes = [0u8; 16];
255 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
256 bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
257 bytes
258 }
259
260 /// Hash a byte array - This is the easiest and safest way to use SipHash.
261 #[inline]
262 pub fn hash(&self, bytes: &[u8]) -> Hash128 {
263 let mut hasher = self.hasher;
264 hasher.write(bytes);
265 hasher.finish128()
266 }
267}
268
269impl Hasher128 for SipHasher13 {
270 /// Return a 128-bit hash
271 #[inline]
272 fn finish128(&self) -> Hash128 {
273 self.hasher.finish128()
274 }
275}
276
277impl SipHasher24 {
278 /// Creates a new `SipHasher24` with the two initial keys set to 0.
279 #[inline]
280 pub fn new() -> SipHasher24 {
281 SipHasher24::new_with_keys(0, 0)
282 }
283
284 /// Creates a `SipHasher24` that is keyed off the provided keys.
285 #[inline]
286 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
287 SipHasher24 {
288 hasher: Hasher::new_with_keys(key0, key1),
289 }
290 }
291
292 /// Creates a `SipHasher24` from a 16 byte key.
293 pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
294 let mut b0 = [0u8; 8];
295 let mut b1 = [0u8; 8];
296 b0.copy_from_slice(&key[0..8]);
297 b1.copy_from_slice(&key[8..16]);
298 let key0 = u64::from_le_bytes(b0);
299 let key1 = u64::from_le_bytes(b1);
300 Self::new_with_keys(key0, key1)
301 }
302
303 /// Get the keys used by this hasher
304 pub fn keys(&self) -> (u64, u64) {
305 (self.hasher.k0, self.hasher.k1)
306 }
307
308 /// Get the key used by this hasher as a 16 byte vector
309 pub fn key(&self) -> [u8; 16] {
310 let mut bytes = [0u8; 16];
311 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
312 bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
313 bytes
314 }
315
316 /// Hash a byte array - This is the easiest and safest way to use SipHash.
317 #[inline]
318 pub fn hash(&self, bytes: &[u8]) -> Hash128 {
319 let mut hasher = self.hasher;
320 hasher.write(bytes);
321 hasher.finish128()
322 }
323}
324
325impl Hasher128 for SipHasher24 {
326 /// Return a 128-bit hash
327 #[inline]
328 fn finish128(&self) -> Hash128 {
329 self.hasher.finish128()
330 }
331}
332
333impl<S: Sip> Hasher<S> {
334 #[inline]
335 fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
336 let mut state = Hasher {
337 k0: key0,
338 k1: key1,
339 length: 0,
340 state: State {
341 v0: 0,
342 v1: 0xee,
343 v2: 0,
344 v3: 0,
345 },
346 tail: 0,
347 ntail: 0,
348 _marker: PhantomData,
349 };
350 state.reset();
351 state
352 }
353
354 #[inline]
355 fn reset(&mut self) {
356 self.length = 0;
357 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
358 self.state.v1 = self.k1 ^ 0x646f72616e646f83;
359 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
360 self.state.v3 = self.k1 ^ 0x7465646279746573;
361 self.ntail = 0;
362 }
363
364 // A specialized write function for values with size <= 8.
365 //
366 // The hashing of multi-byte integers depends on endianness. E.g.:
367 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
368 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
369 //
370 // This function does the right thing for little-endian hardware. On
371 // big-endian hardware `x` must be byte-swapped first to give the right
372 // behaviour. After any byte-swapping, the input must be zero-extended to
373 // 64-bits. The caller is responsible for the byte-swapping and
374 // zero-extension.
375 #[inline]
376 fn short_write<T>(&mut self, _x: T, x: u64) {
377 let size = mem::size_of::<T>();
378 self.length += size;
379
380 // The original number must be zero-extended, not sign-extended.
381 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
382
383 // The number of bytes needed to fill `self.tail`.
384 let needed = 8 - self.ntail;
385
386 self.tail |= x << (8 * self.ntail);
387 if size < needed {
388 self.ntail += size;
389 return;
390 }
391
392 // `self.tail` is full, process it.
393 self.state.v3 ^= self.tail;
394 S::c_rounds(&mut self.state);
395 self.state.v0 ^= self.tail;
396
397 self.ntail = size - needed;
398 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
399 }
400}
401
402impl<S: Sip> Hasher<S> {
403 #[inline]
404 pub fn finish128(&self) -> Hash128 {
405 let mut state: State = self.state;
406
407 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
408
409 state.v3 ^= b;
410 S::c_rounds(&mut state);
411 state.v0 ^= b;
412
413 state.v2 ^= 0xee;
414 S::d_rounds(&mut state);
415 let h1: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
416
417 state.v1 ^= 0xdd;
418 S::d_rounds(&mut state);
419 let h2: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
420
421 Hash128 { h1, h2 }
422 }
423}
424
425impl hash::Hasher for SipHasher {
426 #[inline]
427 fn write(&mut self, msg: &[u8]) {
428 self.0.write(msg)
429 }
430
431 #[inline]
432 fn finish(&self) -> u64 {
433 self.0.finish()
434 }
435
436 #[inline]
437 fn write_usize(&mut self, i: usize) {
438 self.0.write_usize(i);
439 }
440
441 #[inline]
442 fn write_u8(&mut self, i: u8) {
443 self.0.write_u8(i);
444 }
445
446 #[inline]
447 fn write_u16(&mut self, i: u16) {
448 self.0.write_u16(i);
449 }
450
451 #[inline]
452 fn write_u32(&mut self, i: u32) {
453 self.0.write_u32(i);
454 }
455
456 #[inline]
457 fn write_u64(&mut self, i: u64) {
458 self.0.write_u64(i);
459 }
460}
461
462impl hash::Hasher for SipHasher13 {
463 #[inline]
464 fn write(&mut self, msg: &[u8]) {
465 self.hasher.write(msg)
466 }
467
468 #[inline]
469 fn finish(&self) -> u64 {
470 self.hasher.finish()
471 }
472
473 #[inline]
474 fn write_usize(&mut self, i: usize) {
475 self.hasher.write_usize(i);
476 }
477
478 #[inline]
479 fn write_u8(&mut self, i: u8) {
480 self.hasher.write_u8(i);
481 }
482
483 #[inline]
484 fn write_u16(&mut self, i: u16) {
485 self.hasher.write_u16(i);
486 }
487
488 #[inline]
489 fn write_u32(&mut self, i: u32) {
490 self.hasher.write_u32(i);
491 }
492
493 #[inline]
494 fn write_u64(&mut self, i: u64) {
495 self.hasher.write_u64(i);
496 }
497}
498
499impl hash::Hasher for SipHasher24 {
500 #[inline]
501 fn write(&mut self, msg: &[u8]) {
502 self.hasher.write(msg)
503 }
504
505 #[inline]
506 fn finish(&self) -> u64 {
507 self.hasher.finish()
508 }
509
510 #[inline]
511 fn write_usize(&mut self, i: usize) {
512 self.hasher.write_usize(i);
513 }
514
515 #[inline]
516 fn write_u8(&mut self, i: u8) {
517 self.hasher.write_u8(i);
518 }
519
520 #[inline]
521 fn write_u16(&mut self, i: u16) {
522 self.hasher.write_u16(i);
523 }
524
525 #[inline]
526 fn write_u32(&mut self, i: u32) {
527 self.hasher.write_u32(i);
528 }
529
530 #[inline]
531 fn write_u64(&mut self, i: u64) {
532 self.hasher.write_u64(i);
533 }
534}
535
536impl<S: Sip> hash::Hasher for Hasher<S> {
537 #[inline]
538 fn write_usize(&mut self, i: usize) {
539 self.short_write(i, i.to_le() as u64);
540 }
541
542 #[inline]
543 fn write_u8(&mut self, i: u8) {
544 self.short_write(i, i as u64);
545 }
546
547 #[inline]
548 fn write_u32(&mut self, i: u32) {
549 self.short_write(i, i.to_le() as u64);
550 }
551
552 #[inline]
553 fn write_u64(&mut self, i: u64) {
554 self.short_write(i, i.to_le());
555 }
556
557 #[inline]
558 fn write(&mut self, msg: &[u8]) {
559 let length = msg.len();
560 self.length += length;
561
562 let mut needed = 0;
563
564 if self.ntail != 0 {
565 needed = 8 - self.ntail;
566 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
567 if length < needed {
568 self.ntail += length;
569 return;
570 } else {
571 self.state.v3 ^= self.tail;
572 S::c_rounds(&mut self.state);
573 self.state.v0 ^= self.tail;
574 self.ntail = 0;
575 }
576 }
577
578 // Buffered tail is now flushed, process new input.
579 let len = length - needed;
580 let left = len & 0x7;
581
582 let mut i = needed;
583 while i < len - left {
584 let mi = unsafe { load_int_le!(msg, i, u64) };
585
586 self.state.v3 ^= mi;
587 S::c_rounds(&mut self.state);
588 self.state.v0 ^= mi;
589
590 i += 8;
591 }
592
593 self.tail = unsafe { u8to64_le(msg, i, left) };
594 self.ntail = left;
595 }
596
597 #[inline]
598 fn finish(&self) -> u64 {
599 self.finish128().h2
600 }
601}
602
603impl<S: Sip> Clone for Hasher<S> {
604 #[inline]
605 fn clone(&self) -> Hasher<S> {
606 Hasher {
607 k0: self.k0,
608 k1: self.k1,
609 length: self.length,
610 state: self.state,
611 tail: self.tail,
612 ntail: self.ntail,
613 _marker: self._marker,
614 }
615 }
616}
617
618impl<S: Sip> Default for Hasher<S> {
619 /// Creates a `Hasher<S>` with the two initial keys set to 0.
620 #[inline]
621 fn default() -> Hasher<S> {
622 Hasher::new_with_keys(key0:0, key1:0)
623 }
624}
625
626#[doc(hidden)]
627trait Sip {
628 fn c_rounds(_: &mut State);
629 fn d_rounds(_: &mut State);
630}
631
632#[derive(Debug, Clone, Copy, Default)]
633struct Sip13Rounds;
634
635impl Sip for Sip13Rounds {
636 #[inline]
637 fn c_rounds(state: &mut State) {
638 compress!(state);
639 }
640
641 #[inline]
642 fn d_rounds(state: &mut State) {
643 compress!(state);
644 compress!(state);
645 compress!(state);
646 }
647}
648
649#[derive(Debug, Clone, Copy, Default)]
650struct Sip24Rounds;
651
652impl Sip for Sip24Rounds {
653 #[inline]
654 fn c_rounds(state: &mut State) {
655 compress!(state);
656 compress!(state);
657 }
658
659 #[inline]
660 fn d_rounds(state: &mut State) {
661 compress!(state);
662 compress!(state);
663 compress!(state);
664 compress!(state);
665 }
666}
667
668impl Hash128 {
669 /// Convert into a 16-bytes vector
670 pub fn as_bytes(&self) -> [u8; 16] {
671 let mut bytes = [0u8; 16];
672 let h1 = self.h1.to_le();
673 let h2 = self.h2.to_le();
674 unsafe {
675 ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.as_mut_ptr(), 8);
676 ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.as_mut_ptr().add(8), 8);
677 }
678 bytes
679 }
680
681 /// Convert into a `u128`
682 #[inline]
683 pub fn as_u128(&self) -> u128 {
684 let h1 = self.h1.to_le();
685 let h2 = self.h2.to_le();
686 h1 as u128 | ((h2 as u128) << 64)
687 }
688
689 /// Convert into `(u64, u64)`
690 #[inline]
691 pub fn as_u64(&self) -> (u64, u64) {
692 let h1 = self.h1.to_le();
693 let h2 = self.h2.to_le();
694 (h1, h2)
695 }
696}
697