1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18use core::ptr;
19use core::u64;
20
21/// An implementation of SipHash 1-3.
22///
23/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
24#[derive(Debug, Clone, Copy, Default)]
25#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26pub struct SipHasher13 {
27 hasher: Hasher<Sip13Rounds>,
28}
29
30/// An implementation of SipHash 2-4.
31///
32/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
33#[derive(Debug, Clone, Copy, Default)]
34#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35pub struct SipHasher24 {
36 hasher: Hasher<Sip24Rounds>,
37}
38
39/// An implementation of SipHash 2-4.
40///
41/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
42///
43/// SipHash is a general-purpose hashing function: it runs at a good
44/// speed (competitive with Spooky and City) and permits strong _keyed_
45/// hashing. This lets you key your hashtables from a strong RNG, such as
46/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
47///
48/// Although the SipHash algorithm is considered to be generally strong,
49/// it is not intended for cryptographic purposes. As such, all
50/// cryptographic uses of this implementation are _strongly discouraged_.
51#[derive(Debug, Clone, Copy, Default)]
52#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53pub struct SipHasher(SipHasher24);
54
55#[derive(Debug, Clone, Copy)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57struct Hasher<S: Sip> {
58 k0: u64,
59 k1: u64,
60 length: usize, // how many bytes we've processed
61 state: State, // hash State
62 tail: u64, // unprocessed bytes le
63 ntail: usize, // how many bytes in tail are valid
64 _marker: PhantomData<S>,
65}
66
67#[derive(Debug, Clone, Copy)]
68#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69struct State {
70 // v0, v2 and v1, v3 show up in pairs in the algorithm,
71 // and simd implementations of SipHash will use vectors
72 // of v02 and v13. By placing them in this order in the struct,
73 // the compiler can pick up on just a few simd optimizations by itself.
74 v0: u64,
75 v2: u64,
76 v1: u64,
77 v3: u64,
78}
79
80macro_rules! compress {
81 ($state:expr) => {{
82 compress!($state.v0, $state.v1, $state.v2, $state.v3)
83 }};
84 ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
85 $v0 = $v0.wrapping_add($v1);
86 $v1 = $v1.rotate_left(13);
87 $v1 ^= $v0;
88 $v0 = $v0.rotate_left(32);
89 $v2 = $v2.wrapping_add($v3);
90 $v3 = $v3.rotate_left(16);
91 $v3 ^= $v2;
92 $v0 = $v0.wrapping_add($v3);
93 $v3 = $v3.rotate_left(21);
94 $v3 ^= $v0;
95 $v2 = $v2.wrapping_add($v1);
96 $v1 = $v1.rotate_left(17);
97 $v1 ^= $v2;
98 $v2 = $v2.rotate_left(32);
99 }};
100}
101
102/// Loads an integer of the desired type from a byte stream, in LE order. Uses
103/// `copy_nonoverlapping` to let the compiler generate the most efficient way
104/// to load it from a possibly unaligned address.
105///
106/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
107macro_rules! load_int_le {
108 ($buf:expr, $i:expr, $int_ty:ident) => {{
109 debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
110 let mut data = 0 as $int_ty;
111 ptr::copy_nonoverlapping(
112 $buf.as_ptr().add($i),
113 &mut data as *mut _ as *mut u8,
114 mem::size_of::<$int_ty>(),
115 );
116 data.to_le()
117 }};
118}
119
120/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
121/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
122/// sizes and avoid calling `memcpy`, which is good for speed.
123///
124/// Unsafe because: unchecked indexing at start..start+len
125#[inline]
126unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
127 debug_assert!(len < 8);
128 let mut i: usize = 0; // current byte index (from LSB) in the output u64
129 let mut out: u64 = 0;
130 if i + 3 < len {
131 out = load_int_le!(buf, start + i, u32) as u64;
132 i += 4;
133 }
134 if i + 1 < len {
135 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
136 i += 2
137 }
138 if i < len {
139 out |= (*buf.get_unchecked(index:start + i) as u64) << (i * 8);
140 i += 1;
141 }
142 debug_assert_eq!(i, len);
143 out
144}
145
146impl SipHasher {
147 /// Creates a new `SipHasher` with the two initial keys set to 0.
148 #[inline]
149 pub fn new() -> SipHasher {
150 SipHasher::new_with_keys(0, 0)
151 }
152
153 /// Creates a `SipHasher` that is keyed off the provided keys.
154 #[inline]
155 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
156 SipHasher(SipHasher24::new_with_keys(key0, key1))
157 }
158
159 /// Creates a `SipHasher` from a 16 byte key.
160 pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
161 let mut b0 = [0u8; 8];
162 let mut b1 = [0u8; 8];
163 b0.copy_from_slice(&key[0..8]);
164 b1.copy_from_slice(&key[8..16]);
165 let key0 = u64::from_le_bytes(b0);
166 let key1 = u64::from_le_bytes(b1);
167 Self::new_with_keys(key0, key1)
168 }
169
170 /// Get the keys used by this hasher
171 pub fn keys(&self) -> (u64, u64) {
172 (self.0.hasher.k0, self.0.hasher.k1)
173 }
174
175 /// Get the key used by this hasher as a 16 byte vector
176 pub fn key(&self) -> [u8; 16] {
177 let mut bytes = [0u8; 16];
178 bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
179 bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
180 bytes
181 }
182
183 /// Hash a byte array - This is the easiest and safest way to use SipHash.
184 #[inline]
185 pub fn hash(&self, bytes: &[u8]) -> u64 {
186 let mut hasher = self.0.hasher;
187 hasher.write(bytes);
188 hasher.finish()
189 }
190}
191
192impl SipHasher13 {
193 /// Creates a new `SipHasher13` with the two initial keys set to 0.
194 #[inline]
195 pub fn new() -> SipHasher13 {
196 SipHasher13::new_with_keys(0, 0)
197 }
198
199 /// Creates a `SipHasher13` that is keyed off the provided keys.
200 #[inline]
201 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202 SipHasher13 {
203 hasher: Hasher::new_with_keys(key0, key1),
204 }
205 }
206
207 /// Creates a `SipHasher13` from a 16 byte key.
208 pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
209 let mut b0 = [0u8; 8];
210 let mut b1 = [0u8; 8];
211 b0.copy_from_slice(&key[0..8]);
212 b1.copy_from_slice(&key[8..16]);
213 let key0 = u64::from_le_bytes(b0);
214 let key1 = u64::from_le_bytes(b1);
215 Self::new_with_keys(key0, key1)
216 }
217
218 /// Get the keys used by this hasher
219 pub fn keys(&self) -> (u64, u64) {
220 (self.hasher.k0, self.hasher.k1)
221 }
222
223 /// Get the key used by this hasher as a 16 byte vector
224 pub fn key(&self) -> [u8; 16] {
225 let mut bytes = [0u8; 16];
226 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
227 bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
228 bytes
229 }
230
231 /// Hash a byte array - This is the easiest and safest way to use SipHash.
232 #[inline]
233 pub fn hash(&self, bytes: &[u8]) -> u64 {
234 let mut hasher = self.hasher;
235 hasher.write(bytes);
236 hasher.finish()
237 }
238}
239
240impl SipHasher24 {
241 /// Creates a new `SipHasher24` with the two initial keys set to 0.
242 #[inline]
243 pub fn new() -> SipHasher24 {
244 SipHasher24::new_with_keys(0, 0)
245 }
246
247 /// Creates a `SipHasher24` that is keyed off the provided keys.
248 #[inline]
249 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
250 SipHasher24 {
251 hasher: Hasher::new_with_keys(key0, key1),
252 }
253 }
254
255 /// Creates a `SipHasher24` from a 16 byte key.
256 pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
257 let mut b0 = [0u8; 8];
258 let mut b1 = [0u8; 8];
259 b0.copy_from_slice(&key[0..8]);
260 b1.copy_from_slice(&key[8..16]);
261 let key0 = u64::from_le_bytes(b0);
262 let key1 = u64::from_le_bytes(b1);
263 Self::new_with_keys(key0, key1)
264 }
265
266 /// Get the keys used by this hasher
267 pub fn keys(&self) -> (u64, u64) {
268 (self.hasher.k0, self.hasher.k1)
269 }
270
271 /// Get the key used by this hasher as a 16 byte vector
272 pub fn key(&self) -> [u8; 16] {
273 let mut bytes = [0u8; 16];
274 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
275 bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
276 bytes
277 }
278
279 /// Hash a byte array - This is the easiest and safest way to use SipHash.
280 #[inline]
281 pub fn hash(&self, bytes: &[u8]) -> u64 {
282 let mut hasher = self.hasher;
283 hasher.write(bytes);
284 hasher.finish()
285 }
286}
287
288impl<S: Sip> Hasher<S> {
289 #[inline]
290 fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
291 let mut state = Hasher {
292 k0: key0,
293 k1: key1,
294 length: 0,
295 state: State {
296 v0: 0,
297 v1: 0,
298 v2: 0,
299 v3: 0,
300 },
301 tail: 0,
302 ntail: 0,
303 _marker: PhantomData,
304 };
305 state.reset();
306 state
307 }
308
309 #[inline]
310 fn reset(&mut self) {
311 self.length = 0;
312 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
313 self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
314 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
315 self.state.v3 = self.k1 ^ 0x7465646279746573;
316 self.ntail = 0;
317 }
318
319 // A specialized write function for values with size <= 8.
320 //
321 // The hashing of multi-byte integers depends on endianness. E.g.:
322 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
323 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
324 //
325 // This function does the right thing for little-endian hardware. On
326 // big-endian hardware `x` must be byte-swapped first to give the right
327 // behaviour. After any byte-swapping, the input must be zero-extended to
328 // 64-bits. The caller is responsible for the byte-swapping and
329 // zero-extension.
330 #[inline]
331 fn short_write<T>(&mut self, _x: T, x: u64) {
332 let size = mem::size_of::<T>();
333 self.length += size;
334
335 // The original number must be zero-extended, not sign-extended.
336 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
337
338 // The number of bytes needed to fill `self.tail`.
339 let needed = 8 - self.ntail;
340
341 self.tail |= x << (8 * self.ntail);
342 if size < needed {
343 self.ntail += size;
344 return;
345 }
346
347 // `self.tail` is full, process it.
348 self.state.v3 ^= self.tail;
349 S::c_rounds(&mut self.state);
350 self.state.v0 ^= self.tail;
351
352 self.ntail = size - needed;
353 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
354 }
355}
356
357impl hash::Hasher for SipHasher {
358 #[inline]
359 fn write(&mut self, msg: &[u8]) {
360 self.0.write(msg)
361 }
362
363 #[inline]
364 fn finish(&self) -> u64 {
365 self.0.finish()
366 }
367
368 #[inline]
369 fn write_usize(&mut self, i: usize) {
370 self.0.write_usize(i);
371 }
372
373 #[inline]
374 fn write_u8(&mut self, i: u8) {
375 self.0.write_u8(i);
376 }
377
378 #[inline]
379 fn write_u16(&mut self, i: u16) {
380 self.0.write_u16(i);
381 }
382
383 #[inline]
384 fn write_u32(&mut self, i: u32) {
385 self.0.write_u32(i);
386 }
387
388 #[inline]
389 fn write_u64(&mut self, i: u64) {
390 self.0.write_u64(i);
391 }
392}
393
394impl hash::Hasher for SipHasher13 {
395 #[inline]
396 fn write(&mut self, msg: &[u8]) {
397 self.hasher.write(msg)
398 }
399
400 #[inline]
401 fn finish(&self) -> u64 {
402 self.hasher.finish()
403 }
404
405 #[inline]
406 fn write_usize(&mut self, i: usize) {
407 self.hasher.write_usize(i);
408 }
409
410 #[inline]
411 fn write_u8(&mut self, i: u8) {
412 self.hasher.write_u8(i);
413 }
414
415 #[inline]
416 fn write_u16(&mut self, i: u16) {
417 self.hasher.write_u16(i);
418 }
419
420 #[inline]
421 fn write_u32(&mut self, i: u32) {
422 self.hasher.write_u32(i);
423 }
424
425 #[inline]
426 fn write_u64(&mut self, i: u64) {
427 self.hasher.write_u64(i);
428 }
429}
430
431impl hash::Hasher for SipHasher24 {
432 #[inline]
433 fn write(&mut self, msg: &[u8]) {
434 self.hasher.write(msg)
435 }
436
437 #[inline]
438 fn finish(&self) -> u64 {
439 self.hasher.finish()
440 }
441
442 #[inline]
443 fn write_usize(&mut self, i: usize) {
444 self.hasher.write_usize(i);
445 }
446
447 #[inline]
448 fn write_u8(&mut self, i: u8) {
449 self.hasher.write_u8(i);
450 }
451
452 #[inline]
453 fn write_u16(&mut self, i: u16) {
454 self.hasher.write_u16(i);
455 }
456
457 #[inline]
458 fn write_u32(&mut self, i: u32) {
459 self.hasher.write_u32(i);
460 }
461
462 #[inline]
463 fn write_u64(&mut self, i: u64) {
464 self.hasher.write_u64(i);
465 }
466}
467
468impl<S: Sip> hash::Hasher for Hasher<S> {
469 #[inline]
470 fn write_usize(&mut self, i: usize) {
471 self.short_write(i, i.to_le() as u64);
472 }
473
474 #[inline]
475 fn write_u8(&mut self, i: u8) {
476 self.short_write(i, i as u64);
477 }
478
479 #[inline]
480 fn write_u32(&mut self, i: u32) {
481 self.short_write(i, i.to_le() as u64);
482 }
483
484 #[inline]
485 fn write_u64(&mut self, i: u64) {
486 self.short_write(i, i.to_le());
487 }
488
489 #[inline]
490 fn write(&mut self, msg: &[u8]) {
491 let length = msg.len();
492 self.length += length;
493
494 let mut needed = 0;
495
496 if self.ntail != 0 {
497 needed = 8 - self.ntail;
498 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
499 if length < needed {
500 self.ntail += length;
501 return;
502 } else {
503 self.state.v3 ^= self.tail;
504 S::c_rounds(&mut self.state);
505 self.state.v0 ^= self.tail;
506 self.ntail = 0;
507 }
508 }
509
510 // Buffered tail is now flushed, process new input.
511 let len = length - needed;
512 let left = len & 0x7;
513
514 let mut i = needed;
515 while i < len - left {
516 let mi = unsafe { load_int_le!(msg, i, u64) };
517
518 self.state.v3 ^= mi;
519 S::c_rounds(&mut self.state);
520 self.state.v0 ^= mi;
521
522 i += 8;
523 }
524
525 self.tail = unsafe { u8to64_le(msg, i, left) };
526 self.ntail = left;
527 }
528
529 #[inline]
530 fn finish(&self) -> u64 {
531 let mut state = self.state;
532
533 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
534
535 state.v3 ^= b;
536 S::c_rounds(&mut state);
537 state.v0 ^= b;
538
539 state.v2 ^= 0xff;
540 S::d_rounds(&mut state);
541
542 state.v0 ^ state.v1 ^ state.v2 ^ state.v3
543 }
544}
545
546impl<S: Sip> Default for Hasher<S> {
547 /// Creates a `Hasher<S>` with the two initial keys set to 0.
548 #[inline]
549 fn default() -> Hasher<S> {
550 Hasher::new_with_keys(key0:0, key1:0)
551 }
552}
553
554#[doc(hidden)]
555trait Sip {
556 fn c_rounds(_: &mut State);
557 fn d_rounds(_: &mut State);
558}
559
560#[derive(Debug, Clone, Copy, Default)]
561struct Sip13Rounds;
562
563impl Sip for Sip13Rounds {
564 #[inline]
565 fn c_rounds(state: &mut State) {
566 compress!(state);
567 }
568
569 #[inline]
570 fn d_rounds(state: &mut State) {
571 compress!(state);
572 compress!(state);
573 compress!(state);
574 }
575}
576
577#[derive(Debug, Clone, Copy, Default)]
578struct Sip24Rounds;
579
580impl Sip for Sip24Rounds {
581 #[inline]
582 fn c_rounds(state: &mut State) {
583 compress!(state);
584 compress!(state);
585 }
586
587 #[inline]
588 fn d_rounds(state: &mut State) {
589 compress!(state);
590 compress!(state);
591 compress!(state);
592 compress!(state);
593 }
594}
595