// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! An implementation of SipHash.

use core::cmp;
use core::hash;
use core::hash::Hasher as _;
use core::marker::PhantomData;
use core::mem;
use core::ptr;
use core::u64;

/// An implementation of SipHash 1-3.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
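///
/// # Example
///
/// A minimal sketch of streaming use through the standard `Hasher` trait
/// (the import path assumes this module is exposed as `siphasher::sip`, as
/// in the `siphasher` crate):
///
/// ```
/// use core::hash::Hasher;
/// use siphasher::sip::SipHasher13;
///
/// let mut hasher = SipHasher13::new_with_key(&[0u8; 16]);
/// hasher.write(b"some bytes");
/// hasher.write_u64(42);
/// let digest: u64 = hasher.finish();
/// ```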
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher13 {
    hasher: Hasher<Sip13Rounds>,
}

/// An implementation of SipHash 2-4.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher24 {
    hasher: Hasher<Sip24Rounds>,
}

/// An implementation of SipHash 2-4.
///
/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
///
/// SipHash is a general-purpose hashing function: it runs at a good
/// speed (competitive with Spooky and City) and permits strong _keyed_
/// hashing. This lets you key your hashtables from a strong RNG, such as
/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
///
/// Although the SipHash algorithm is considered to be generally strong,
/// it is not intended for cryptographic purposes. As such, all
/// cryptographic uses of this implementation are _strongly discouraged_.
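///
/// # Example
///
/// A minimal sketch of keyed hashing (the import path assumes this module is
/// exposed as `siphasher::sip`, as in the `siphasher` crate):
///
/// ```
/// use core::hash::Hasher;
/// use siphasher::sip::SipHasher;
///
/// // In real use the two key words would come from a strong RNG.
/// let hasher = SipHasher::new_with_keys(0x0706_0504_0302_0100, 0x0f0e_0d0c_0b0a_0908);
///
/// // The convenience method hashes a byte slice in one call...
/// let h1 = hasher.hash(b"hello world");
///
/// // ...and agrees with streaming the same bytes through `Hasher`.
/// let mut streaming = hasher;
/// streaming.write(b"hello world");
/// assert_eq!(h1, streaming.finish());
/// ```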
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher(SipHasher24);

#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct Hasher<S: Sip> {
    k0: u64,
    k1: u64,
    length: usize, // how many bytes we've processed
    state: State,  // hash State
    tail: u64,     // unprocessed bytes le
    ntail: usize,  // how many bytes in tail are valid
    _marker: PhantomData<S>,
}

#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
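
// One SipRound from the SipHash paper: an ARX (add-rotate-xor) mixing step
// over the four state words. `Sip13Rounds` and `Sip24Rounds` below apply it
// 1 or 2 times per message block and 3 or 4 times during finalization.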
macro_rules! compress {
    ($state:expr) => {{
        compress!($state.v0, $state.v1, $state.v2, $state.v3)
    }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}

/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        ptr::copy_nonoverlapping(
            $buf.as_ptr().add($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        data.to_le()
    }};
}

/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
/// sizes and avoid calling `memcpy`, which is good for speed.
///
/// Unsafe because: unchecked indexing at start..start+len
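///
/// For example, `u8to64_le(&[0xAA, 0xBB, 0xCC], 0, 3)` evaluates to `0x00CC_BBAA`.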
#[inline]
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
    debug_assert!(len < 8);
    let mut i: usize = 0; // current byte index (from LSB) in the output u64
    let mut out: u64 = 0;
    if i + 3 < len {
        out = load_int_le!(buf, start + i, u32) as u64;
        i += 4;
    }
    if i + 1 < len {
        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
        i += 2
    }
    if i < len {
        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
        i += 1;
    }
    debug_assert_eq!(i, len);
    out
}

impl SipHasher {
    /// Creates a new `SipHasher` with the two initial keys set to 0.
    #[inline]
    pub fn new() -> SipHasher {
        SipHasher::new_with_keys(0, 0)
    }

    /// Creates a `SipHasher` that is keyed off the provided keys.
    #[inline]
    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
        SipHasher(SipHasher24::new_with_keys(key0, key1))
    }

    /// Creates a `SipHasher` from a 16-byte key.
    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
        let mut b0 = [0u8; 8];
        let mut b1 = [0u8; 8];
        b0.copy_from_slice(&key[0..8]);
        b1.copy_from_slice(&key[8..16]);
        let key0 = u64::from_le_bytes(b0);
        let key1 = u64::from_le_bytes(b1);
        Self::new_with_keys(key0, key1)
    }

    /// Returns the two 64-bit keys used by this hasher.
    pub fn keys(&self) -> (u64, u64) {
        (self.0.hasher.k0, self.0.hasher.k1)
    }

    /// Returns the key used by this hasher as a 16-byte array.
    pub fn key(&self) -> [u8; 16] {
        let mut bytes = [0u8; 16];
        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
        bytes
    }

    /// Hashes a byte array. This is the easiest and safest way to use SipHash.
    #[inline]
    pub fn hash(&self, bytes: &[u8]) -> u64 {
        let mut hasher = self.0.hasher;
        hasher.write(bytes);
        hasher.finish()
    }
}

impl SipHasher13 {
    /// Creates a new `SipHasher13` with the two initial keys set to 0.
    #[inline]
    pub fn new() -> SipHasher13 {
        SipHasher13::new_with_keys(0, 0)
    }

    /// Creates a `SipHasher13` that is keyed off the provided keys.
    #[inline]
    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
        SipHasher13 {
            hasher: Hasher::new_with_keys(key0, key1),
        }
    }

    /// Creates a `SipHasher13` from a 16-byte key.
    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
        let mut b0 = [0u8; 8];
        let mut b1 = [0u8; 8];
        b0.copy_from_slice(&key[0..8]);
        b1.copy_from_slice(&key[8..16]);
        let key0 = u64::from_le_bytes(b0);
        let key1 = u64::from_le_bytes(b1);
        Self::new_with_keys(key0, key1)
    }

    /// Returns the two 64-bit keys used by this hasher.
    pub fn keys(&self) -> (u64, u64) {
        (self.hasher.k0, self.hasher.k1)
    }

    /// Returns the key used by this hasher as a 16-byte array.
    pub fn key(&self) -> [u8; 16] {
        let mut bytes = [0u8; 16];
        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
        bytes
    }

    /// Hashes a byte array. This is the easiest and safest way to use SipHash.
    #[inline]
    pub fn hash(&self, bytes: &[u8]) -> u64 {
        let mut hasher = self.hasher;
        hasher.write(bytes);
        hasher.finish()
    }
}

impl SipHasher24 {
    /// Creates a new `SipHasher24` with the two initial keys set to 0.
    #[inline]
    pub fn new() -> SipHasher24 {
        SipHasher24::new_with_keys(0, 0)
    }

    /// Creates a `SipHasher24` that is keyed off the provided keys.
    #[inline]
    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
        SipHasher24 {
            hasher: Hasher::new_with_keys(key0, key1),
        }
    }

    /// Creates a `SipHasher24` from a 16-byte key.
    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
        let mut b0 = [0u8; 8];
        let mut b1 = [0u8; 8];
        b0.copy_from_slice(&key[0..8]);
        b1.copy_from_slice(&key[8..16]);
        let key0 = u64::from_le_bytes(b0);
        let key1 = u64::from_le_bytes(b1);
        Self::new_with_keys(key0, key1)
    }

    /// Returns the two 64-bit keys used by this hasher.
    pub fn keys(&self) -> (u64, u64) {
        (self.hasher.k0, self.hasher.k1)
    }

    /// Returns the key used by this hasher as a 16-byte array.
    pub fn key(&self) -> [u8; 16] {
        let mut bytes = [0u8; 16];
        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
        bytes
    }

    /// Hashes a byte array. This is the easiest and safest way to use SipHash.
    #[inline]
    pub fn hash(&self, bytes: &[u8]) -> u64 {
        let mut hasher = self.hasher;
        hasher.write(bytes);
        hasher.finish()
    }
}

impl<S: Sip> Hasher<S> {
    #[inline]
    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
        let mut state = Hasher {
            k0: key0,
            k1: key1,
            length: 0,
            state: State {
                v0: 0,
                v1: 0,
                v2: 0,
                v3: 0,
            },
            tail: 0,
            ntail: 0,
            _marker: PhantomData,
        };
        state.reset();
        state
    }

    #[inline]
    fn reset(&mut self) {
        self.length = 0;
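        // The initialization constants below are the ASCII bytes of
        // "somepseudorandomlygeneratedbytes", as specified in the SipHash paper.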
        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
        self.state.v3 = self.k1 ^ 0x7465646279746573;
        self.ntail = 0;
    }

    // A specialized write function for values with size <= 8.
    //
    // The hashing of multi-byte integers depends on endianness. E.g.:
    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
    // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
    //
    // This function does the right thing for little-endian hardware. On
    // big-endian hardware `x` must be byte-swapped first to give the right
    // behaviour. After any byte-swapping, the input must be zero-extended to
    // 64-bits. The caller is responsible for the byte-swapping and
    // zero-extension.
    #[inline]
    fn short_write<T>(&mut self, _x: T, x: u64) {
        let size = mem::size_of::<T>();
        self.length += size;

        // The original number must be zero-extended, not sign-extended.
        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });

        // The number of bytes needed to fill `self.tail`.
        let needed = 8 - self.ntail;

        self.tail |= x << (8 * self.ntail);
        if size < needed {
            self.ntail += size;
            return;
        }

        // `self.tail` is full, process it.
        self.state.v3 ^= self.tail;
        S::c_rounds(&mut self.state);
        self.state.v0 ^= self.tail;

        self.ntail = size - needed;
        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
    }
}

impl hash::Hasher for SipHasher {
    #[inline]
    fn write(&mut self, msg: &[u8]) {
        self.0.write(msg)
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.0.finish()
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.0.write_usize(i);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.0.write_u8(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.0.write_u16(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.0.write_u32(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.0.write_u64(i);
    }
}

impl hash::Hasher for SipHasher13 {
    #[inline]
    fn write(&mut self, msg: &[u8]) {
        self.hasher.write(msg)
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.hasher.finish()
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.hasher.write_usize(i);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.hasher.write_u8(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.hasher.write_u16(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.hasher.write_u32(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.hasher.write_u64(i);
    }
}

impl hash::Hasher for SipHasher24 {
    #[inline]
    fn write(&mut self, msg: &[u8]) {
        self.hasher.write(msg)
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.hasher.finish()
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.hasher.write_usize(i);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.hasher.write_u8(i);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.hasher.write_u16(i);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.hasher.write_u32(i);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.hasher.write_u64(i);
    }
}

impl<S: Sip> hash::Hasher for Hasher<S> {
    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.short_write(i, i.to_le() as u64);
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.short_write(i, i as u64);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.short_write(i, i.to_le() as u64);
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.short_write(i, i.to_le());
    }

    #[inline]
    fn write(&mut self, msg: &[u8]) {
        let length = msg.len();
        self.length += length;

        let mut needed = 0;

        if self.ntail != 0 {
            needed = 8 - self.ntail;
            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
            if length < needed {
                self.ntail += length;
                return;
            } else {
                self.state.v3 ^= self.tail;
                S::c_rounds(&mut self.state);
                self.state.v0 ^= self.tail;
                self.ntail = 0;
            }
        }

        // Buffered tail is now flushed, process new input.
        let len = length - needed;
        let left = len & 0x7;

        let mut i = needed;
        while i < len - left {
            let mi = unsafe { load_int_le!(msg, i, u64) };

            self.state.v3 ^= mi;
            S::c_rounds(&mut self.state);
            self.state.v0 ^= mi;

            i += 8;
        }

        self.tail = unsafe { u8to64_le(msg, i, left) };
        self.ntail = left;
    }

    #[inline]
    fn finish(&self) -> u64 {
        let mut state = self.state;

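        // Finalization: the last block packs the message length modulo 256 into
        // the top byte, with the remaining tail bytes in the low 7 bytes.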
        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;

        state.v3 ^= b;
        S::c_rounds(&mut state);
        state.v0 ^= b;

        state.v2 ^= 0xff;
        S::d_rounds(&mut state);

        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
    }
}

impl<S: Sip> Default for Hasher<S> {
    /// Creates a `Hasher<S>` with the two initial keys set to 0.
    #[inline]
    fn default() -> Hasher<S> {
        Hasher::new_with_keys(0, 0)
    }
}

#[doc(hidden)]
trait Sip {
    fn c_rounds(_: &mut State);
    fn d_rounds(_: &mut State);
}

#[derive(Debug, Clone, Copy, Default)]
struct Sip13Rounds;

impl Sip for Sip13Rounds {
    #[inline]
    fn c_rounds(state: &mut State) {
        compress!(state);
    }

    #[inline]
    fn d_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
        compress!(state);
    }
}

#[derive(Debug, Clone, Copy, Default)]
struct Sip24Rounds;

impl Sip for Sip24Rounds {
    #[inline]
    fn c_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
    }

    #[inline]
    fn d_rounds(state: &mut State) {
        compress!(state);
        compress!(state);
        compress!(state);
        compress!(state);
    }
}
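
#[cfg(test)]
mod tests {
    // A small, self-contained sketch of self-consistency checks for the public
    // API above. They only rely on behaviour visible in this file: key
    // round-tripping and the equivalence of `hash` with streaming writes.
    use super::{SipHasher, SipHasher13, SipHasher24};
    use core::hash::Hasher;

    #[test]
    fn key_round_trips() {
        let key = [
            0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
            0x0e, 0x0f,
        ];
        assert_eq!(SipHasher13::new_with_key(&key).key(), key);
        assert_eq!(SipHasher24::new_with_key(&key).key(), key);
        assert_eq!(
            SipHasher::new_with_key(&key).keys(),
            (0x0706050403020100, 0x0f0e0d0c0b0a0908)
        );
    }

    #[test]
    fn hash_matches_streaming_writes() {
        let hasher = SipHasher24::new_with_keys(1, 2);
        let one_shot = hasher.hash(b"hello world");

        // Writing the same bytes in two chunks must produce the same digest.
        let mut streaming = hasher;
        streaming.write(b"hello ");
        streaming.write(b"world");
        assert_eq!(one_shot, streaming.finish());
    }

    #[test]
    fn default_is_zero_keyed() {
        assert_eq!(
            SipHasher13::default().hash(b"abc"),
            SipHasher13::new_with_keys(0, 0).hash(b"abc")
        );
    }
}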