1 | // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at |
3 | // http://rust-lang.org/COPYRIGHT. |
4 | // |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | // option. This file may not be copied, modified, or distributed |
9 | // except according to those terms. |
10 | |
11 | //! An implementation of SipHash. |
12 | |
13 | use core::cmp; |
14 | use core::hash; |
15 | use core::marker::PhantomData; |
16 | use core::mem; |
17 | use core::ptr; |
18 | use core::u64; |
19 | |
20 | /// An implementation of SipHash 1-3. |
21 | /// |
22 | /// See: <https://www.aumasson.jp/siphash/siphash.pdf> |
23 | #[derive (Debug, Clone, Copy, Default)] |
24 | #[cfg_attr (feature = "serde" , derive(serde::Serialize, serde::Deserialize))] |
25 | pub struct SipHasher13 { |
26 | hasher: Hasher<Sip13Rounds>, |
27 | } |
28 | |
29 | /// An implementation of SipHash 2-4. |
30 | /// |
31 | /// See: <https://www.aumasson.jp/siphash/siphash.pdf> |
32 | #[derive (Debug, Clone, Copy, Default)] |
33 | #[cfg_attr (feature = "serde" , derive(serde::Serialize, serde::Deserialize))] |
34 | pub struct SipHasher24 { |
35 | hasher: Hasher<Sip24Rounds>, |
36 | } |
37 | |
38 | /// An implementation of SipHash 2-4. |
39 | /// |
40 | /// See: <https://www.aumasson.jp/siphash/siphash.pdf> |
41 | /// |
42 | /// SipHash is a general-purpose hashing function: it runs at a good |
43 | /// speed (competitive with Spooky and City) and permits strong _keyed_ |
44 | /// hashing. This lets you key your hashtables from a strong RNG, such as |
45 | /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html). |
46 | /// |
47 | /// Although the SipHash algorithm is considered to be generally strong, |
48 | /// it is not intended for cryptographic purposes. As such, all |
49 | /// cryptographic uses of this implementation are _strongly discouraged_. |
50 | #[derive (Debug, Clone, Copy, Default)] |
51 | #[cfg_attr (feature = "serde" , derive(serde::Serialize, serde::Deserialize))] |
52 | pub struct SipHasher(SipHasher24); |
53 | |
54 | #[derive (Debug, Clone, Copy)] |
55 | #[cfg_attr (feature = "serde" , derive(serde::Serialize, serde::Deserialize))] |
56 | struct Hasher<S: Sip> { |
57 | k0: u64, |
58 | k1: u64, |
59 | length: usize, // how many bytes we've processed |
60 | state: State, // hash State |
61 | tail: u64, // unprocessed bytes le |
62 | ntail: usize, // how many bytes in tail are valid |
63 | _marker: PhantomData<S>, |
64 | } |
65 | |
66 | #[derive (Debug, Clone, Copy)] |
67 | #[cfg_attr (feature = "serde" , derive(serde::Serialize, serde::Deserialize))] |
68 | struct State { |
69 | // v0, v2 and v1, v3 show up in pairs in the algorithm, |
70 | // and simd implementations of SipHash will use vectors |
71 | // of v02 and v13. By placing them in this order in the struct, |
72 | // the compiler can pick up on just a few simd optimizations by itself. |
73 | v0: u64, |
74 | v2: u64, |
75 | v1: u64, |
76 | v3: u64, |
77 | } |
78 | |
79 | macro_rules! compress { |
80 | ($state:expr) => {{ |
81 | compress!($state.v0, $state.v1, $state.v2, $state.v3) |
82 | }}; |
83 | ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{ |
84 | $v0 = $v0.wrapping_add($v1); |
85 | $v1 = $v1.rotate_left(13); |
86 | $v1 ^= $v0; |
87 | $v0 = $v0.rotate_left(32); |
88 | $v2 = $v2.wrapping_add($v3); |
89 | $v3 = $v3.rotate_left(16); |
90 | $v3 ^= $v2; |
91 | $v0 = $v0.wrapping_add($v3); |
92 | $v3 = $v3.rotate_left(21); |
93 | $v3 ^= $v0; |
94 | $v2 = $v2.wrapping_add($v1); |
95 | $v1 = $v1.rotate_left(17); |
96 | $v1 ^= $v2; |
97 | $v2 = $v2.rotate_left(32); |
98 | }}; |
99 | } |
100 | |
101 | /// Loads an integer of the desired type from a byte stream, in LE order. Uses |
102 | /// `copy_nonoverlapping` to let the compiler generate the most efficient way |
103 | /// to load it from a possibly unaligned address. |
104 | /// |
105 | /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)` |
106 | macro_rules! load_int_le { |
107 | ($buf:expr, $i:expr, $int_ty:ident) => {{ |
108 | debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len()); |
109 | let mut data = 0 as $int_ty; |
110 | ptr::copy_nonoverlapping( |
111 | $buf.as_ptr().add($i), |
112 | &mut data as *mut _ as *mut u8, |
113 | mem::size_of::<$int_ty>(), |
114 | ); |
115 | data.to_le() |
116 | }}; |
117 | } |
118 | |
119 | /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the |
120 | /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed |
121 | /// sizes and avoid calling `memcpy`, which is good for speed. |
122 | /// |
123 | /// Unsafe because: unchecked indexing at start..start+len |
124 | #[inline ] |
125 | unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 { |
126 | debug_assert!(len < 8); |
127 | let mut i: usize = 0; // current byte index (from LSB) in the output u64 |
128 | let mut out: u64 = 0; |
129 | if i + 3 < len { |
130 | out = load_int_le!(buf, start + i, u32) as u64; |
131 | i += 4; |
132 | } |
133 | if i + 1 < len { |
134 | out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8); |
135 | i += 2 |
136 | } |
137 | if i < len { |
138 | out |= (*buf.get_unchecked(index:start + i) as u64) << (i * 8); |
139 | i += 1; |
140 | } |
141 | debug_assert_eq!(i, len); |
142 | out |
143 | } |
144 | |
145 | impl SipHasher { |
146 | /// Creates a new `SipHasher` with the two initial keys set to 0. |
147 | #[inline ] |
148 | pub fn new() -> SipHasher { |
149 | SipHasher::new_with_keys(0, 0) |
150 | } |
151 | |
152 | /// Creates a `SipHasher` that is keyed off the provided keys. |
153 | #[inline ] |
154 | pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher { |
155 | SipHasher(SipHasher24::new_with_keys(key0, key1)) |
156 | } |
157 | |
158 | /// Creates a `SipHasher` from a 16 byte key. |
159 | pub fn new_with_key(key: &[u8; 16]) -> SipHasher { |
160 | let mut b0 = [0u8; 8]; |
161 | let mut b1 = [0u8; 8]; |
162 | b0.copy_from_slice(&key[0..8]); |
163 | b1.copy_from_slice(&key[8..16]); |
164 | let key0 = u64::from_le_bytes(b0); |
165 | let key1 = u64::from_le_bytes(b1); |
166 | Self::new_with_keys(key0, key1) |
167 | } |
168 | |
169 | /// Get the keys used by this hasher |
170 | pub fn keys(&self) -> (u64, u64) { |
171 | (self.0.hasher.k0, self.0.hasher.k1) |
172 | } |
173 | |
174 | /// Get the key used by this hasher as a 16 byte vector |
175 | pub fn key(&self) -> [u8; 16] { |
176 | let mut bytes = [0u8; 16]; |
177 | bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes()); |
178 | bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes()); |
179 | bytes |
180 | } |
181 | } |
182 | |
183 | impl SipHasher13 { |
184 | /// Creates a new `SipHasher13` with the two initial keys set to 0. |
185 | #[inline ] |
186 | pub fn new() -> SipHasher13 { |
187 | SipHasher13::new_with_keys(0, 0) |
188 | } |
189 | |
190 | /// Creates a `SipHasher13` that is keyed off the provided keys. |
191 | #[inline ] |
192 | pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 { |
193 | SipHasher13 { |
194 | hasher: Hasher::new_with_keys(key0, key1), |
195 | } |
196 | } |
197 | |
198 | /// Creates a `SipHasher13` from a 16 byte key. |
199 | pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 { |
200 | let mut b0 = [0u8; 8]; |
201 | let mut b1 = [0u8; 8]; |
202 | b0.copy_from_slice(&key[0..8]); |
203 | b1.copy_from_slice(&key[8..16]); |
204 | let key0 = u64::from_le_bytes(b0); |
205 | let key1 = u64::from_le_bytes(b1); |
206 | Self::new_with_keys(key0, key1) |
207 | } |
208 | |
209 | /// Get the keys used by this hasher |
210 | pub fn keys(&self) -> (u64, u64) { |
211 | (self.hasher.k0, self.hasher.k1) |
212 | } |
213 | |
214 | /// Get the key used by this hasher as a 16 byte vector |
215 | pub fn key(&self) -> [u8; 16] { |
216 | let mut bytes = [0u8; 16]; |
217 | bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes()); |
218 | bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes()); |
219 | bytes |
220 | } |
221 | } |
222 | |
223 | impl SipHasher24 { |
224 | /// Creates a new `SipHasher24` with the two initial keys set to 0. |
225 | #[inline ] |
226 | pub fn new() -> SipHasher24 { |
227 | SipHasher24::new_with_keys(0, 0) |
228 | } |
229 | |
230 | /// Creates a `SipHasher24` that is keyed off the provided keys. |
231 | #[inline ] |
232 | pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 { |
233 | SipHasher24 { |
234 | hasher: Hasher::new_with_keys(key0, key1), |
235 | } |
236 | } |
237 | |
238 | /// Creates a `SipHasher24` from a 16 byte key. |
239 | pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 { |
240 | let mut b0 = [0u8; 8]; |
241 | let mut b1 = [0u8; 8]; |
242 | b0.copy_from_slice(&key[0..8]); |
243 | b1.copy_from_slice(&key[8..16]); |
244 | let key0 = u64::from_le_bytes(b0); |
245 | let key1 = u64::from_le_bytes(b1); |
246 | Self::new_with_keys(key0, key1) |
247 | } |
248 | |
249 | /// Get the keys used by this hasher |
250 | pub fn keys(&self) -> (u64, u64) { |
251 | (self.hasher.k0, self.hasher.k1) |
252 | } |
253 | |
254 | /// Get the key used by this hasher as a 16 byte vector |
255 | pub fn key(&self) -> [u8; 16] { |
256 | let mut bytes = [0u8; 16]; |
257 | bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes()); |
258 | bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes()); |
259 | bytes |
260 | } |
261 | } |
262 | |
263 | impl<S: Sip> Hasher<S> { |
264 | #[inline ] |
265 | fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> { |
266 | let mut state = Hasher { |
267 | k0: key0, |
268 | k1: key1, |
269 | length: 0, |
270 | state: State { |
271 | v0: 0, |
272 | v1: 0, |
273 | v2: 0, |
274 | v3: 0, |
275 | }, |
276 | tail: 0, |
277 | ntail: 0, |
278 | _marker: PhantomData, |
279 | }; |
280 | state.reset(); |
281 | state |
282 | } |
283 | |
284 | #[inline ] |
285 | fn reset(&mut self) { |
286 | self.length = 0; |
287 | self.state.v0 = self.k0 ^ 0x736f6d6570736575; |
288 | self.state.v1 = self.k1 ^ 0x646f72616e646f6d; |
289 | self.state.v2 = self.k0 ^ 0x6c7967656e657261; |
290 | self.state.v3 = self.k1 ^ 0x7465646279746573; |
291 | self.ntail = 0; |
292 | } |
293 | |
294 | // A specialized write function for values with size <= 8. |
295 | // |
296 | // The hashing of multi-byte integers depends on endianness. E.g.: |
297 | // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])` |
298 | // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])` |
299 | // |
300 | // This function does the right thing for little-endian hardware. On |
301 | // big-endian hardware `x` must be byte-swapped first to give the right |
302 | // behaviour. After any byte-swapping, the input must be zero-extended to |
303 | // 64-bits. The caller is responsible for the byte-swapping and |
304 | // zero-extension. |
305 | #[inline ] |
306 | fn short_write<T>(&mut self, _x: T, x: u64) { |
307 | let size = mem::size_of::<T>(); |
308 | self.length += size; |
309 | |
310 | // The original number must be zero-extended, not sign-extended. |
311 | debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true }); |
312 | |
313 | // The number of bytes needed to fill `self.tail`. |
314 | let needed = 8 - self.ntail; |
315 | |
316 | self.tail |= x << (8 * self.ntail); |
317 | if size < needed { |
318 | self.ntail += size; |
319 | return; |
320 | } |
321 | |
322 | // `self.tail` is full, process it. |
323 | self.state.v3 ^= self.tail; |
324 | S::c_rounds(&mut self.state); |
325 | self.state.v0 ^= self.tail; |
326 | |
327 | self.ntail = size - needed; |
328 | self.tail = if needed < 8 { x >> (8 * needed) } else { 0 }; |
329 | } |
330 | } |
331 | |
332 | impl hash::Hasher for SipHasher { |
333 | #[inline ] |
334 | fn write(&mut self, msg: &[u8]) { |
335 | self.0.write(msg) |
336 | } |
337 | |
338 | #[inline ] |
339 | fn finish(&self) -> u64 { |
340 | self.0.finish() |
341 | } |
342 | |
343 | #[inline ] |
344 | fn write_usize(&mut self, i: usize) { |
345 | self.0.write_usize(i); |
346 | } |
347 | |
348 | #[inline ] |
349 | fn write_u8(&mut self, i: u8) { |
350 | self.0.write_u8(i); |
351 | } |
352 | |
353 | #[inline ] |
354 | fn write_u16(&mut self, i: u16) { |
355 | self.0.write_u16(i); |
356 | } |
357 | |
358 | #[inline ] |
359 | fn write_u32(&mut self, i: u32) { |
360 | self.0.write_u32(i); |
361 | } |
362 | |
363 | #[inline ] |
364 | fn write_u64(&mut self, i: u64) { |
365 | self.0.write_u64(i); |
366 | } |
367 | } |
368 | |
369 | impl hash::Hasher for SipHasher13 { |
370 | #[inline ] |
371 | fn write(&mut self, msg: &[u8]) { |
372 | self.hasher.write(msg) |
373 | } |
374 | |
375 | #[inline ] |
376 | fn finish(&self) -> u64 { |
377 | self.hasher.finish() |
378 | } |
379 | |
380 | #[inline ] |
381 | fn write_usize(&mut self, i: usize) { |
382 | self.hasher.write_usize(i); |
383 | } |
384 | |
385 | #[inline ] |
386 | fn write_u8(&mut self, i: u8) { |
387 | self.hasher.write_u8(i); |
388 | } |
389 | |
390 | #[inline ] |
391 | fn write_u16(&mut self, i: u16) { |
392 | self.hasher.write_u16(i); |
393 | } |
394 | |
395 | #[inline ] |
396 | fn write_u32(&mut self, i: u32) { |
397 | self.hasher.write_u32(i); |
398 | } |
399 | |
400 | #[inline ] |
401 | fn write_u64(&mut self, i: u64) { |
402 | self.hasher.write_u64(i); |
403 | } |
404 | } |
405 | |
406 | impl hash::Hasher for SipHasher24 { |
407 | #[inline ] |
408 | fn write(&mut self, msg: &[u8]) { |
409 | self.hasher.write(msg) |
410 | } |
411 | |
412 | #[inline ] |
413 | fn finish(&self) -> u64 { |
414 | self.hasher.finish() |
415 | } |
416 | |
417 | #[inline ] |
418 | fn write_usize(&mut self, i: usize) { |
419 | self.hasher.write_usize(i); |
420 | } |
421 | |
422 | #[inline ] |
423 | fn write_u8(&mut self, i: u8) { |
424 | self.hasher.write_u8(i); |
425 | } |
426 | |
427 | #[inline ] |
428 | fn write_u16(&mut self, i: u16) { |
429 | self.hasher.write_u16(i); |
430 | } |
431 | |
432 | #[inline ] |
433 | fn write_u32(&mut self, i: u32) { |
434 | self.hasher.write_u32(i); |
435 | } |
436 | |
437 | #[inline ] |
438 | fn write_u64(&mut self, i: u64) { |
439 | self.hasher.write_u64(i); |
440 | } |
441 | } |
442 | |
443 | impl<S: Sip> hash::Hasher for Hasher<S> { |
444 | #[inline ] |
445 | fn write_usize(&mut self, i: usize) { |
446 | self.short_write(i, i.to_le() as u64); |
447 | } |
448 | |
449 | #[inline ] |
450 | fn write_u8(&mut self, i: u8) { |
451 | self.short_write(i, i as u64); |
452 | } |
453 | |
454 | #[inline ] |
455 | fn write_u32(&mut self, i: u32) { |
456 | self.short_write(i, i.to_le() as u64); |
457 | } |
458 | |
459 | #[inline ] |
460 | fn write_u64(&mut self, i: u64) { |
461 | self.short_write(i, i.to_le() as u64); |
462 | } |
463 | |
464 | #[inline ] |
465 | fn write(&mut self, msg: &[u8]) { |
466 | let length = msg.len(); |
467 | self.length += length; |
468 | |
469 | let mut needed = 0; |
470 | |
471 | if self.ntail != 0 { |
472 | needed = 8 - self.ntail; |
473 | self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail); |
474 | if length < needed { |
475 | self.ntail += length; |
476 | return; |
477 | } else { |
478 | self.state.v3 ^= self.tail; |
479 | S::c_rounds(&mut self.state); |
480 | self.state.v0 ^= self.tail; |
481 | self.ntail = 0; |
482 | } |
483 | } |
484 | |
485 | // Buffered tail is now flushed, process new input. |
486 | let len = length - needed; |
487 | let left = len & 0x7; |
488 | |
489 | let mut i = needed; |
490 | while i < len - left { |
491 | let mi = unsafe { load_int_le!(msg, i, u64) }; |
492 | |
493 | self.state.v3 ^= mi; |
494 | S::c_rounds(&mut self.state); |
495 | self.state.v0 ^= mi; |
496 | |
497 | i += 8; |
498 | } |
499 | |
500 | self.tail = unsafe { u8to64_le(msg, i, left) }; |
501 | self.ntail = left; |
502 | } |
503 | |
504 | #[inline ] |
505 | fn finish(&self) -> u64 { |
506 | let mut state = self.state; |
507 | |
508 | let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail; |
509 | |
510 | state.v3 ^= b; |
511 | S::c_rounds(&mut state); |
512 | state.v0 ^= b; |
513 | |
514 | state.v2 ^= 0xff; |
515 | S::d_rounds(&mut state); |
516 | |
517 | state.v0 ^ state.v1 ^ state.v2 ^ state.v3 |
518 | } |
519 | } |
520 | |
521 | impl<S: Sip> Default for Hasher<S> { |
522 | /// Creates a `Hasher<S>` with the two initial keys set to 0. |
523 | #[inline ] |
524 | fn default() -> Hasher<S> { |
525 | Hasher::new_with_keys(key0:0, key1:0) |
526 | } |
527 | } |
528 | |
529 | #[doc (hidden)] |
530 | trait Sip { |
531 | fn c_rounds(_: &mut State); |
532 | fn d_rounds(_: &mut State); |
533 | } |
534 | |
535 | #[derive (Debug, Clone, Copy, Default)] |
536 | struct Sip13Rounds; |
537 | |
538 | impl Sip for Sip13Rounds { |
539 | #[inline ] |
540 | fn c_rounds(state: &mut State) { |
541 | compress!(state); |
542 | } |
543 | |
544 | #[inline ] |
545 | fn d_rounds(state: &mut State) { |
546 | compress!(state); |
547 | compress!(state); |
548 | compress!(state); |
549 | } |
550 | } |
551 | |
552 | #[derive (Debug, Clone, Copy, Default)] |
553 | struct Sip24Rounds; |
554 | |
555 | impl Sip for Sip24Rounds { |
556 | #[inline ] |
557 | fn c_rounds(state: &mut State) { |
558 | compress!(state); |
559 | compress!(state); |
560 | } |
561 | |
562 | #[inline ] |
563 | fn d_rounds(state: &mut State) { |
564 | compress!(state); |
565 | compress!(state); |
566 | compress!(state); |
567 | compress!(state); |
568 | } |
569 | } |
570 | |