sip128.rs source code [crates/siphasher/src/sip128.rs]

1	// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2	// file at the top-level directory of this distribution and at
3	// http://rust-lang.org/COPYRIGHT.
4	//
5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8	// option. This file may not be copied, modified, or distributed
9	// except according to those terms.
10
11	//! An implementation of SipHash with a 128-bit output.
12
13	use core::cmp;
14	use core::hash;
15	use core::marker::PhantomData;
16	use core::mem;
17	use core::ptr;
18	use core::u64;
19
/// A 128-bit (2x64) hash output.
///
/// When converted to or from a `u128` through the `From` impls below, `h1`
/// is the low 64 bits and `h2` is the high 64 bits.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Hash128 {
    /// First 64-bit half of the hash.
    pub h1: u64,
    /// Second 64-bit half of the hash.
    pub h2: u64,
}

impl From<u128> for Hash128 {
    /// Splits `v` into its low (`h1`) and high (`h2`) 64-bit halves.
    fn from(v: u128) -> Self {
        Hash128 {
            // Truncation to the low 64 bits is intentional here.
            h1: v as u64,
            h2: (v >> 64) as u64,
        }
    }
}

impl From<Hash128> for u128 {
    /// Reassembles a `u128` with `h1` as the low half and `h2` as the high half.
    fn from(h: Hash128) -> u128 {
        u128::from(h.h1) | (u128::from(h.h2) << 64)
    }
}
42
/// An implementation of SipHash128 1-3.
///
/// Thin public wrapper around the generic `Hasher` core, parameterised with
/// `Sip13Rounds` as its round schedule.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher13 {
    // Shared streaming state; all hashing work is delegated to it.
    hasher: Hasher<Sip13Rounds>,
}
49
/// An implementation of SipHash128 2-4.
///
/// Thin public wrapper around the generic `Hasher` core, parameterised with
/// `Sip24Rounds` as its round schedule.
#[derive(Debug, Clone, Copy, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct SipHasher24 {
    // Shared streaming state; all hashing work is delegated to it.
    hasher: Hasher<Sip24Rounds>,
}
56
/// An implementation of SipHash128 2-4.
///
/// This is a newtype over `SipHasher24`; every operation is forwarded to
/// the wrapped hasher.
///
/// SipHash is a general-purpose hashing function: it runs at a good
/// speed (competitive with Spooky and City) and permits strong _keyed_
/// hashing. This lets you key your hashtables from a strong RNG, such as
/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
///
/// Although the SipHash algorithm is considered to be generally strong,
/// it is not intended for cryptographic purposes. As such, all
/// cryptographic uses of this implementation are _strongly discouraged_.
#[derive(Debug, Clone, Copy, Default)]
pub struct SipHasher(SipHasher24);
69
// Generic streaming SipHash core shared by the public hasher types. `S`
// selects the round schedule (see the `Sip` trait) and is carried only at
// the type level through `_marker`.
#[derive(Debug, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct Hasher<S: Sip> {
    // First and second halves of the 128-bit key.
    k0: u64,
    k1: u64,
    length: usize, // how many bytes we've processed
    state: State,  // hash State
    tail: u64,     // unprocessed bytes le
    ntail: usize,  // how many bytes in tail are valid
    // Zero-sized marker tying the hasher to its round schedule `S`.
    _marker: PhantomData<S>,
}
81
// The four 64-bit SipHash state words.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
struct State {
    // v0, v2 and v1, v3 show up in pairs in the algorithm,
    // and simd implementations of SipHash will use vectors
    // of v02 and v13. By placing them in this order in the struct,
    // the compiler can pick up on just a few simd optimizations by itself.
    //
    // Do not reorder these fields: the declaration order (v0, v2, v1, v3)
    // is deliberate.
    v0: u64,
    v2: u64,
    v1: u64,
    v3: u64,
}
94
/// Applies one SipRound to the four state words, in place.
///
/// Two forms are accepted:
/// - `compress!(state)` where `state` exposes `v0`, `v1`, `v2`, `v3` fields;
/// - `compress!(v0, v1, v2, v3)` with four assignable `u64` place expressions.
///
/// All additions wrap on overflow; the rotation amounts (13, 32, 16, 21, 17,
/// 32) are the SipHash round constants.
macro_rules! compress {
    ($state:expr) => {{
        compress!($state.v0, $state.v1, $state.v2, $state.v3)
    }};
    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
        $v0 = $v0.wrapping_add($v1);
        $v1 = $v1.rotate_left(13);
        $v1 ^= $v0;
        $v0 = $v0.rotate_left(32);
        $v2 = $v2.wrapping_add($v3);
        $v3 = $v3.rotate_left(16);
        $v3 ^= $v2;
        $v0 = $v0.wrapping_add($v3);
        $v3 = $v3.rotate_left(21);
        $v3 ^= $v0;
        $v2 = $v2.wrapping_add($v1);
        $v1 = $v1.rotate_left(17);
        $v1 ^= $v2;
        $v2 = $v2.rotate_left(32);
    }};
}
116
/// Loads an integer of the desired type from a byte stream, in LE order. Uses
/// `copy_nonoverlapping` to let the compiler generate the most efficient way
/// to load it from a possibly unaligned address.
///
/// Must be expanded inside an `unsafe` context.
///
/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`. The bound is
/// only verified by a `debug_assert!`, so release builds rely on the caller.
macro_rules! load_int_le {
    ($buf:expr, $i:expr, $int_ty:ident) => {{
        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
        let mut data = 0 as $int_ty;
        ptr::copy_nonoverlapping(
            $buf.as_ptr().add($i),
            &mut data as *mut _ as *mut u8,
            mem::size_of::<$int_ty>(),
        );
        // The bytes were copied in memory order; `to_le` turns that into the
        // little-endian interpretation on every target.
        data.to_le()
    }};
}

/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
/// sizes and avoid calling `memcpy`, which is good for speed.
///
/// The bytes `buf[start..start + len]` are packed little-endian into the low
/// `len` bytes of the result; the remaining high bytes are zero.
///
/// # Safety
///
/// Unsafe because: unchecked indexing at start..start+len. The caller must
/// guarantee `start + len <= buf.len()` and `len < 8`.
#[inline]
unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
    debug_assert!(len < 8);
    let mut i: usize = 0; // current byte index (from LSB) in the output u64
    let mut out: u64 = 0;
    // Take the largest fixed-size pieces first: 4 bytes, then 2, then 1.
    if i + 3 < len {
        out = load_int_le!(buf, start + i, u32) as u64;
        i += 4;
    }
    if i + 1 < len {
        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
        i += 2;
    }
    if i < len {
        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
        i += 1;
    }
    debug_assert_eq!(i, len);
    out
}
160
/// Hashers that can produce a 128-bit digest.
pub trait Hasher128 {
    /// Return a 128-bit hash
    ///
    /// Takes `&self`, so the hasher itself is not consumed or modified.
    fn finish128(&self) -> Hash128;
}
165
166	impl SipHasher {
167	/// Creates a new `SipHasher` with the two initial keys set to 0.
168	#[inline]
169	pub fn new() -> SipHasher {
170	SipHasher::new_with_keys(`0`, `0`)
171	}
172
173	/// Creates a `SipHasher` that is keyed off the provided keys.
174	#[inline]
175	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
176	SipHasher(SipHasher24::new_with_keys(key0, key1))
177	}
178
179	/// Creates a `SipHasher` from a 16 byte key.
180	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher {
181	let mut b0 = [`0u8`; `8`];
182	let mut b1 = [`0u8`; `8`];
183	b0.copy_from_slice(&key[`0`..`8`]);
184	b1.copy_from_slice(&key[`8`..`16`]);
185	let key0 = u64::from_le_bytes(b0);
186	let key1 = u64::from_le_bytes(b1);
187	Self::new_with_keys(key0, key1)
188	}
189
190	/// Get the keys used by this hasher
191	pub fn keys(&self) -> (u64, u64) {
192	(self.0.hasher.k0, self.0.hasher.k1)
193	}
194
195	/// Get the key used by this hasher as a 16 byte vector
196	pub fn key(&self) -> [u8; `16`] {
197	let mut bytes = [`0u8`; `16`];
198	bytes[`0`..`8`].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
199	bytes[`8`..`16`].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
200	bytes
201	}
202	}
203
204	impl Hasher128 for SipHasher {
205	/// Return a 128-bit hash
206	#[inline]
207	fn finish128(&self) -> Hash128 {
208	self.0.finish128()
209	}
210	}
211
212	impl SipHasher13 {
213	/// Creates a new `SipHasher13` with the two initial keys set to 0.
214	#[inline]
215	pub fn new() -> SipHasher13 {
216	SipHasher13::new_with_keys(`0`, `0`)
217	}
218
219	/// Creates a `SipHasher13` that is keyed off the provided keys.
220	#[inline]
221	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
222	SipHasher13 {
223	hasher: Hasher::new_with_keys(key0, key1),
224	}
225	}
226
227	/// Creates a `SipHasher13` from a 16 byte key.
228	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher13 {
229	let mut b0 = [`0u8`; `8`];
230	let mut b1 = [`0u8`; `8`];
231	b0.copy_from_slice(&key[`0`..`8`]);
232	b1.copy_from_slice(&key[`8`..`16`]);
233	let key0 = u64::from_le_bytes(b0);
234	let key1 = u64::from_le_bytes(b1);
235	Self::new_with_keys(key0, key1)
236	}
237
238	/// Get the keys used by this hasher
239	pub fn keys(&self) -> (u64, u64) {
240	(self.hasher.k0, self.hasher.k1)
241	}
242
243	/// Get the key used by this hasher as a 16 byte vector
244	pub fn key(&self) -> [u8; `16`] {
245	let mut bytes = [`0u8`; `16`];
246	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
247	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
248	bytes
249	}
250	}
251
252	impl Hasher128 for SipHasher13 {
253	/// Return a 128-bit hash
254	#[inline]
255	fn finish128(&self) -> Hash128 {
256	self.hasher.finish128()
257	}
258	}
259
260	impl SipHasher24 {
261	/// Creates a new `SipHasher24` with the two initial keys set to 0.
262	#[inline]
263	pub fn new() -> SipHasher24 {
264	SipHasher24::new_with_keys(`0`, `0`)
265	}
266
267	/// Creates a `SipHasher24` that is keyed off the provided keys.
268	#[inline]
269	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
270	SipHasher24 {
271	hasher: Hasher::new_with_keys(key0, key1),
272	}
273	}
274
275	/// Creates a `SipHasher24` from a 16 byte key.
276	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher24 {
277	let mut b0 = [`0u8`; `8`];
278	let mut b1 = [`0u8`; `8`];
279	b0.copy_from_slice(&key[`0`..`8`]);
280	b1.copy_from_slice(&key[`8`..`16`]);
281	let key0 = u64::from_le_bytes(b0);
282	let key1 = u64::from_le_bytes(b1);
283	Self::new_with_keys(key0, key1)
284	}
285
286	/// Get the keys used by this hasher
287	pub fn keys(&self) -> (u64, u64) {
288	(self.hasher.k0, self.hasher.k1)
289	}
290
291	/// Get the key used by this hasher as a 16 byte vector
292	pub fn key(&self) -> [u8; `16`] {
293	let mut bytes = [`0u8`; `16`];
294	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
295	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
296	bytes
297	}
298	}
299
300	impl Hasher128 for SipHasher24 {
301	/// Return a 128-bit hash
302	#[inline]
303	fn finish128(&self) -> Hash128 {
304	self.hasher.finish128()
305	}
306	}
307
308	impl<S: Sip> Hasher<S> {
309	#[inline]
310	fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
311	let mut state = Hasher {
312	k0: key0,
313	k1: key1,
314	length: `0`,
315	state: State {
316	v0: `0`,
317	v1: `0xee`,
318	v2: `0`,
319	v3: `0`,
320	},
321	tail: `0`,
322	ntail: `0`,
323	_marker: PhantomData,
324	};
325	state.reset();
326	state
327	}
328
329	#[inline]
330	fn reset(&mut self) {
331	self.length = `0`;
332	self.state.v0 = self.k0 ^ `0x736f6d6570736575`;
333	self.state.v1 = self.k1 ^ `0x646f72616e646f83`;
334	self.state.v2 = self.k0 ^ `0x6c7967656e657261`;
335	self.state.v3 = self.k1 ^ `0x7465646279746573`;
336	self.ntail = `0`;
337	}
338
339	// A specialized write function for values with size <= 8.
340	//
341	// The hashing of multi-byte integers depends on endianness. E.g.:
342	// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
343	// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
344	//
345	// This function does the right thing for little-endian hardware. On
346	// big-endian hardware `x` must be byte-swapped first to give the right
347	// behaviour. After any byte-swapping, the input must be zero-extended to
348	// 64-bits. The caller is responsible for the byte-swapping and
349	// zero-extension.
350	#[inline]
351	fn short_write<T>(&mut self, _x: T, x: u64) {
352	let size = mem::size_of::<T>();
353	self.length += size;
354
355	// The original number must be zero-extended, not sign-extended.
356	debug_assert!(if size < `8` { x >> (`8` * size) == `0` } else { `true` });
357
358	// The number of bytes needed to fill `self.tail`.
359	let needed = `8` - self.ntail;
360
361	self.tail \|= x << (`8` * self.ntail);
362	if size < needed {
363	self.ntail += size;
364	return;
365	}
366
367	// `self.tail` is full, process it.
368	self.state.v3 ^= self.tail;
369	S::c_rounds(&mut self.state);
370	self.state.v0 ^= self.tail;
371
372	self.ntail = size - needed;
373	self.tail = if needed < `8` { x >> (`8` * needed) } else { `0` };
374	}
375	}
376
377	impl<S: Sip> Hasher<S> {
378	#[inline]
379	pub fn finish128(&self) -> Hash128 {
380	let mut state: State = self.state;
381
382	let b: u64 = ((self.length as u64 & `0xff`) << `56`) \| self.tail;
383
384	state.v3 ^= b;
385	S::c_rounds(&mut state);
386	state.v0 ^= b;
387
388	state.v2 ^= `0xee`;
389	S::d_rounds(&mut state);
390	let h1: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
391
392	state.v1 ^= `0xdd`;
393	S::d_rounds(&mut state);
394	let h2: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
395
396	Hash128 { h1, h2 }
397	}
398	}
399
400	impl hash::Hasher for SipHasher {
401	#[inline]
402	fn write(&mut self, msg: &[u8]) {
403	self.0.write(msg)
404	}
405
406	#[inline]
407	fn finish(&self) -> u64 {
408	self.0.finish()
409	}
410
411	#[inline]
412	fn write_usize(&mut self, i: usize) {
413	self.0.write_usize(i);
414	}
415
416	#[inline]
417	fn write_u8(&mut self, i: u8) {
418	self.0.write_u8(i);
419	}
420
421	#[inline]
422	fn write_u16(&mut self, i: u16) {
423	self.0.write_u16(i);
424	}
425
426	#[inline]
427	fn write_u32(&mut self, i: u32) {
428	self.0.write_u32(i);
429	}
430
431	#[inline]
432	fn write_u64(&mut self, i: u64) {
433	self.0.write_u64(i);
434	}
435	}
436
437	impl hash::Hasher for SipHasher13 {
438	#[inline]
439	fn write(&mut self, msg: &[u8]) {
440	self.hasher.write(msg)
441	}
442
443	#[inline]
444	fn finish(&self) -> u64 {
445	self.hasher.finish()
446	}
447
448	#[inline]
449	fn write_usize(&mut self, i: usize) {
450	self.hasher.write_usize(i);
451	}
452
453	#[inline]
454	fn write_u8(&mut self, i: u8) {
455	self.hasher.write_u8(i);
456	}
457
458	#[inline]
459	fn write_u16(&mut self, i: u16) {
460	self.hasher.write_u16(i);
461	}
462
463	#[inline]
464	fn write_u32(&mut self, i: u32) {
465	self.hasher.write_u32(i);
466	}
467
468	#[inline]
469	fn write_u64(&mut self, i: u64) {
470	self.hasher.write_u64(i);
471	}
472	}
473
474	impl hash::Hasher for SipHasher24 {
475	#[inline]
476	fn write(&mut self, msg: &[u8]) {
477	self.hasher.write(msg)
478	}
479
480	#[inline]
481	fn finish(&self) -> u64 {
482	self.hasher.finish()
483	}
484
485	#[inline]
486	fn write_usize(&mut self, i: usize) {
487	self.hasher.write_usize(i);
488	}
489
490	#[inline]
491	fn write_u8(&mut self, i: u8) {
492	self.hasher.write_u8(i);
493	}
494
495	#[inline]
496	fn write_u16(&mut self, i: u16) {
497	self.hasher.write_u16(i);
498	}
499
500	#[inline]
501	fn write_u32(&mut self, i: u32) {
502	self.hasher.write_u32(i);
503	}
504
505	#[inline]
506	fn write_u64(&mut self, i: u64) {
507	self.hasher.write_u64(i);
508	}
509	}
510
511	impl<S: Sip> hash::Hasher for Hasher<S> {
512	#[inline]
513	fn write_usize(&mut self, i: usize) {
514	self.short_write(i, i.to_le() as u64);
515	}
516
517	#[inline]
518	fn write_u8(&mut self, i: u8) {
519	self.short_write(i, i as u64);
520	}
521
522	#[inline]
523	fn write_u32(&mut self, i: u32) {
524	self.short_write(i, i.to_le() as u64);
525	}
526
527	#[inline]
528	fn write_u64(&mut self, i: u64) {
529	self.short_write(i, i.to_le() as u64);
530	}
531
532	#[inline]
533	fn write(&mut self, msg: &[u8]) {
534	let length = msg.len();
535	self.length += length;
536
537	let mut needed = `0`;
538
539	if self.ntail != `0` {
540	needed = `8` - self.ntail;
541	self.tail \|= unsafe { u8to64_le(msg, `0`, cmp::min(length, needed)) } << (`8` * self.ntail);
542	if length < needed {
543	self.ntail += length;
544	return;
545	} else {
546	self.state.v3 ^= self.tail;
547	S::c_rounds(&mut self.state);
548	self.state.v0 ^= self.tail;
549	self.ntail = `0`;
550	}
551	}
552
553	// Buffered tail is now flushed, process new input.
554	let len = length - needed;
555	let left = len & `0x7`;
556
557	let mut i = needed;
558	while i < len - left {
559	let mi = unsafe { load_int_le!(msg, i, u64) };
560
561	self.state.v3 ^= mi;
562	S::c_rounds(&mut self.state);
563	self.state.v0 ^= mi;
564
565	i += `8`;
566	}
567
568	self.tail = unsafe { u8to64_le(msg, i, left) };
569	self.ntail = left;
570	}
571
572	#[inline]
573	fn finish(&self) -> u64 {
574	self.finish128().h2
575	}
576	}
577
578	impl<S: Sip> Clone for Hasher<S> {
579	#[inline]
580	fn clone(&self) -> Hasher<S> {
581	Hasher {
582	k0: self.k0,
583	k1: self.k1,
584	length: self.length,
585	state: self.state,
586	tail: self.tail,
587	ntail: self.ntail,
588	_marker: self._marker,
589	}
590	}
591	}
592
593	impl<S: Sip> Default for Hasher<S> {
594	/// Creates a `Hasher<S>` with the two initial keys set to 0.
595	#[inline]
596	fn default() -> Hasher<S> {
597	Hasher::new_with_keys(key0:`0`, key1:`0`)
598	}
599	}
600
// Round schedule of a SipHash variant. Implementors are marker types used
// as the `S` parameter of `Hasher`.
#[doc(hidden)]
trait Sip {
    // Rounds applied after each 64-bit input word is mixed into the state.
    fn c_rounds(_: &mut State);
    // Rounds applied before each output word is extracted in `finish128`.
    fn d_rounds(_: &mut State);
}
606
607	#[derive(Debug, Clone, Copy, Default)]
608	struct Sip13Rounds;
609
610	impl Sip for Sip13Rounds {
611	#[inline]
612	fn c_rounds(state: &mut State) {
613	compress!(state);
614	}
615
616	#[inline]
617	fn d_rounds(state: &mut State) {
618	compress!(state);
619	compress!(state);
620	compress!(state);
621	}
622	}
623
624	#[derive(Debug, Clone, Copy, Default)]
625	struct Sip24Rounds;
626
627	impl Sip for Sip24Rounds {
628	#[inline]
629	fn c_rounds(state: &mut State) {
630	compress!(state);
631	compress!(state);
632	}
633
634	#[inline]
635	fn d_rounds(state: &mut State) {
636	compress!(state);
637	compress!(state);
638	compress!(state);
639	compress!(state);
640	}
641	}
642
643	impl Hash128 {
644	/// Convert into a 16-bytes vector
645	pub fn as_bytes(&self) -> [u8; `16`] {
646	let mut bytes = [`0u8`; `16`];
647	let h1 = self.h1.to_le();
648	let h2 = self.h2.to_le();
649	unsafe {
650	ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.as_mut_ptr(), `8`);
651	ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.as_mut_ptr().add(`8`), `8`);
652	}
653	bytes
654	}
655
656	/// Convert into a `u128`
657	#[inline]
658	pub fn as_u128(&self) -> u128 {
659	let h1 = self.h1.to_le();
660	let h2 = self.h2.to_le();
661	h1 as u128 \| ((h2 as u128) << `64`)
662	}
663
664	/// Convert into `(u64, u64)`
665	#[inline]
666	pub fn as_u64(&self) -> (u64, u64) {
667	let h1 = self.h1.to_le();
668	let h2 = self.h2.to_le();
669	(h1, h2)
670	}
671	}
672