sip128.rs source code [crates/siphasher-0.3.11/src/sip128.rs]

1	// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2	// file at the top-level directory of this distribution and at
3	// http://rust-lang.org/COPYRIGHT.
4	//
5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8	// option. This file may not be copied, modified, or distributed
9	// except according to those terms.
10
11	//! An implementation of SipHash with a 128-bit output.
12
13	use core::cmp;
14	use core::hash;
15	use core::hash::Hasher as _;
16	use core::marker::PhantomData;
17	use core::mem;
18	use core::ptr;
19	use core::u64;
20
21	/// A 128-bit (2x64) hash output
22	#[derive(Debug, Clone, Copy, Default)]
23	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
24	pub struct Hash128 {
25	pub h1: u64,
26	pub h2: u64,
27	}
28
29	impl From<u128> for Hash128 {
30	fn from(v: u128) -> Self {
31	Hash128 {
32	h1: v as u64,
33	h2: (v >> `64`) as u64,
34	}
35	}
36	}
37
38	impl From<Hash128> for u128 {
39	fn from(h: Hash128) -> u128 {
40	(h.h1 as u128) \| ((h.h2 as u128) << `64`)
41	}
42	}
43
44	/// An implementation of SipHash128 1-3.
45	#[derive(Debug, Clone, Copy, Default)]
46	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
47	pub struct SipHasher13 {
48	hasher: Hasher<Sip13Rounds>,
49	}
50
51	/// An implementation of SipHash128 2-4.
52	#[derive(Debug, Clone, Copy, Default)]
53	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54	pub struct SipHasher24 {
55	hasher: Hasher<Sip24Rounds>,
56	}
57
58	/// An implementation of SipHash128 2-4.
59	///
60	/// SipHash is a general-purpose hashing function: it runs at a good
61	/// speed (competitive with Spooky and City) and permits strong _keyed_
62	/// hashing. This lets you key your hashtables from a strong RNG, such as
63	/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
64	///
65	/// Although the SipHash algorithm is considered to be generally strong,
66	/// it is not intended for cryptographic purposes. As such, all
67	/// cryptographic uses of this implementation are _strongly discouraged_.
68	#[derive(Debug, Clone, Copy, Default)]
69	pub struct SipHasher(SipHasher24);
70
71	#[derive(Debug, Copy)]
72	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
73	struct Hasher<S: Sip> {
74	k0: u64,
75	k1: u64,
76	length: usize, // how many bytes we've processed
77	state: State, // hash State
78	tail: u64, // unprocessed bytes le
79	ntail: usize, // how many bytes in tail are valid
80	_marker: PhantomData<S>,
81	}
82
83	#[derive(Debug, Clone, Copy)]
84	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
85	struct State {
86	// v0, v2 and v1, v3 show up in pairs in the algorithm,
87	// and simd implementations of SipHash will use vectors
88	// of v02 and v13. By placing them in this order in the struct,
89	// the compiler can pick up on just a few simd optimizations by itself.
90	v0: u64,
91	v2: u64,
92	v1: u64,
93	v3: u64,
94	}
95
96	macro_rules! compress {
97	($state:expr) => {{
98	compress!($state.v0, $state.v1, $state.v2, $state.v3)
99	}};
100	($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
101	$v0 = $v0.wrapping_add($v1);
102	$v1 = $v1.rotate_left(`13`);
103	$v1 ^= $v0;
104	$v0 = $v0.rotate_left(`32`);
105	$v2 = $v2.wrapping_add($v3);
106	$v3 = $v3.rotate_left(`16`);
107	$v3 ^= $v2;
108	$v0 = $v0.wrapping_add($v3);
109	$v3 = $v3.rotate_left(`21`);
110	$v3 ^= $v0;
111	$v2 = $v2.wrapping_add($v1);
112	$v1 = $v1.rotate_left(`17`);
113	$v1 ^= $v2;
114	$v2 = $v2.rotate_left(`32`);
115	}};
116	}
117
118	/// Loads an integer of the desired type from a byte stream, in LE order. Uses
119	/// `copy_nonoverlapping` to let the compiler generate the most efficient way
120	/// to load it from a possibly unaligned address.
121	///
122	/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
123	macro_rules! load_int_le {
124	($buf:expr, $i:expr, $int_ty:ident) => {{
125	debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
126	let mut data = `0` as $int_ty;
127	ptr::copy_nonoverlapping(
128	$buf.as_ptr().add($i),
129	&mut data as *mut _ as *mut u8,
130	mem::size_of::<$int_ty>(),
131	);
132	data.to_le()
133	}};
134	}
135
136	/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
137	/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
138	/// sizes and avoid calling `memcpy`, which is good for speed.
139	///
140	/// Unsafe because: unchecked indexing at start..start+len
141	#[inline]
142	unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
143	debug_assert!(len < `8`);
144	let mut i: usize = `0`; // current byte index (from LSB) in the output u64
145	let mut out: u64 = `0`;
146	if i + `3` < len {
147	out = load_int_le!(buf, start + i, u32) as u64;
148	i += `4`;
149	}
150	if i + `1` < len {
151	out \|= (load_int_le!(buf, start + i, u16) as u64) << (i * `8`);
152	i += `2`
153	}
154	if i < len {
155	out \|= (buf.get_unchecked(index:start + i) as u64) << (i `8`);
156	i += `1`;
157	}
158	debug_assert_eq!(i, len);
159	out
160	}
161
162	pub trait Hasher128 {
163	/// Return a 128-bit hash
164	fn finish128(&self) -> Hash128;
165	}
166
167	impl SipHasher {
168	/// Creates a new `SipHasher` with the two initial keys set to 0.
169	#[inline]
170	pub fn new() -> SipHasher {
171	SipHasher::new_with_keys(`0`, `0`)
172	}
173
174	/// Creates a `SipHasher` that is keyed off the provided keys.
175	#[inline]
176	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
177	SipHasher(SipHasher24::new_with_keys(key0, key1))
178	}
179
180	/// Creates a `SipHasher` from a 16 byte key.
181	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher {
182	let mut b0 = [`0u8`; `8`];
183	let mut b1 = [`0u8`; `8`];
184	b0.copy_from_slice(&key[`0`..`8`]);
185	b1.copy_from_slice(&key[`8`..`16`]);
186	let key0 = u64::from_le_bytes(b0);
187	let key1 = u64::from_le_bytes(b1);
188	Self::new_with_keys(key0, key1)
189	}
190
191	/// Get the keys used by this hasher
192	pub fn keys(&self) -> (u64, u64) {
193	(self.0.hasher.k0, self.0.hasher.k1)
194	}
195
196	/// Get the key used by this hasher as a 16 byte vector
197	pub fn key(&self) -> [u8; `16`] {
198	let mut bytes = [`0u8`; `16`];
199	bytes[`0`..`8`].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
200	bytes[`8`..`16`].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
201	bytes
202	}
203
204	/// Hash a byte array - This is the easiest and safest way to use SipHash.
205	#[inline]
206	pub fn hash(&self, bytes: &[u8]) -> Hash128 {
207	let mut hasher = self.0.hasher;
208	hasher.write(bytes);
209	hasher.finish128()
210	}
211	}
212
213	impl Hasher128 for SipHasher {
214	/// Return a 128-bit hash
215	#[inline]
216	fn finish128(&self) -> Hash128 {
217	self.0.finish128()
218	}
219	}
220
221	impl SipHasher13 {
222	/// Creates a new `SipHasher13` with the two initial keys set to 0.
223	#[inline]
224	pub fn new() -> SipHasher13 {
225	SipHasher13::new_with_keys(`0`, `0`)
226	}
227
228	/// Creates a `SipHasher13` that is keyed off the provided keys.
229	#[inline]
230	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
231	SipHasher13 {
232	hasher: Hasher::new_with_keys(key0, key1),
233	}
234	}
235
236	/// Creates a `SipHasher13` from a 16 byte key.
237	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher13 {
238	let mut b0 = [`0u8`; `8`];
239	let mut b1 = [`0u8`; `8`];
240	b0.copy_from_slice(&key[`0`..`8`]);
241	b1.copy_from_slice(&key[`8`..`16`]);
242	let key0 = u64::from_le_bytes(b0);
243	let key1 = u64::from_le_bytes(b1);
244	Self::new_with_keys(key0, key1)
245	}
246
247	/// Get the keys used by this hasher
248	pub fn keys(&self) -> (u64, u64) {
249	(self.hasher.k0, self.hasher.k1)
250	}
251
252	/// Get the key used by this hasher as a 16 byte vector
253	pub fn key(&self) -> [u8; `16`] {
254	let mut bytes = [`0u8`; `16`];
255	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
256	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
257	bytes
258	}
259
260	/// Hash a byte array - This is the easiest and safest way to use SipHash.
261	#[inline]
262	pub fn hash(&self, bytes: &[u8]) -> Hash128 {
263	let mut hasher = self.hasher;
264	hasher.write(bytes);
265	hasher.finish128()
266	}
267	}
268
269	impl Hasher128 for SipHasher13 {
270	/// Return a 128-bit hash
271	#[inline]
272	fn finish128(&self) -> Hash128 {
273	self.hasher.finish128()
274	}
275	}
276
277	impl SipHasher24 {
278	/// Creates a new `SipHasher24` with the two initial keys set to 0.
279	#[inline]
280	pub fn new() -> SipHasher24 {
281	SipHasher24::new_with_keys(`0`, `0`)
282	}
283
284	/// Creates a `SipHasher24` that is keyed off the provided keys.
285	#[inline]
286	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
287	SipHasher24 {
288	hasher: Hasher::new_with_keys(key0, key1),
289	}
290	}
291
292	/// Creates a `SipHasher24` from a 16 byte key.
293	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher24 {
294	let mut b0 = [`0u8`; `8`];
295	let mut b1 = [`0u8`; `8`];
296	b0.copy_from_slice(&key[`0`..`8`]);
297	b1.copy_from_slice(&key[`8`..`16`]);
298	let key0 = u64::from_le_bytes(b0);
299	let key1 = u64::from_le_bytes(b1);
300	Self::new_with_keys(key0, key1)
301	}
302
303	/// Get the keys used by this hasher
304	pub fn keys(&self) -> (u64, u64) {
305	(self.hasher.k0, self.hasher.k1)
306	}
307
308	/// Get the key used by this hasher as a 16 byte vector
309	pub fn key(&self) -> [u8; `16`] {
310	let mut bytes = [`0u8`; `16`];
311	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
312	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
313	bytes
314	}
315
316	/// Hash a byte array - This is the easiest and safest way to use SipHash.
317	#[inline]
318	pub fn hash(&self, bytes: &[u8]) -> Hash128 {
319	let mut hasher = self.hasher;
320	hasher.write(bytes);
321	hasher.finish128()
322	}
323	}
324
325	impl Hasher128 for SipHasher24 {
326	/// Return a 128-bit hash
327	#[inline]
328	fn finish128(&self) -> Hash128 {
329	self.hasher.finish128()
330	}
331	}
332
333	impl<S: Sip> Hasher<S> {
334	#[inline]
335	fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
336	let mut state = Hasher {
337	k0: key0,
338	k1: key1,
339	length: `0`,
340	state: State {
341	v0: `0`,
342	v1: `0xee`,
343	v2: `0`,
344	v3: `0`,
345	},
346	tail: `0`,
347	ntail: `0`,
348	_marker: PhantomData,
349	};
350	state.reset();
351	state
352	}
353
354	#[inline]
355	fn reset(&mut self) {
356	self.length = `0`;
357	self.state.v0 = self.k0 ^ `0x736f6d6570736575`;
358	self.state.v1 = self.k1 ^ `0x646f72616e646f83`;
359	self.state.v2 = self.k0 ^ `0x6c7967656e657261`;
360	self.state.v3 = self.k1 ^ `0x7465646279746573`;
361	self.ntail = `0`;
362	}
363
364	// A specialized write function for values with size <= 8.
365	//
366	// The hashing of multi-byte integers depends on endianness. E.g.:
367	// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
368	// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
369	//
370	// This function does the right thing for little-endian hardware. On
371	// big-endian hardware `x` must be byte-swapped first to give the right
372	// behaviour. After any byte-swapping, the input must be zero-extended to
373	// 64-bits. The caller is responsible for the byte-swapping and
374	// zero-extension.
375	#[inline]
376	fn short_write<T>(&mut self, _x: T, x: u64) {
377	let size = mem::size_of::<T>();
378	self.length += size;
379
380	// The original number must be zero-extended, not sign-extended.
381	debug_assert!(if size < `8` { x >> (`8` * size) == `0` } else { `true` });
382
383	// The number of bytes needed to fill `self.tail`.
384	let needed = `8` - self.ntail;
385
386	self.tail \|= x << (`8` * self.ntail);
387	if size < needed {
388	self.ntail += size;
389	return;
390	}
391
392	// `self.tail` is full, process it.
393	self.state.v3 ^= self.tail;
394	S::c_rounds(&mut self.state);
395	self.state.v0 ^= self.tail;
396
397	self.ntail = size - needed;
398	self.tail = if needed < `8` { x >> (`8` * needed) } else { `0` };
399	}
400	}
401
402	impl<S: Sip> Hasher<S> {
403	#[inline]
404	pub fn finish128(&self) -> Hash128 {
405	let mut state: State = self.state;
406
407	let b: u64 = ((self.length as u64 & `0xff`) << `56`) \| self.tail;
408
409	state.v3 ^= b;
410	S::c_rounds(&mut state);
411	state.v0 ^= b;
412
413	state.v2 ^= `0xee`;
414	S::d_rounds(&mut state);
415	let h1: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
416
417	state.v1 ^= `0xdd`;
418	S::d_rounds(&mut state);
419	let h2: u64 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
420
421	Hash128 { h1, h2 }
422	}
423	}
424
425	impl hash::Hasher for SipHasher {
426	#[inline]
427	fn write(&mut self, msg: &[u8]) {
428	self.0.write(msg)
429	}
430
431	#[inline]
432	fn finish(&self) -> u64 {
433	self.0.finish()
434	}
435
436	#[inline]
437	fn write_usize(&mut self, i: usize) {
438	self.0.write_usize(i);
439	}
440
441	#[inline]
442	fn write_u8(&mut self, i: u8) {
443	self.0.write_u8(i);
444	}
445
446	#[inline]
447	fn write_u16(&mut self, i: u16) {
448	self.0.write_u16(i);
449	}
450
451	#[inline]
452	fn write_u32(&mut self, i: u32) {
453	self.0.write_u32(i);
454	}
455
456	#[inline]
457	fn write_u64(&mut self, i: u64) {
458	self.0.write_u64(i);
459	}
460	}
461
462	impl hash::Hasher for SipHasher13 {
463	#[inline]
464	fn write(&mut self, msg: &[u8]) {
465	self.hasher.write(msg)
466	}
467
468	#[inline]
469	fn finish(&self) -> u64 {
470	self.hasher.finish()
471	}
472
473	#[inline]
474	fn write_usize(&mut self, i: usize) {
475	self.hasher.write_usize(i);
476	}
477
478	#[inline]
479	fn write_u8(&mut self, i: u8) {
480	self.hasher.write_u8(i);
481	}
482
483	#[inline]
484	fn write_u16(&mut self, i: u16) {
485	self.hasher.write_u16(i);
486	}
487
488	#[inline]
489	fn write_u32(&mut self, i: u32) {
490	self.hasher.write_u32(i);
491	}
492
493	#[inline]
494	fn write_u64(&mut self, i: u64) {
495	self.hasher.write_u64(i);
496	}
497	}
498
499	impl hash::Hasher for SipHasher24 {
500	#[inline]
501	fn write(&mut self, msg: &[u8]) {
502	self.hasher.write(msg)
503	}
504
505	#[inline]
506	fn finish(&self) -> u64 {
507	self.hasher.finish()
508	}
509
510	#[inline]
511	fn write_usize(&mut self, i: usize) {
512	self.hasher.write_usize(i);
513	}
514
515	#[inline]
516	fn write_u8(&mut self, i: u8) {
517	self.hasher.write_u8(i);
518	}
519
520	#[inline]
521	fn write_u16(&mut self, i: u16) {
522	self.hasher.write_u16(i);
523	}
524
525	#[inline]
526	fn write_u32(&mut self, i: u32) {
527	self.hasher.write_u32(i);
528	}
529
530	#[inline]
531	fn write_u64(&mut self, i: u64) {
532	self.hasher.write_u64(i);
533	}
534	}
535
536	impl<S: Sip> hash::Hasher for Hasher<S> {
537	#[inline]
538	fn write_usize(&mut self, i: usize) {
539	self.short_write(i, i.to_le() as u64);
540	}
541
542	#[inline]
543	fn write_u8(&mut self, i: u8) {
544	self.short_write(i, i as u64);
545	}
546
547	#[inline]
548	fn write_u32(&mut self, i: u32) {
549	self.short_write(i, i.to_le() as u64);
550	}
551
552	#[inline]
553	fn write_u64(&mut self, i: u64) {
554	self.short_write(i, i.to_le());
555	}
556
557	#[inline]
558	fn write(&mut self, msg: &[u8]) {
559	let length = msg.len();
560	self.length += length;
561
562	let mut needed = `0`;
563
564	if self.ntail != `0` {
565	needed = `8` - self.ntail;
566	self.tail \|= unsafe { u8to64_le(msg, `0`, cmp::min(length, needed)) } << (`8` * self.ntail);
567	if length < needed {
568	self.ntail += length;
569	return;
570	} else {
571	self.state.v3 ^= self.tail;
572	S::c_rounds(&mut self.state);
573	self.state.v0 ^= self.tail;
574	self.ntail = `0`;
575	}
576	}
577
578	// Buffered tail is now flushed, process new input.
579	let len = length - needed;
580	let left = len & `0x7`;
581
582	let mut i = needed;
583	while i < len - left {
584	let mi = unsafe { load_int_le!(msg, i, u64) };
585
586	self.state.v3 ^= mi;
587	S::c_rounds(&mut self.state);
588	self.state.v0 ^= mi;
589
590	i += `8`;
591	}
592
593	self.tail = unsafe { u8to64_le(msg, i, left) };
594	self.ntail = left;
595	}
596
597	#[inline]
598	fn finish(&self) -> u64 {
599	self.finish128().h2
600	}
601	}
602
603	impl<S: Sip> Clone for Hasher<S> {
604	#[inline]
605	fn clone(&self) -> Hasher<S> {
606	Hasher {
607	k0: self.k0,
608	k1: self.k1,
609	length: self.length,
610	state: self.state,
611	tail: self.tail,
612	ntail: self.ntail,
613	_marker: self._marker,
614	}
615	}
616	}
617
618	impl<S: Sip> Default for Hasher<S> {
619	/// Creates a `Hasher<S>` with the two initial keys set to 0.
620	#[inline]
621	fn default() -> Hasher<S> {
622	Hasher::new_with_keys(key0:`0`, key1:`0`)
623	}
624	}
625
626	#[doc(hidden)]
627	trait Sip {
628	fn c_rounds(_: &mut State);
629	fn d_rounds(_: &mut State);
630	}
631
632	#[derive(Debug, Clone, Copy, Default)]
633	struct Sip13Rounds;
634
635	impl Sip for Sip13Rounds {
636	#[inline]
637	fn c_rounds(state: &mut State) {
638	compress!(state);
639	}
640
641	#[inline]
642	fn d_rounds(state: &mut State) {
643	compress!(state);
644	compress!(state);
645	compress!(state);
646	}
647	}
648
649	#[derive(Debug, Clone, Copy, Default)]
650	struct Sip24Rounds;
651
652	impl Sip for Sip24Rounds {
653	#[inline]
654	fn c_rounds(state: &mut State) {
655	compress!(state);
656	compress!(state);
657	}
658
659	#[inline]
660	fn d_rounds(state: &mut State) {
661	compress!(state);
662	compress!(state);
663	compress!(state);
664	compress!(state);
665	}
666	}
667
668	impl Hash128 {
669	/// Convert into a 16-bytes vector
670	pub fn as_bytes(&self) -> [u8; `16`] {
671	let mut bytes = [`0u8`; `16`];
672	let h1 = self.h1.to_le();
673	let h2 = self.h2.to_le();
674	unsafe {
675	ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.as_mut_ptr(), `8`);
676	ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.as_mut_ptr().add(`8`), `8`);
677	}
678	bytes
679	}
680
681	/// Convert into a `u128`
682	#[inline]
683	pub fn as_u128(&self) -> u128 {
684	let h1 = self.h1.to_le();
685	let h2 = self.h2.to_le();
686	h1 as u128 \| ((h2 as u128) << `64`)
687	}
688
689	/// Convert into `(u64, u64)`
690	#[inline]
691	pub fn as_u64(&self) -> (u64, u64) {
692	let h1 = self.h1.to_le();
693	let h2 = self.h2.to_le();
694	(h1, h2)
695	}
696	}
697