sip.rs source code [crates/siphasher/src/sip.rs]

1	// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2	// file at the top-level directory of this distribution and at
3	// http://rust-lang.org/COPYRIGHT.
4	//
5	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8	// option. This file may not be copied, modified, or distributed
9	// except according to those terms.
10
11	//! An implementation of SipHash.
12
13	use core::cmp;
14	use core::hash;
15	use core::hash::Hasher as _;
16	use core::marker::PhantomData;
17	use core::mem;
18	use core::ptr;
19	use core::u64;
20
21	/// An implementation of SipHash 1-3.
22	///
23	/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
24	#[derive(Debug, Clone, Copy, Default)]
25	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26	pub struct SipHasher13 {
27	hasher: Hasher<Sip13Rounds>,
28	}
29
30	/// An implementation of SipHash 2-4.
31	///
32	/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
33	#[derive(Debug, Clone, Copy, Default)]
34	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35	pub struct SipHasher24 {
36	hasher: Hasher<Sip24Rounds>,
37	}
38
39	/// An implementation of SipHash 2-4.
40	///
41	/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
42	///
43	/// SipHash is a general-purpose hashing function: it runs at a good
44	/// speed (competitive with Spooky and City) and permits strong _keyed_
45	/// hashing. This lets you key your hashtables from a strong RNG, such as
46	/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
47	///
48	/// Although the SipHash algorithm is considered to be generally strong,
49	/// it is not intended for cryptographic purposes. As such, all
50	/// cryptographic uses of this implementation are _strongly discouraged_.
51	#[derive(Debug, Clone, Copy, Default)]
52	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53	pub struct SipHasher(SipHasher24);
54
55	#[derive(Debug, Clone, Copy)]
56	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57	struct Hasher<S: Sip> {
58	k0: u64,
59	k1: u64,
60	length: usize, // how many bytes we've processed
61	state: State, // hash State
62	tail: u64, // unprocessed bytes le
63	ntail: usize, // how many bytes in tail are valid
64	_marker: PhantomData<S>,
65	}
66
67	#[derive(Debug, Clone, Copy)]
68	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69	struct State {
70	// v0, v2 and v1, v3 show up in pairs in the algorithm,
71	// and simd implementations of SipHash will use vectors
72	// of v02 and v13. By placing them in this order in the struct,
73	// the compiler can pick up on just a few simd optimizations by itself.
74	v0: u64,
75	v2: u64,
76	v1: u64,
77	v3: u64,
78	}
79
80	macro_rules! compress {
81	($state:expr) => {{
82	compress!($state.v0, $state.v1, $state.v2, $state.v3)
83	}};
84	($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
85	$v0 = $v0.wrapping_add($v1);
86	$v1 = $v1.rotate_left(`13`);
87	$v1 ^= $v0;
88	$v0 = $v0.rotate_left(`32`);
89	$v2 = $v2.wrapping_add($v3);
90	$v3 = $v3.rotate_left(`16`);
91	$v3 ^= $v2;
92	$v0 = $v0.wrapping_add($v3);
93	$v3 = $v3.rotate_left(`21`);
94	$v3 ^= $v0;
95	$v2 = $v2.wrapping_add($v1);
96	$v1 = $v1.rotate_left(`17`);
97	$v1 ^= $v2;
98	$v2 = $v2.rotate_left(`32`);
99	}};
100	}
101
102	/// Loads an integer of the desired type from a byte stream, in LE order. Uses
103	/// `copy_nonoverlapping` to let the compiler generate the most efficient way
104	/// to load it from a possibly unaligned address.
105	///
106	/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
107	macro_rules! load_int_le {
108	($buf:expr, $i:expr, $int_ty:ident) => {{
109	debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
110	let mut data = `0` as $int_ty;
111	ptr::copy_nonoverlapping(
112	$buf.as_ptr().add($i),
113	&mut data as *mut _ as *mut u8,
114	mem::size_of::<$int_ty>(),
115	);
116	data.to_le()
117	}};
118	}
119
120	/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
121	/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
122	/// sizes and avoid calling `memcpy`, which is good for speed.
123	///
124	/// Unsafe because: unchecked indexing at start..start+len
125	#[inline]
126	unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
127	debug_assert!(len < `8`);
128	let mut i: usize = `0`; // current byte index (from LSB) in the output u64
129	let mut out: u64 = `0`;
130	if i + `3` < len {
131	out = load_int_le!(buf, start + i, u32) as u64;
132	i += `4`;
133	}
134	if i + `1` < len {
135	out \|= (load_int_le!(buf, start + i, u16) as u64) << (i * `8`);
136	i += `2`
137	}
138	if i < len {
139	out \|= (buf.get_unchecked(index:start + i) as u64) << (i `8`);
140	i += `1`;
141	}
142	debug_assert_eq!(i, len);
143	out
144	}
145
146	impl SipHasher {
147	/// Creates a new `SipHasher` with the two initial keys set to 0.
148	#[inline]
149	pub fn new() -> SipHasher {
150	SipHasher::new_with_keys(`0`, `0`)
151	}
152
153	/// Creates a `SipHasher` that is keyed off the provided keys.
154	#[inline]
155	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
156	SipHasher(SipHasher24::new_with_keys(key0, key1))
157	}
158
159	/// Creates a `SipHasher` from a 16 byte key.
160	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher {
161	let mut b0 = [`0u8`; `8`];
162	let mut b1 = [`0u8`; `8`];
163	b0.copy_from_slice(&key[`0`..`8`]);
164	b1.copy_from_slice(&key[`8`..`16`]);
165	let key0 = u64::from_le_bytes(b0);
166	let key1 = u64::from_le_bytes(b1);
167	Self::new_with_keys(key0, key1)
168	}
169
170	/// Get the keys used by this hasher
171	pub fn keys(&self) -> (u64, u64) {
172	(self.0.hasher.k0, self.0.hasher.k1)
173	}
174
175	/// Get the key used by this hasher as a 16 byte vector
176	pub fn key(&self) -> [u8; `16`] {
177	let mut bytes = [`0u8`; `16`];
178	bytes[`0`..`8`].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
179	bytes[`8`..`16`].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
180	bytes
181	}
182
183	/// Hash a byte array - This is the easiest and safest way to use SipHash.
184	#[inline]
185	pub fn hash(&self, bytes: &[u8]) -> u64 {
186	let mut hasher = self.0.hasher;
187	hasher.write(bytes);
188	hasher.finish()
189	}
190	}
191
192	impl SipHasher13 {
193	/// Creates a new `SipHasher13` with the two initial keys set to 0.
194	#[inline]
195	pub fn new() -> SipHasher13 {
196	SipHasher13::new_with_keys(`0`, `0`)
197	}
198
199	/// Creates a `SipHasher13` that is keyed off the provided keys.
200	#[inline]
201	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202	SipHasher13 {
203	hasher: Hasher::new_with_keys(key0, key1),
204	}
205	}
206
207	/// Creates a `SipHasher13` from a 16 byte key.
208	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher13 {
209	let mut b0 = [`0u8`; `8`];
210	let mut b1 = [`0u8`; `8`];
211	b0.copy_from_slice(&key[`0`..`8`]);
212	b1.copy_from_slice(&key[`8`..`16`]);
213	let key0 = u64::from_le_bytes(b0);
214	let key1 = u64::from_le_bytes(b1);
215	Self::new_with_keys(key0, key1)
216	}
217
218	/// Get the keys used by this hasher
219	pub fn keys(&self) -> (u64, u64) {
220	(self.hasher.k0, self.hasher.k1)
221	}
222
223	/// Get the key used by this hasher as a 16 byte vector
224	pub fn key(&self) -> [u8; `16`] {
225	let mut bytes = [`0u8`; `16`];
226	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
227	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
228	bytes
229	}
230
231	/// Hash a byte array - This is the easiest and safest way to use SipHash.
232	#[inline]
233	pub fn hash(&self, bytes: &[u8]) -> u64 {
234	let mut hasher = self.hasher;
235	hasher.write(bytes);
236	hasher.finish()
237	}
238	}
239
240	impl SipHasher24 {
241	/// Creates a new `SipHasher24` with the two initial keys set to 0.
242	#[inline]
243	pub fn new() -> SipHasher24 {
244	SipHasher24::new_with_keys(`0`, `0`)
245	}
246
247	/// Creates a `SipHasher24` that is keyed off the provided keys.
248	#[inline]
249	pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
250	SipHasher24 {
251	hasher: Hasher::new_with_keys(key0, key1),
252	}
253	}
254
255	/// Creates a `SipHasher24` from a 16 byte key.
256	pub fn new_with_key(key: &[u8; `16`]) -> SipHasher24 {
257	let mut b0 = [`0u8`; `8`];
258	let mut b1 = [`0u8`; `8`];
259	b0.copy_from_slice(&key[`0`..`8`]);
260	b1.copy_from_slice(&key[`8`..`16`]);
261	let key0 = u64::from_le_bytes(b0);
262	let key1 = u64::from_le_bytes(b1);
263	Self::new_with_keys(key0, key1)
264	}
265
266	/// Get the keys used by this hasher
267	pub fn keys(&self) -> (u64, u64) {
268	(self.hasher.k0, self.hasher.k1)
269	}
270
271	/// Get the key used by this hasher as a 16 byte vector
272	pub fn key(&self) -> [u8; `16`] {
273	let mut bytes = [`0u8`; `16`];
274	bytes[`0`..`8`].copy_from_slice(&self.hasher.k0.to_le_bytes());
275	bytes[`8`..`16`].copy_from_slice(&self.hasher.k1.to_le_bytes());
276	bytes
277	}
278
279	/// Hash a byte array - This is the easiest and safest way to use SipHash.
280	#[inline]
281	pub fn hash(&self, bytes: &[u8]) -> u64 {
282	let mut hasher = self.hasher;
283	hasher.write(bytes);
284	hasher.finish()
285	}
286	}
287
288	impl<S: Sip> Hasher<S> {
289	#[inline]
290	fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
291	let mut state = Hasher {
292	k0: key0,
293	k1: key1,
294	length: `0`,
295	state: State {
296	v0: `0`,
297	v1: `0`,
298	v2: `0`,
299	v3: `0`,
300	},
301	tail: `0`,
302	ntail: `0`,
303	_marker: PhantomData,
304	};
305	state.reset();
306	state
307	}
308
309	#[inline]
310	fn reset(&mut self) {
311	self.length = `0`;
312	self.state.v0 = self.k0 ^ `0x736f6d6570736575`;
313	self.state.v1 = self.k1 ^ `0x646f72616e646f6d`;
314	self.state.v2 = self.k0 ^ `0x6c7967656e657261`;
315	self.state.v3 = self.k1 ^ `0x7465646279746573`;
316	self.ntail = `0`;
317	}
318
319	// A specialized write function for values with size <= 8.
320	//
321	// The hashing of multi-byte integers depends on endianness. E.g.:
322	// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
323	// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
324	//
325	// This function does the right thing for little-endian hardware. On
326	// big-endian hardware `x` must be byte-swapped first to give the right
327	// behaviour. After any byte-swapping, the input must be zero-extended to
328	// 64-bits. The caller is responsible for the byte-swapping and
329	// zero-extension.
330	#[inline]
331	fn short_write<T>(&mut self, _x: T, x: u64) {
332	let size = mem::size_of::<T>();
333	self.length += size;
334
335	// The original number must be zero-extended, not sign-extended.
336	debug_assert!(if size < `8` { x >> (`8` * size) == `0` } else { `true` });
337
338	// The number of bytes needed to fill `self.tail`.
339	let needed = `8` - self.ntail;
340
341	self.tail \|= x << (`8` * self.ntail);
342	if size < needed {
343	self.ntail += size;
344	return;
345	}
346
347	// `self.tail` is full, process it.
348	self.state.v3 ^= self.tail;
349	S::c_rounds(&mut self.state);
350	self.state.v0 ^= self.tail;
351
352	self.ntail = size - needed;
353	self.tail = if needed < `8` { x >> (`8` * needed) } else { `0` };
354	}
355	}
356
357	impl hash::Hasher for SipHasher {
358	#[inline]
359	fn write(&mut self, msg: &[u8]) {
360	self.0.write(msg)
361	}
362
363	#[inline]
364	fn finish(&self) -> u64 {
365	self.0.finish()
366	}
367
368	#[inline]
369	fn write_usize(&mut self, i: usize) {
370	self.0.write_usize(i);
371	}
372
373	#[inline]
374	fn write_u8(&mut self, i: u8) {
375	self.0.write_u8(i);
376	}
377
378	#[inline]
379	fn write_u16(&mut self, i: u16) {
380	self.0.write_u16(i);
381	}
382
383	#[inline]
384	fn write_u32(&mut self, i: u32) {
385	self.0.write_u32(i);
386	}
387
388	#[inline]
389	fn write_u64(&mut self, i: u64) {
390	self.0.write_u64(i);
391	}
392	}
393
394	impl hash::Hasher for SipHasher13 {
395	#[inline]
396	fn write(&mut self, msg: &[u8]) {
397	self.hasher.write(msg)
398	}
399
400	#[inline]
401	fn finish(&self) -> u64 {
402	self.hasher.finish()
403	}
404
405	#[inline]
406	fn write_usize(&mut self, i: usize) {
407	self.hasher.write_usize(i);
408	}
409
410	#[inline]
411	fn write_u8(&mut self, i: u8) {
412	self.hasher.write_u8(i);
413	}
414
415	#[inline]
416	fn write_u16(&mut self, i: u16) {
417	self.hasher.write_u16(i);
418	}
419
420	#[inline]
421	fn write_u32(&mut self, i: u32) {
422	self.hasher.write_u32(i);
423	}
424
425	#[inline]
426	fn write_u64(&mut self, i: u64) {
427	self.hasher.write_u64(i);
428	}
429	}
430
431	impl hash::Hasher for SipHasher24 {
432	#[inline]
433	fn write(&mut self, msg: &[u8]) {
434	self.hasher.write(msg)
435	}
436
437	#[inline]
438	fn finish(&self) -> u64 {
439	self.hasher.finish()
440	}
441
442	#[inline]
443	fn write_usize(&mut self, i: usize) {
444	self.hasher.write_usize(i);
445	}
446
447	#[inline]
448	fn write_u8(&mut self, i: u8) {
449	self.hasher.write_u8(i);
450	}
451
452	#[inline]
453	fn write_u16(&mut self, i: u16) {
454	self.hasher.write_u16(i);
455	}
456
457	#[inline]
458	fn write_u32(&mut self, i: u32) {
459	self.hasher.write_u32(i);
460	}
461
462	#[inline]
463	fn write_u64(&mut self, i: u64) {
464	self.hasher.write_u64(i);
465	}
466	}
467
468	impl<S: Sip> hash::Hasher for Hasher<S> {
469	#[inline]
470	fn write_usize(&mut self, i: usize) {
471	self.short_write(i, i.to_le() as u64);
472	}
473
474	#[inline]
475	fn write_u8(&mut self, i: u8) {
476	self.short_write(i, i as u64);
477	}
478
479	#[inline]
480	fn write_u32(&mut self, i: u32) {
481	self.short_write(i, i.to_le() as u64);
482	}
483
484	#[inline]
485	fn write_u64(&mut self, i: u64) {
486	self.short_write(i, i.to_le());
487	}
488
489	#[inline]
490	fn write(&mut self, msg: &[u8]) {
491	let length = msg.len();
492	self.length += length;
493
494	let mut needed = `0`;
495
496	if self.ntail != `0` {
497	needed = `8` - self.ntail;
498	self.tail \|= unsafe { u8to64_le(msg, `0`, cmp::min(length, needed)) } << (`8` * self.ntail);
499	if length < needed {
500	self.ntail += length;
501	return;
502	} else {
503	self.state.v3 ^= self.tail;
504	S::c_rounds(&mut self.state);
505	self.state.v0 ^= self.tail;
506	self.ntail = `0`;
507	}
508	}
509
510	// Buffered tail is now flushed, process new input.
511	let len = length - needed;
512	let left = len & `0x7`;
513
514	let mut i = needed;
515	while i < len - left {
516	let mi = unsafe { load_int_le!(msg, i, u64) };
517
518	self.state.v3 ^= mi;
519	S::c_rounds(&mut self.state);
520	self.state.v0 ^= mi;
521
522	i += `8`;
523	}
524
525	self.tail = unsafe { u8to64_le(msg, i, left) };
526	self.ntail = left;
527	}
528
529	#[inline]
530	fn finish(&self) -> u64 {
531	let mut state = self.state;
532
533	let b: u64 = ((self.length as u64 & `0xff`) << `56`) \| self.tail;
534
535	state.v3 ^= b;
536	S::c_rounds(&mut state);
537	state.v0 ^= b;
538
539	state.v2 ^= `0xff`;
540	S::d_rounds(&mut state);
541
542	state.v0 ^ state.v1 ^ state.v2 ^ state.v3
543	}
544	}
545
546	impl<S: Sip> Default for Hasher<S> {
547	/// Creates a `Hasher<S>` with the two initial keys set to 0.
548	#[inline]
549	fn default() -> Hasher<S> {
550	Hasher::new_with_keys(key0:`0`, key1:`0`)
551	}
552	}
553
554	#[doc(hidden)]
555	trait Sip {
556	fn c_rounds(_: &mut State);
557	fn d_rounds(_: &mut State);
558	}
559
560	#[derive(Debug, Clone, Copy, Default)]
561	struct Sip13Rounds;
562
563	impl Sip for Sip13Rounds {
564	#[inline]
565	fn c_rounds(state: &mut State) {
566	compress!(state);
567	}
568
569	#[inline]
570	fn d_rounds(state: &mut State) {
571	compress!(state);
572	compress!(state);
573	compress!(state);
574	}
575	}
576
577	#[derive(Debug, Clone, Copy, Default)]
578	struct Sip24Rounds;
579
580	impl Sip for Sip24Rounds {
581	#[inline]
582	fn c_rounds(state: &mut State) {
583	compress!(state);
584	compress!(state);
585	}
586
587	#[inline]
588	fn d_rounds(state: &mut State) {
589	compress!(state);
590	compress!(state);
591	compress!(state);
592	compress!(state);
593	}
594	}
595

Provided by KDAB

Definitions