use core::slice;
use core::mem::MaybeUninit;

use byteorder::{ByteOrder, LE};

use crate::Hasher as _;

/// 32-bit MurmurHash3 hasher
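///
/// A minimal usage sketch; it assumes both `core::hash::Hasher` (for `write`)
/// and this crate's `Hasher` trait (for `finish32`) are in scope:
///
/// ```ignore
/// let mut hasher = Hasher::default();
/// hasher.write(b"hello world");
/// let hash: u32 = hasher.finish32();
/// ```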
pub struct Hasher {
    buf: Buffer,
    index: Index,
    processed: u32,
    state: State,
}

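/// Running 32-bit hash state (`h1` in the reference MurmurHash3 x86 32-bit code).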
struct State(u32);

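/// 4-byte, 4-byte-aligned staging area for input that does not yet fill a
/// complete block.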
#[derive(Clone, Copy)]
#[repr(align(4))]
struct Buffer {
    bytes: MaybeUninit<[u8; 4]>,
}

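/// Number of bytes currently held in `Buffer` (0 to 3).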
#[derive(Clone, Copy, PartialEq)]
enum Index {
    _0,
    _1,
    _2,
    _3,
}

impl Index {
    fn usize(&self) -> usize {
        match *self {
            Index::_0 => 0,
            Index::_1 => 1,
            Index::_2 => 2,
            Index::_3 => 3,
        }
    }
}

impl From<usize> for Index {
    fn from(x: usize) -> Self {
        match x % 4 {
            0 => Index::_0,
            1 => Index::_1,
            2 => Index::_2,
            3 => Index::_3,
            _ => unreachable!(),
        }
    }
}

impl Hasher {
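    /// Buffers up to 3 trailing bytes until enough input arrives to form a
    /// complete 4-byte block.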
    fn push(&mut self, buf: &[u8]) {
        let start = self.index.usize();
        let len = buf.len();
        // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
        // self.buf.bytes[start..start + len].copy_from_slice(buf);
        for i in 0..len {
            unsafe {
                *self.buf.bytes.assume_init_mut().get_unchecked_mut(start + i) =
                    *buf.get_unchecked(i);
            }
        }
        self.index = Index::from(start + len);
    }
}

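// The staging buffer starts out uninitialized; `index` records how many of its
// bytes are valid, so only initialized bytes are ever read back.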
impl Default for Hasher {
    #[allow(deprecated)]
    fn default() -> Self {
        Hasher {
            buf: Buffer { bytes: MaybeUninit::uninit() },
            index: Index::_0,
            processed: 0,
            state: State(0),
        }
    }
}

impl crate::Hasher for Hasher {
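    /// Absorbs the 1-3 buffered tail bytes (if any) and then applies the
    /// MurmurHash3 finalization mix to produce the final 32-bit hash.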
    fn finish32(&self) -> u32 {
        // tail
        let mut state = match self.index {
            Index::_3 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[2]) << 16;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[1]) << 8;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_2 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[1]) << 8;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_1 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_0 => self.state.0,
        };

        // finalization mix
        state ^= self.processed;
        state ^= state >> 16;
        state = state.wrapping_mul(0x85ebca6b);
        state ^= state >> 13;
        state = state.wrapping_mul(0xc2b2ae35);
        state ^= state >> 16;

        state
    }
}

impl core::hash::Hasher for Hasher {
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let len = bytes.len();
        self.processed += len as u32;

        let body = if self.index == Index::_0 {
            bytes
        } else {
            let index = self.index.usize();
            if len + index >= 4 {
                // we can complete a block using the data left in the buffer
                // NOTE(unsafe) avoid panicking branch (`slice_index_len_fail`)
                // let (head, body) = bytes.split_at(4 - index);
                let mid = 4 - index;
                let head = unsafe { slice::from_raw_parts(bytes.as_ptr(), mid) };
                let body = unsafe {
                    slice::from_raw_parts(bytes.as_ptr().add(mid), len - mid)
                };

                // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
                // self.buf.bytes[index..].copy_from_slice(head);
                for i in 0..4 - index {
                    unsafe {
                        *self.buf.bytes.assume_init_mut().get_unchecked_mut(index + i) =
                            *head.get_unchecked(i);
                    }
                }

                self.index = Index::_0;

                self.state.process_block(&self.buf.bytes);

                body
            } else {
                bytes
            }
        };

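        // Consume `body` in 4-byte blocks; a trailing chunk of 1-3 bytes is
        // buffered via `push` and picked up by the next `write` or by `finish32`.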
        for block in body.chunks(4) {
            if block.len() == 4 {
                self.state
                    .process_block(unsafe { &*(block.as_ptr() as *const _) });
            } else {
                self.push(block);
            }
        }

        // XXX is this faster?
        // for block in body.exact_chunks(4) {
        //     self.state
        //         .process_block(unsafe { &*(block.as_ptr() as *const _) });
        // }

        // let tail = body.split_at(body.len() / 4 * 4).1;

        // self.push(tail);
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.finish32().into()
    }
}

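// Block-mix constants from the reference MurmurHash3 (x86, 32-bit) implementation.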
const C1: u32 = 0xcc9e2d51;
const C2: u32 = 0x1b873593;
const R1: u32 = 15;

impl State {
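    /// Mixes one little-endian 4-byte block into the running state
    /// (pre-mix, XOR, rotate left by 13, then `h * 5 + 0xe6546b64`).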
    fn process_block(&mut self, block: &MaybeUninit<[u8; 4]>) {
        self.0 ^= pre_mix(LE::read_u32(unsafe { block.assume_init_ref() }));
        self.0 = self.0.rotate_left(13);
        self.0 = 5u32.wrapping_mul(self.0).wrapping_add(0xe6546b64);
    }
}

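/// Applies the MurmurHash3 per-block scramble: multiply by `C1`, rotate left
/// by `R1` (15), then multiply by `C2`.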
fn pre_mix(mut block: u32) -> u32 {
    block = block.wrapping_mul(C1);
    block = block.rotate_left(R1);
    block = block.wrapping_mul(C2);
    block
}