//! This module implements the actual hash table logic. It works solely with
//! byte arrays and does not know anything about the unencoded key and value
//! types.
//!
//! The implementation makes sure that the encoded data contains no padding
//! bytes (which makes it deterministic) and nothing in it requires any specific
//! alignment.
//!
//! Many functions in this module are marked as `#[inline]`. This allows LLVM
//! to retain the information about byte array sizes, even though they are
//! converted to slices (of unknown size) from time to time.
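//!
//! The table data consists of two parts: an array of control bytes
//! (`EntryMetadata`), whose first `REFERENCE_GROUP_SIZE` entries are mirrored
//! at its end, and a power-of-two-sized array of `Entry<K, V>` slots.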

use crate::swisstable_group_query::{GroupQuery, GROUP_SIZE, REFERENCE_GROUP_SIZE};
use crate::{error::Error, HashFn};
use std::convert::TryInto;
use std::{fmt, marker::PhantomData, mem::size_of};

/// Values of this type represent key-value pairs *as they are stored on-disk*.
/// `#[repr(C)]` makes sure we have deterministic field order and the fields
/// being byte arrays makes sure that there are no padding bytes, alignment is
/// not restricted, and the data is endian-independent.
///
/// It is a strict requirement that any `&[Entry<K, V>]` can be transmuted
/// into a `&[u8]` and back, regardless of whether the byte array has been
/// moved in the meantime.
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub(crate) struct Entry<K: ByteArray, V: ByteArray> {
    pub key: K,
    pub value: V,
}

impl<K: ByteArray, V: ByteArray> Entry<K, V> {
    #[inline]
    fn new(key: K, value: V) -> Entry<K, V> {
        Entry { key, value }
    }
}

impl<K: ByteArray, V: ByteArray> Default for Entry<K, V> {
    #[inline]
    fn default() -> Entry<K, V> {
        Entry {
            key: K::zeroed(),
            value: V::zeroed(),
        }
    }
}
impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTable<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "RawTable (slot_count={}, hash_fn={}) {{",
            self.data.len(),
            std::any::type_name::<H>(),
        )?;
        for (index, (&metadata, entry)) in self.metadata.iter().zip(self.data.iter()).enumerate() {
            if is_empty_or_deleted(metadata) {
                writeln!(f, "slot {}: empty", index)?;
            } else {
                writeln!(
                    f,
                    "slot {}: key={:?}, value={:?}, control_byte={}",
                    index, entry.key, entry.value, metadata
                )?;
            }
        }
        writeln!(f, "}}")
    }
}

pub(crate) type EntryMetadata = u8;

type HashValue = u32;

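/// Returns the part of the hash value that is used to pick the starting
/// slot of the probe sequence.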
#[inline]
fn h1(h: HashValue) -> usize {
    h as usize
}

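/// Returns the top 7 bits of the hash value. This is what gets stored in the
/// control byte of an occupied slot; since it always fits in 7 bits, it can
/// never have the `EMPTY_CONTROL_BYTE` bit set.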
#[inline]
fn h2(h: HashValue) -> u8 {
    const SHIFT: usize = size_of::<HashValue>() * 8 - 7;
    (h >> SHIFT) as u8
}

/// This type implements the sequence in which slots are probed when resolving
/// hash value conflicts. Note that probing is always done as if `GROUP_SIZE`
/// were `REFERENCE_GROUP_SIZE` (i.e. 16), even though on targets without SSE2
/// `GROUP_SIZE` is going to be smaller. By keeping the probing pattern constant
/// (i.e. always visiting the same slots in the same order, independently of
/// `GROUP_SIZE`) we make the hash table layout target-independent. In other
/// words: a hash table created and persisted on a target with `GROUP_SIZE` x
/// can always be loaded and read on a target with a different `GROUP_SIZE` y.
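///
/// For example, in a sufficiently large table, a probe sequence starting at
/// slot 0 will visit reference-sized groups starting at slots 0, 16, 48,
/// 96, ..., regardless of the actual `GROUP_SIZE` of the current target.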
struct ProbeSeq<const GROUP_SIZE: usize> {
    index: usize,
    base: usize,
    chunk_index: usize,
    stride: usize,
}

impl<const GROUP_SIZE: usize> ProbeSeq<GROUP_SIZE> {
    #[inline]
    fn new(h1: usize, mask: usize) -> Self {
        let initial_index = h1 & mask;

        ProbeSeq {
            index: initial_index,
            base: initial_index,
            chunk_index: 0,
            stride: 0,
        }
    }

    #[inline]
    fn advance(&mut self, mask: usize) {
        debug_assert!(GROUP_SIZE <= REFERENCE_GROUP_SIZE);
        debug_assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);

        // The effect of the code in the two branches is identical if
        // GROUP_SIZE == REFERENCE_GROUP_SIZE, but the if statement should
        // make it very easy for the compiler to discard the more costly
        // version and only emit the simplified one.
        if GROUP_SIZE == REFERENCE_GROUP_SIZE {
            self.stride += REFERENCE_GROUP_SIZE;
            self.index += self.stride;
            self.index &= mask;
        } else {
            self.chunk_index += GROUP_SIZE;

            if self.chunk_index == REFERENCE_GROUP_SIZE {
                self.chunk_index = 0;
                self.stride += REFERENCE_GROUP_SIZE;
                self.base += self.stride;
            }

            self.index = (self.base + self.chunk_index) & mask;
        }
    }
}

#[inline]
fn group_starting_at<'a>(control_bytes: &'a [u8], index: usize) -> &'a [u8; GROUP_SIZE] {
    debug_assert!(index < control_bytes.len() - GROUP_SIZE);
    unsafe {
        std::slice::from_raw_parts(control_bytes.as_ptr().offset(index as isize), GROUP_SIZE)
            .try_into()
            .unwrap()
    }
}

#[inline]
fn is_empty_or_deleted(control_byte: u8) -> bool {
    (control_byte & EMPTY_CONTROL_BYTE) != 0
}

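/// Control bytes with this bit set mark an empty (or deleted) slot. Occupied
/// slots store the 7-bit `h2` value of their key, which never has this bit set.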
const EMPTY_CONTROL_BYTE: u8 = 0b1000_0000;

/// This type provides a readonly view of the given table data.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

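    /// Looks up `key` and returns a reference to its stored value, if present.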
    #[inline]
    pub(crate) fn find(&self, key: &K) -> Option<&V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());
        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

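        // Probe the table group by group: within each group, compare the key
        // at every slot whose control byte matches `h2`. If the group contains
        // an empty slot, the key cannot occur any further along the probe
        // sequence and the lookup fails.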
        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at(self.data, index);

                if likely!(entry.key.equals(key)) {
                    return Some(&entry.value);
                }
            }

            if likely!(group_query.any_empty()) {
                return None;
            }

            ps.advance(mask);
        }
    }

    #[inline]
    pub(crate) fn iter(&'a self) -> RawIter<'a, K, V> {
        RawIter::new(self.metadata, self.data)
    }

    /// Checks (for the first `slots_to_check` slots) whether the computed and
    /// the stored hash values match.
    ///
    /// A mismatch is an indication that the table has been deserialized with
    /// the wrong hash function.
    pub(crate) fn sanity_check_hashes(&self, slots_to_check: usize) -> Result<(), Error> {
        let slots_to_check = std::cmp::min(slots_to_check, self.data.len());

        for i in 0..slots_to_check {
            let metadata = self.metadata[i];
            let entry = &self.data[i];

            if is_empty_or_deleted(metadata) {
                if !entry.key.is_all_zeros() || !entry.value.is_all_zeros() {
                    let message = format!("Found empty entry with non-zero contents at {}", i);

                    return Err(Error(message));
                }
            } else {
                let hash = H::hash(entry.key.as_slice());
                let h2 = h2(hash);

                if metadata != h2 {
                    let message = format!(
                        "Control byte does not match hash value for entry {}. Expected {}, found {}.",
                        i, h2, metadata
                    );

                    return Err(Error(message));
                }
            }
        }

        Ok(())
    }
}

#[inline]
fn entry_at<K: ByteArray, V: ByteArray>(data: &[Entry<K, V>], index: usize) -> &Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked(index) }
}

#[inline]
fn metadata_at(metadata: &[EntryMetadata], index: usize) -> &EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked(index) }
}

/// This type provides a mutable view of the given table data. It allows for
/// inserting new entries but does *not* allow for growing the table.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a mut [EntryMetadata],
    data: &'a mut [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a mut [EntryMetadata], data: &'a mut [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    /// Inserts the given key-value pair into the hash table.
    ///
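    /// Returns the value that was previously stored for `key`, if any.
    ///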
    /// WARNING: This method assumes that there is free space in the hash table
    /// somewhere. If there isn't, it will end up in an infinite loop.
    #[inline]
    pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());

        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

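        // Same probing scheme as in `find`: overwrite the value if the key is
        // already present in the current group, otherwise claim the first
        // empty slot encountered.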
        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at_mut(self.data, index);

                if likely!(entry.key.equals(&key)) {
                    let ret = Some(entry.value);
                    entry.value = value;
                    return ret;
                }
            }

            if let Some(first_empty) = group_query.first_empty() {
                let index = (ps.index + first_empty) & mask;
                *entry_at_mut(self.data, index) = Entry::new(key, value);
                *metadata_at_mut(self.metadata, index) = h2;

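                // The control bytes of the first REFERENCE_GROUP_SIZE slots
                // are mirrored at the end of the metadata array, so that
                // loading a group that starts near the end of the table never
                // has to wrap around.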
                if index < REFERENCE_GROUP_SIZE {
                    let first_mirror = self.data.len();
                    *metadata_at_mut(self.metadata, first_mirror + index) = h2;
                    debug_assert_eq!(
                        self.metadata[..REFERENCE_GROUP_SIZE],
                        self.metadata[self.data.len()..]
                    );
                }

                return None;
            }

            ps.advance(mask);
        }
    }
}

#[inline]
fn entry_at_mut<K: ByteArray, V: ByteArray>(
    data: &mut [Entry<K, V>],
    index: usize,
) -> &mut Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked_mut(index) }
}

#[inline]
fn metadata_at_mut(metadata: &mut [EntryMetadata], index: usize) -> &mut EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked_mut(index) }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTableMut<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readonly = RawTable::<'_, K, V, H>::new(self.metadata, self.data);
        write!(f, "{:?}", readonly)
    }
}

pub(crate) struct RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    current_index: usize,
}

impl<'a, K, V> RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        RawIter {
            metadata,
            data,
            current_index: 0,
        }
    }
}

impl<'a, K, V> Iterator for RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    type Item = (EntryMetadata, &'a Entry<K, V>);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.current_index >= self.data.len() {
                return None;
            }

            let index = self.current_index;

            self.current_index += 1;

            let entry_metadata = *metadata_at(self.metadata, index);
            if !is_empty_or_deleted(entry_metadata) {
                return Some((entry_metadata, entry_at(self.data, index)));
            }
        }
    }
}

/// A trait that lets us abstract over different lengths of fixed size byte
/// arrays. Don't implement it for anything other than fixed size byte arrays!
pub trait ByteArray: Sized + Copy + Eq + Clone + PartialEq + fmt::Debug + 'static {
    fn zeroed() -> Self;
    fn as_slice(&self) -> &[u8];
    fn equals(&self, other: &Self) -> bool;
    fn is_all_zeros(&self) -> bool {
        self.as_slice().iter().all(|b| *b == 0)
    }
}

impl<const LEN: usize> ByteArray for [u8; LEN] {
    #[inline(always)]
    fn zeroed() -> Self {
        [0u8; LEN]
    }

    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self[..]
    }

    // This custom implementation of comparing the fixed size arrays
    // seems to make a big difference for performance (at least for
    // 16+ byte keys).
    #[inline]
    fn equals(&self, other: &Self) -> bool {
        // Most branches here are optimized away at compile time
        // because they depend on values known at compile time.

        const USIZE: usize = size_of::<usize>();

        // Special-case a few sizes we care about
        if size_of::<Self>() == USIZE {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0);
        }

        if size_of::<Self>() == USIZE * 2 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1);
        }

        if size_of::<Self>() == USIZE * 3 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2);
        }

        if size_of::<Self>() == USIZE * 4 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2)
                && read_usize(&self[..], 3) == read_usize(&other[..], 3);
        }

        // Fallback: plain slice comparison
        return &self[..] == &other[..];

        #[inline(always)]
        fn read_usize(bytes: &[u8], index: usize) -> usize {
            const STRIDE: usize = size_of::<usize>();
            usize::from_le_bytes(
                bytes[STRIDE * index..STRIDE * (index + 1)]
                    .try_into()
                    .unwrap(),
            )
        }
    }
}

#[cfg(test)]
#[rustfmt::skip]
mod tests {
    use super::*;
    use crate::FxHashFn;

    fn make_table<
        I: Iterator<Item = (K, V)> + ExactSizeIterator,
        K: ByteArray,
        V: ByteArray,
        H: HashFn,
    >(
        xs: I,
    ) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
        let size = xs.size_hint().0.next_power_of_two();
        let mut data = vec![Entry::default(); size];
        let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];

        assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));

        {
            let mut table: RawTableMut<K, V, H> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: Default::default(),
            };

            for (k, v) in xs {
                table.insert(k, v);
            }
        }

        (metadata, data)
    }

    #[test]
    fn stress() {
        let xs: Vec<[u8; 2]> = (0 ..= u16::MAX).map(|x| x.to_le_bytes()).collect();

        let (mut metadata, mut data) = make_table::<_, _, _, FxHashFn>(xs.iter().map(|x| (*x, *x)));

        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for x in xs.iter() {
                assert_eq!(table.find(x), Some(x));
            }
        }

        // overwrite all values
        {
            let mut table: RawTableMut<_, _, FxHashFn> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.insert(*x, [i as u8, (i + 1) as u8]), Some(*x));
            }
        }

        // Check if we find the new expected values
        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.find(x), Some(&[i as u8, (i + 1) as u8]));
            }
        }
    }
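
    // Additional check for `ByteArray::equals`: the word-at-a-time fast paths
    // (which the 2-byte keys used in `stress` above never hit) must agree
    // with a plain slice comparison, both for equal and for differing arrays.
    #[test]
    fn byte_array_equals() {
        fn check<const LEN: usize>() {
            let a = [0xABu8; LEN];
            let mut b = a;

            assert!(a.equals(&b));
            assert_eq!(a.equals(&b), &a[..] == &b[..]);

            // Flip a single bit in the last byte; the specialized
            // comparison has to notice the difference.
            b[LEN - 1] ^= 1;
            assert!(!a.equals(&b));
            assert_eq!(a.equals(&b), &a[..] == &b[..]);
        }

        // Multiples of the word size hit the specialized branches (8, 16,
        // 24, and 32 bytes on 64-bit targets); 5 bytes exercises the
        // fallback path.
        check::<8>();
        check::<16>();
        check::<24>();
        check::<32>();
        check::<5>();
    }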

    // This test makes sure that ProbeSeq will always visit the same slots
    // in the same order, regardless of the actual GROUP_SIZE.
    #[test]
    fn probe_seq() {
        struct ReferenceProbeSeq {
            index: usize,
            stride: usize,
        }

        impl ReferenceProbeSeq {
            fn new(h1: usize, mask: usize) -> ReferenceProbeSeq {
                ReferenceProbeSeq {
                    index: h1 & mask,
                    stride: 0,
                }
            }

            fn advance(&mut self, mask: usize) {
                self.stride += REFERENCE_GROUP_SIZE;
                self.index += self.stride;
                self.index &= mask;
            }
        }

        fn test_with_group_size<const GROUP_SIZE: usize>() {
            assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
            assert!(REFERENCE_GROUP_SIZE >= GROUP_SIZE);

            for i in 4 .. 17 {
                let item_count = 1 << i;
                assert!(item_count % REFERENCE_GROUP_SIZE == 0);
                assert!(item_count % GROUP_SIZE == 0);

                let mask = item_count - 1;

                let mut expected = Vec::with_capacity(item_count);

                let mut refseq = ReferenceProbeSeq::new(0, mask);
                for _ in 0 .. item_count / REFERENCE_GROUP_SIZE {
                    for index in refseq.index .. refseq.index + REFERENCE_GROUP_SIZE {
                        expected.push(index & mask);
                    }
                    refseq.advance(mask);
                }

                let mut actual = Vec::with_capacity(item_count);

                let mut seq = ProbeSeq::<GROUP_SIZE>::new(0, mask);
                for _ in 0 .. item_count / GROUP_SIZE {
                    for index in seq.index .. seq.index + GROUP_SIZE {
                        actual.push(index & mask);
                    }
                    seq.advance(mask);
                }

                assert_eq!(expected, actual);
            }
        }

        test_with_group_size::<4>();
        test_with_group_size::<8>();
        test_with_group_size::<16>();
    }
}