| 1 | //! This crate implements a hash table that can be used as is in its binary, on-disk format. |
| 2 | //! The goal is to provide a high performance data structure that can be used without any significant up-front decoding. |
//! The implementation makes no assumptions about alignment or endianness of the underlying data,
| 4 | //! so a table encoded on one platform can be used on any other platform and |
| 5 | //! the binary data can be mapped into memory at arbitrary addresses. |
| 6 | //! |
| 7 | //! |
| 8 | //! ## Usage |
| 9 | //! |
| 10 | //! In order to use the hash table one needs to implement the `Config` trait. |
| 11 | //! This trait defines how the table is encoded and what hash function is used. |
| 12 | //! With a `Config` in place the `HashTableOwned` type can be used to build and serialize a hash table. |
| 13 | //! The `HashTable` type can then be used to create an almost zero-cost view of the serialized hash table. |
| 14 | //! |
| 15 | //! ```rust |
| 16 | //! |
| 17 | //! use odht::{HashTable, HashTableOwned, Config, FxHashFn}; |
| 18 | //! |
| 19 | //! struct MyConfig; |
| 20 | //! |
| 21 | //! impl Config for MyConfig { |
| 22 | //! |
| 23 | //! type Key = u64; |
| 24 | //! type Value = u32; |
| 25 | //! |
| 26 | //! type EncodedKey = [u8; 8]; |
| 27 | //! type EncodedValue = [u8; 4]; |
| 28 | //! |
| 29 | //! type H = FxHashFn; |
| 30 | //! |
| 31 | //! #[inline ] fn encode_key(k: &Self::Key) -> Self::EncodedKey { k.to_le_bytes() } |
| 32 | //! #[inline ] fn encode_value(v: &Self::Value) -> Self::EncodedValue { v.to_le_bytes() } |
| 33 | //! #[inline ] fn decode_key(k: &Self::EncodedKey) -> Self::Key { u64::from_le_bytes(*k) } |
| 34 | //! #[inline ] fn decode_value(v: &Self::EncodedValue) -> Self::Value { u32::from_le_bytes(*v)} |
| 35 | //! } |
| 36 | //! |
| 37 | //! fn main() { |
| 38 | //! let mut builder = HashTableOwned::<MyConfig>::with_capacity(3, 95); |
| 39 | //! |
| 40 | //! builder.insert(&1, &2); |
| 41 | //! builder.insert(&3, &4); |
| 42 | //! builder.insert(&5, &6); |
| 43 | //! |
| 44 | //! let serialized = builder.raw_bytes().to_owned(); |
| 45 | //! |
| 46 | //! let table = HashTable::<MyConfig, &[u8]>::from_raw_bytes( |
| 47 | //! &serialized[..] |
| 48 | //! ).unwrap(); |
| 49 | //! |
| 50 | //! assert_eq!(table.get(&1), Some(2)); |
| 51 | //! assert_eq!(table.get(&3), Some(4)); |
| 52 | //! assert_eq!(table.get(&5), Some(6)); |
| 53 | //! } |
| 54 | //! ``` |
| 55 | |
| 56 | #![cfg_attr (feature = "nightly" , feature(core_intrinsics))] |
| 57 | |
| 58 | #[cfg (test)] |
| 59 | extern crate quickcheck; |
| 60 | |
// On nightly, `likely!`/`unlikely!` forward branch-probability hints to the
// optimizer via `core_intrinsics` (enabled at the top of this file). On
// stable they expand to the bare expression, so behavior is identical and
// only codegen quality differs.
#[cfg(feature = "nightly")]
macro_rules! likely {
    ($x:expr) => {
        std::intrinsics::likely($x)
    };
}

#[cfg(not(feature = "nightly"))]
macro_rules! likely {
    ($x:expr) => {
        $x
    };
}

#[cfg(feature = "nightly")]
macro_rules! unlikely {
    ($x:expr) => {
        std::intrinsics::unlikely($x)
    };
}

#[cfg(not(feature = "nightly"))]
macro_rules! unlikely {
    ($x:expr) => {
        $x
    };
}
| 88 | |
| 89 | mod error; |
| 90 | mod fxhash; |
| 91 | mod memory_layout; |
| 92 | mod raw_table; |
| 93 | mod swisstable_group_query; |
| 94 | mod unhash; |
| 95 | |
| 96 | use error::Error; |
| 97 | use memory_layout::Header; |
| 98 | use std::borrow::{Borrow, BorrowMut}; |
| 99 | use swisstable_group_query::REFERENCE_GROUP_SIZE; |
| 100 | |
| 101 | pub use crate::fxhash::FxHashFn; |
| 102 | pub use crate::unhash::UnHashFn; |
| 103 | |
| 104 | use crate::raw_table::{ByteArray, RawIter, RawTable, RawTableMut}; |
| 105 | |
/// This trait provides a complete "configuration" for a hash table, i.e. it
/// defines the key and value types, how these are encoded and what hash
/// function is being used.
///
/// Implementations of the `encode_key` and `encode_value` methods must encode
/// the given key/value into a fixed size array. The encoding must be
/// deterministic (i.e. no random padding bytes) and must be independent of
/// platform endianness. It is always highly recommended to mark these methods
/// as `#[inline]`.
pub trait Config {
    /// The key type users of the table work with.
    type Key;
    /// The value type users of the table work with.
    type Value;

    // The EncodedKey and EncodedValue types must always be a fixed size array of bytes,
    // e.g. [u8; 4].
    type EncodedKey: ByteArray;
    type EncodedValue: ByteArray;

    /// The hash function applied to encoded keys.
    type H: HashFn;

    /// Implementations of the `encode_key` and `encode_value` methods must encode
    /// the given key/value into a fixed size array. See above for requirements.
    fn encode_key(k: &Self::Key) -> Self::EncodedKey;

    /// Implementations of the `encode_key` and `encode_value` methods must encode
    /// the given key/value into a fixed size array. See above for requirements.
    fn encode_value(v: &Self::Value) -> Self::EncodedValue;

    /// Decodes a key previously produced by `encode_key`.
    fn decode_key(k: &Self::EncodedKey) -> Self::Key;
    /// Decodes a value previously produced by `encode_value`.
    fn decode_value(v: &Self::EncodedValue) -> Self::Value;
}
| 137 | |
/// This trait represents hash functions as used by HashTable and
/// HashTableOwned.
pub trait HashFn: Eq {
    /// Hashes the given (encoded-key) bytes to a 32-bit hash value.
    /// Must be deterministic and platform-independent.
    fn hash(bytes: &[u8]) -> u32;
}
| 143 | |
/// A [HashTableOwned] keeps the underlying data on the heap and
/// can resize itself on demand.
#[derive(Clone)]
pub struct HashTableOwned<C: Config> {
    // Heap-backed storage: header plus entry metadata/data, in the
    // same binary layout that `raw_bytes()` exposes.
    allocation: memory_layout::Allocation<C, Box<[u8]>>,
}
| 150 | |
| 151 | impl<C: Config> Default for HashTableOwned<C> { |
| 152 | fn default() -> Self { |
| 153 | HashTableOwned::with_capacity(max_item_count:12, max_load_factor_percent:87) |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | impl<C: Config> HashTableOwned<C> { |
| 158 | /// Creates a new [HashTableOwned] that can hold at least `max_item_count` |
| 159 | /// items while maintaining the specified load factor. |
| 160 | pub fn with_capacity(max_item_count: usize, max_load_factor_percent: u8) -> HashTableOwned<C> { |
| 161 | assert!(max_load_factor_percent <= 100); |
| 162 | assert!(max_load_factor_percent > 0); |
| 163 | |
| 164 | Self::with_capacity_internal( |
| 165 | max_item_count, |
| 166 | Factor::from_percent(max_load_factor_percent), |
| 167 | ) |
| 168 | } |
| 169 | |
| 170 | fn with_capacity_internal(max_item_count: usize, max_load_factor: Factor) -> HashTableOwned<C> { |
| 171 | let slots_needed = slots_needed(max_item_count, max_load_factor); |
| 172 | assert!(slots_needed > 0); |
| 173 | |
| 174 | let allocation = memory_layout::allocate(slots_needed, 0, max_load_factor); |
| 175 | |
| 176 | HashTableOwned { allocation } |
| 177 | } |
| 178 | |
| 179 | /// Retrieves the value for the given key. Returns `None` if no entry is found. |
| 180 | #[inline ] |
| 181 | pub fn get(&self, key: &C::Key) -> Option<C::Value> { |
| 182 | let encoded_key = C::encode_key(key); |
| 183 | if let Some(encoded_value) = self.as_raw().find(&encoded_key) { |
| 184 | Some(C::decode_value(encoded_value)) |
| 185 | } else { |
| 186 | None |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | #[inline ] |
| 191 | pub fn contains_key(&self, key: &C::Key) -> bool { |
| 192 | let encoded_key = C::encode_key(key); |
| 193 | self.as_raw().find(&encoded_key).is_some() |
| 194 | } |
| 195 | |
| 196 | /// Inserts the given key-value pair into the table. |
| 197 | /// Grows the table if necessary. |
| 198 | #[inline ] |
| 199 | pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> { |
| 200 | let (item_count, max_item_count) = { |
| 201 | let header = self.allocation.header(); |
| 202 | let max_item_count = max_item_count_for(header.slot_count(), header.max_load_factor()); |
| 203 | (header.item_count(), max_item_count) |
| 204 | }; |
| 205 | |
| 206 | if unlikely!(item_count == max_item_count) { |
| 207 | self.grow(); |
| 208 | } |
| 209 | |
| 210 | debug_assert!( |
| 211 | item_count |
| 212 | < max_item_count_for( |
| 213 | self.allocation.header().slot_count(), |
| 214 | self.allocation.header().max_load_factor() |
| 215 | ) |
| 216 | ); |
| 217 | |
| 218 | let encoded_key = C::encode_key(key); |
| 219 | let encoded_value = C::encode_value(value); |
| 220 | |
| 221 | with_raw_mut(&mut self.allocation, |header, mut raw_table| { |
| 222 | if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) { |
| 223 | Some(C::decode_value(&old_value)) |
| 224 | } else { |
| 225 | header.set_item_count(item_count + 1); |
| 226 | None |
| 227 | } |
| 228 | }) |
| 229 | } |
| 230 | |
| 231 | #[inline ] |
| 232 | pub fn iter(&self) -> Iter<'_, C> { |
| 233 | let (entry_metadata, entry_data) = self.allocation.data_slices(); |
| 234 | Iter(RawIter::new(entry_metadata, entry_data)) |
| 235 | } |
| 236 | |
| 237 | pub fn from_iterator<I: IntoIterator<Item = (C::Key, C::Value)>>( |
| 238 | it: I, |
| 239 | max_load_factor_percent: u8, |
| 240 | ) -> Self { |
| 241 | let it = it.into_iter(); |
| 242 | |
| 243 | let known_size = match it.size_hint() { |
| 244 | (min, Some(max)) => { |
| 245 | if min == max { |
| 246 | Some(max) |
| 247 | } else { |
| 248 | None |
| 249 | } |
| 250 | } |
| 251 | _ => None, |
| 252 | }; |
| 253 | |
| 254 | if let Some(known_size) = known_size { |
| 255 | let mut table = HashTableOwned::with_capacity(known_size, max_load_factor_percent); |
| 256 | |
| 257 | let initial_slot_count = table.allocation.header().slot_count(); |
| 258 | |
| 259 | for (k, v) in it { |
| 260 | table.insert(&k, &v); |
| 261 | } |
| 262 | |
| 263 | // duplicates |
| 264 | assert!(table.len() <= known_size); |
| 265 | assert_eq!(table.allocation.header().slot_count(), initial_slot_count); |
| 266 | |
| 267 | table |
| 268 | } else { |
| 269 | let items: Vec<_> = it.collect(); |
| 270 | Self::from_iterator(items, max_load_factor_percent) |
| 271 | } |
| 272 | } |
| 273 | |
| 274 | /// Constructs a [HashTableOwned] from its raw byte representation. |
| 275 | /// The provided data must have the exact right number of bytes. |
| 276 | /// |
| 277 | /// This method has linear time complexity as it needs to make its own |
| 278 | /// copy of the given data. |
| 279 | /// |
| 280 | /// The method will verify the header of the given data and return an |
| 281 | /// error if the verification fails. |
| 282 | pub fn from_raw_bytes(data: &[u8]) -> Result<HashTableOwned<C>, Box<dyn std::error::Error>> { |
| 283 | let data = data.to_owned().into_boxed_slice(); |
| 284 | let allocation = memory_layout::Allocation::from_raw_bytes(data)?; |
| 285 | |
| 286 | Ok(HashTableOwned { allocation }) |
| 287 | } |
| 288 | |
| 289 | #[inline ] |
| 290 | pub unsafe fn from_raw_bytes_unchecked(data: &[u8]) -> HashTableOwned<C> { |
| 291 | let data = data.to_owned().into_boxed_slice(); |
| 292 | let allocation = memory_layout::Allocation::from_raw_bytes_unchecked(data); |
| 293 | |
| 294 | HashTableOwned { allocation } |
| 295 | } |
| 296 | |
| 297 | /// Returns the number of items stored in the hash table. |
| 298 | #[inline ] |
| 299 | pub fn len(&self) -> usize { |
| 300 | self.allocation.header().item_count() |
| 301 | } |
| 302 | |
| 303 | #[inline ] |
| 304 | pub fn raw_bytes(&self) -> &[u8] { |
| 305 | self.allocation.raw_bytes() |
| 306 | } |
| 307 | |
| 308 | #[inline ] |
| 309 | fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> { |
| 310 | let (entry_metadata, entry_data) = self.allocation.data_slices(); |
| 311 | RawTable::new(entry_metadata, entry_data) |
| 312 | } |
| 313 | |
| 314 | #[inline (never)] |
| 315 | #[cold ] |
| 316 | fn grow(&mut self) { |
| 317 | let initial_slot_count = self.allocation.header().slot_count(); |
| 318 | let initial_item_count = self.allocation.header().item_count(); |
| 319 | let initial_max_load_factor = self.allocation.header().max_load_factor(); |
| 320 | |
| 321 | let mut new_table = |
| 322 | Self::with_capacity_internal(initial_item_count * 2, initial_max_load_factor); |
| 323 | |
| 324 | // Copy the entries over with the internal `insert_entry()` method, |
| 325 | // which allows us to do insertions without hashing everything again. |
| 326 | { |
| 327 | with_raw_mut(&mut new_table.allocation, |header, mut raw_table| { |
| 328 | for (_, entry_data) in self.as_raw().iter() { |
| 329 | raw_table.insert(entry_data.key, entry_data.value); |
| 330 | } |
| 331 | |
| 332 | header.set_item_count(initial_item_count); |
| 333 | }); |
| 334 | } |
| 335 | |
| 336 | *self = new_table; |
| 337 | |
| 338 | assert!( |
| 339 | self.allocation.header().slot_count() >= 2 * initial_slot_count, |
| 340 | "Allocation did not grow properly. Slot count is {} but was expected to be \ |
| 341 | at least {}" , |
| 342 | self.allocation.header().slot_count(), |
| 343 | 2 * initial_slot_count |
| 344 | ); |
| 345 | assert_eq!(self.allocation.header().item_count(), initial_item_count); |
| 346 | assert_eq!( |
| 347 | self.allocation.header().max_load_factor(), |
| 348 | initial_max_load_factor |
| 349 | ); |
| 350 | } |
| 351 | } |
| 352 | |
impl<C: Config> std::fmt::Debug for HashTableOwned<C> {
    // Prints a summary line (item count, capacity, load factor) followed by
    // the Debug output of the underlying raw table.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let header: &Header = self.allocation.header();

        writeln!(
            f,
            "(item_count= {}, max_item_count= {}, max_load_factor= {}%)",
            header.item_count(),
            max_item_count_for(header.slot_count(), header.max_load_factor()),
            header.max_load_factor().to_percent(),
        )?;

        writeln!(f, " {:?}", self.as_raw())
    }
}
| 368 | |
/// The [HashTable] type provides a cheap way to construct a non-resizable view
/// of a persisted hash table. If the underlying data storage `D` implements
/// `BorrowMut<[u8]>` then the table can be modified in place.
#[derive(Clone, Copy)]
pub struct HashTable<C: Config, D: Borrow<[u8]>> {
    // Borrowed (or owned, depending on `D`) storage in the serialized layout.
    allocation: memory_layout::Allocation<C, D>,
}
| 376 | |
impl<C: Config, D: Borrow<[u8]>> HashTable<C, D> {
    /// Constructs a [HashTable] from its raw byte representation.
    /// The provided data must have the exact right number of bytes.
    ///
    /// This method has constant time complexity and will only verify the header
    /// data of the hash table. It will not copy any data.
    pub fn from_raw_bytes(data: D) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
        let allocation = memory_layout::Allocation::from_raw_bytes(data)?;
        Ok(HashTable { allocation })
    }

    /// Constructs a [HashTable] from its raw byte representation without doing
    /// any verification of the underlying data. It is the user's responsibility
    /// to make sure that the underlying data is actually a valid hash table.
    ///
    /// The [HashTable::from_raw_bytes] method provides a safe alternative to this
    /// method.
    #[inline]
    pub unsafe fn from_raw_bytes_unchecked(data: D) -> HashTable<C, D> {
        HashTable {
            allocation: memory_layout::Allocation::from_raw_bytes_unchecked(data),
        }
    }

    /// Retrieves the value for the given key. Returns `None` if no entry is found.
    #[inline]
    pub fn get(&self, key: &C::Key) -> Option<C::Value> {
        let encoded_key = C::encode_key(key);
        self.as_raw().find(&encoded_key).map(C::decode_value)
    }

    /// Returns true if the table contains an entry for the given key.
    #[inline]
    pub fn contains_key(&self, key: &C::Key) -> bool {
        let encoded_key = C::encode_key(key);
        self.as_raw().find(&encoded_key).is_some()
    }

    /// Returns an iterator over the (decoded) key-value pairs of the table.
    #[inline]
    pub fn iter(&self) -> Iter<'_, C> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        Iter(RawIter::new(entry_metadata, entry_data))
    }

    /// Returns the number of items stored in the hash table.
    #[inline]
    pub fn len(&self) -> usize {
        self.allocation.header().item_count()
    }

    /// Returns the underlying serialized bytes of the table.
    #[inline]
    pub fn raw_bytes(&self) -> &[u8] {
        self.allocation.raw_bytes()
    }

    // Read-only view of the underlying raw table.
    #[inline]
    fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        RawTable::new(entry_metadata, entry_data)
    }
}
| 436 | |
impl<C: Config, D: Borrow<[u8]> + BorrowMut<[u8]>> HashTable<C, D> {
    /// Initializes the given byte slice as an empty hash table with the
    /// specified capacity and load factor, returning a [HashTable] view
    /// over it.
    ///
    /// The slice must have exactly the length computed by [bytes_needed]
    /// for the same parameters; otherwise an error is returned.
    pub fn init_in_place(
        mut data: D,
        max_item_count: usize,
        max_load_factor_percent: u8,
    ) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
        let max_load_factor = Factor::from_percent(max_load_factor_percent);
        let byte_count = bytes_needed_internal::<C>(max_item_count, max_load_factor);
        if data.borrow_mut().len() != byte_count {
            return Err(Error(format!(
                "byte slice to initialize has wrong length ( {} instead of {})",
                data.borrow_mut().len(),
                byte_count
            )))?;
        }

        let slot_count = slots_needed(max_item_count, max_load_factor);
        let allocation = memory_layout::init_in_place::<C, _>(data, slot_count, 0, max_load_factor);
        Ok(HashTable { allocation })
    }

    /// Inserts the given key-value pair into the table.
    /// Unlike [HashTableOwned::insert] this method cannot grow the underlying table
    /// if there is not enough space for the new item. Instead the call will panic.
    #[inline]
    pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> {
        let item_count = self.allocation.header().item_count();
        let max_load_factor = self.allocation.header().max_load_factor();
        let slot_count = self.allocation.header().slot_count();
        // FIXME: This is actually a bit to conservative because it does not account for
        //        cases where an entry is overwritten and thus the item count does not
        //        change.
        assert!(item_count < max_item_count_for(slot_count, max_load_factor));

        let encoded_key = C::encode_key(key);
        let encoded_value = C::encode_value(value);

        with_raw_mut(&mut self.allocation, |header, mut raw_table| {
            if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) {
                // Overwrote an existing entry; the item count is unchanged.
                Some(C::decode_value(&old_value))
            } else {
                header.set_item_count(item_count + 1);
                None
            }
        })
    }
}
| 484 | |
| 485 | /// Computes the exact number of bytes needed for storing a HashTable with the |
| 486 | /// given max item count and load factor. The result can be used for allocating |
| 487 | /// storage to be passed into [HashTable::init_in_place]. |
| 488 | pub fn bytes_needed<C: Config>(max_item_count: usize, max_load_factor_percent: u8) -> usize { |
| 489 | let max_load_factor: Factor = Factor::from_percent(max_load_factor_percent); |
| 490 | bytes_needed_internal::<C>(max_item_count, max_load_factor) |
| 491 | } |
| 492 | |
| 493 | fn bytes_needed_internal<C: Config>(max_item_count: usize, max_load_factor: Factor) -> usize { |
| 494 | let slot_count: usize = slots_needed(max_item_count, max_load_factor); |
| 495 | memory_layout::bytes_needed::<C>(slot_count) |
| 496 | } |
| 497 | |
/// Iterator over the (decoded) key-value pairs of a hash table.
pub struct Iter<'a, C: Config>(RawIter<'a, C::EncodedKey, C::EncodedValue>);
| 499 | |
| 500 | impl<'a, C: Config> Iterator for Iter<'a, C> { |
| 501 | type Item = (C::Key, C::Value); |
| 502 | |
| 503 | fn next(&mut self) -> Option<Self::Item> { |
| 504 | self.0.next().map(|(_, entry: &'a Entry<::EncodedKey, …>)| { |
| 505 | let key: ::Key = C::decode_key(&entry.key); |
| 506 | let value: ::Value = C::decode_value(&entry.value); |
| 507 | |
| 508 | (key, value) |
| 509 | }) |
| 510 | } |
| 511 | } |
| 512 | |
| 513 | // We use integer math here as not to run into any issues with |
| 514 | // platform-specific floating point math implementation. |
| 515 | fn slots_needed(item_count: usize, max_load_factor: Factor) -> usize { |
| 516 | // Note: we round up here |
| 517 | let slots_needed: usize = max_load_factor.apply_inverse(item_count); |
| 518 | std::cmp::max( |
| 519 | v1:slots_needed.checked_next_power_of_two().unwrap(), |
| 520 | REFERENCE_GROUP_SIZE, |
| 521 | ) |
| 522 | } |
| 523 | |
/// Computes the maximum number of items a table with `slot_count` slots may
/// hold without exceeding `max_load_factor`.
fn max_item_count_for(slot_count: usize, max_load_factor: Factor) -> usize {
    // Note: we round down here
    max_load_factor.apply(slot_count)
}
| 528 | |
| 529 | #[inline ] |
| 530 | fn with_raw_mut<C, M, F, R>(allocation: &mut memory_layout::Allocation<C, M>, f: F) -> R |
| 531 | where |
| 532 | C: Config, |
| 533 | M: BorrowMut<[u8]>, |
| 534 | F: FnOnce(&mut Header, RawTableMut<'_, C::EncodedKey, C::EncodedValue, C::H>) -> R, |
| 535 | { |
| 536 | allocation.with_mut_parts(|header: &mut Header, entry_metadata: &mut [u8], entry_data: &mut [Entry<::EncodedKey, …>]| { |
| 537 | f(header, RawTableMut::new(entry_metadata, entry_data)) |
| 538 | }) |
| 539 | } |
| 540 | |
/// This type is used for computing max item counts for a given load factor
/// efficiently. We use integer math here so that things are the same on
/// all platforms and with all compiler settings.
///
/// A `Factor` is a fixed-point fraction with `BASE` (= `u16::MAX`) as the
/// implicit denominator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Factor(pub u16);

impl Factor {
    const BASE: usize = u16::MAX as usize;

    /// Converts a percentage (0..=100) into a fixed-point factor.
    #[inline]
    fn from_percent(percent: u8) -> Factor {
        Factor((percent as usize * Self::BASE / 100) as u16)
    }

    /// Converts the factor back into a (truncated) percentage.
    fn to_percent(self) -> usize {
        self.0 as usize * 100 / Self::BASE
    }

    /// Multiplies `x` by the factor, rounding down.
    #[inline]
    fn apply(self, x: usize) -> usize {
        // 128-bit intermediates rule out overflow for any usize input.
        let product = (x as u128) * (self.0 as u128);
        (product >> 16) as usize
    }

    /// Divides `x` by the factor, rounding up.
    ///
    /// NOTE(review): a zero factor (e.g. `from_percent(0)`) makes this divide
    /// by zero -- callers are expected to enforce a non-zero load factor.
    #[inline]
    fn apply_inverse(self, x: usize) -> usize {
        // 128-bit intermediates rule out overflow for any usize input.
        let base = Self::BASE as u128;
        let factor = self.0 as u128;
        ((base * (x as u128) + factor - 1) / factor) as usize
    }
}
| 581 | |
| 582 | #[cfg (test)] |
| 583 | mod tests { |
| 584 | use super::*; |
| 585 | use std::convert::TryInto; |
| 586 | |
// A minimal `Config` for tests: `u32` keys and values with little-endian
// encoding, hashed with `FxHashFn`. The uninhabited enum makes the type
// impossible to instantiate -- it exists purely as a type-level config.
enum TestConfig {}

impl Config for TestConfig {
    type EncodedKey = [u8; 4];
    type EncodedValue = [u8; 4];

    type Key = u32;
    type Value = u32;

    type H = FxHashFn;

    fn encode_key(k: &Self::Key) -> Self::EncodedKey {
        k.to_le_bytes()
    }

    fn encode_value(v: &Self::Value) -> Self::EncodedValue {
        v.to_le_bytes()
    }

    fn decode_key(k: &Self::EncodedKey) -> Self::Key {
        u32::from_le_bytes(k[..].try_into().unwrap())
    }

    fn decode_value(v: &Self::EncodedValue) -> Self::Value {
        u32::from_le_bytes(v[..].try_into().unwrap())
    }
}
| 614 | |
/// Generates `count` distinct key-value pairs spread evenly over the whole
/// `u32` key range, always including `u32::MAX` as a key. Panics if the
/// generated items are not exactly `count` distinct pairs.
fn make_test_items(count: usize) -> Vec<(u32, u32)> {
    if count == 0 {
        return vec![];
    }

    let mut items: Vec<(u32, u32)> = Vec::with_capacity(count);

    if count > 1 {
        let steps = (count - 1) as u32;
        let step = u32::MAX / steps;

        items.extend((0..steps).map(|i| {
            let x = i * step;
            (x, u32::MAX - x)
        }));
    }

    items.push((u32::MAX, 0));

    items.sort();
    items.dedup();
    assert_eq!(items.len(), count);

    items
}
| 640 | |
// `from_iterator` must produce a table containing exactly the input items,
// for a range of table sizes.
#[test]
fn from_iterator() {
    for count in 0..33 {
        let items = make_test_items(count);
        let table = HashTableOwned::<TestConfig>::from_iterator(items.clone(), 95);
        assert_eq!(table.len(), items.len());

        let mut actual_items: Vec<_> = table.iter().collect();
        actual_items.sort();

        assert_eq!(items, actual_items);
    }
}

// A table initialized in a caller-provided buffer must accept exactly the
// number of items it was sized for, and keep all of them retrievable.
#[test]
fn init_in_place() {
    for count in 0..33 {
        let items = make_test_items(count);
        let byte_count = bytes_needed::<TestConfig>(items.len(), 87);
        let data = vec![0u8; byte_count];

        let mut table =
            HashTable::<TestConfig, _>::init_in_place(data, items.len(), 87).unwrap();

        for (i, (k, v)) in items.iter().enumerate() {
            assert_eq!(table.len(), i);
            assert_eq!(table.insert(k, v), None);
            assert_eq!(table.len(), i + 1);

            // Make sure we still can find all items previously inserted.
            for (k, v) in items.iter().take(i) {
                assert_eq!(table.get(k), Some(*v));
            }
        }

        let mut actual_items: Vec<_> = table.iter().collect();
        actual_items.sort();

        assert_eq!(items, actual_items);
    }
}

// The serialized table must work when mapped at any byte offset; each loop
// iteration prepends a byte to shift the table's alignment by one.
#[test]
fn hash_table_at_different_alignments() {
    let items = make_test_items(33);

    let mut serialized = {
        let table: HashTableOwned<TestConfig> =
            HashTableOwned::from_iterator(items.clone(), 95);

        assert_eq!(table.len(), items.len());

        table.raw_bytes().to_owned()
    };

    for alignment_shift in 0..4 {
        let data = &serialized[alignment_shift..];

        let table = HashTable::<TestConfig, _>::from_raw_bytes(data).unwrap();

        assert_eq!(table.len(), items.len());

        for (key, value) in items.iter() {
            assert_eq!(table.get(key), Some(*value));
        }

        serialized.insert(0, 0xFFu8);
    }
}

// Spot-checks for the integer math in `slots_needed` / `max_item_count_for`.
#[test]
fn load_factor_and_item_count() {
    assert_eq!(
        slots_needed(0, Factor::from_percent(100)),
        REFERENCE_GROUP_SIZE
    );
    assert_eq!(slots_needed(6, Factor::from_percent(60)), 16);
    assert_eq!(slots_needed(5, Factor::from_percent(50)), 16);
    assert_eq!(slots_needed(5, Factor::from_percent(49)), 16);
    assert_eq!(slots_needed(1000, Factor::from_percent(100)), 1024);

    // Factor can never be a full 100% because of the rounding involved.
    assert_eq!(max_item_count_for(10, Factor::from_percent(100)), 9);
    assert_eq!(max_item_count_for(10, Factor::from_percent(50)), 4);
    assert_eq!(max_item_count_for(11, Factor::from_percent(50)), 5);
    assert_eq!(max_item_count_for(12, Factor::from_percent(50)), 5);
}
| 728 | |
// Start with a deliberately undersized table so that inserting 100 distinct
// items forces several internal resizes; every insert must report a fresh key.
#[test]
fn grow() {
    let items = make_test_items(100);
    let mut table = HashTableOwned::<TestConfig>::with_capacity(10, 87);

    for &(key, value) in &items {
        assert_eq!(table.insert(&key, &value), None);
    }
}
| 738 | |
// Percent-to-fixed-point conversion hits the exact expected bit patterns.
#[test]
fn factor_from_percent() {
    assert_eq!(Factor::from_percent(100), Factor(u16::MAX));
    assert_eq!(Factor::from_percent(0), Factor(0));
    assert_eq!(Factor::from_percent(50), Factor(u16::MAX / 2));
}

// `apply` rounds down and must not overflow for large inputs.
#[test]
fn factor_apply() {
    assert_eq!(Factor::from_percent(100).apply(12345), 12344);
    assert_eq!(Factor::from_percent(0).apply(12345), 0);
    assert_eq!(Factor::from_percent(50).apply(66), 32);

    // Make sure we can handle large numbers without overflow
    assert_basically_equal(Factor::from_percent(100).apply(usize::MAX), usize::MAX);
}

// `apply_inverse` rounds up and must not overflow for large inputs.
#[test]
fn factor_apply_inverse() {
    assert_eq!(Factor::from_percent(100).apply_inverse(12345), 12345);
    assert_eq!(Factor::from_percent(10).apply_inverse(100), 1001);
    assert_eq!(Factor::from_percent(50).apply_inverse(33), 67);

    // Make sure we can handle large numbers without overflow
    assert_basically_equal(
        Factor::from_percent(100).apply_inverse(usize::MAX),
        usize::MAX,
    );
}
| 768 | |
/// Asserts that `x` and `y` differ by less than 0.01% relative to the larger
/// of the two. Used to compare results of large fixed-point computations
/// where exact equality is lost to rounding.
fn assert_basically_equal(x: usize, y: usize) {
    const MAX_ALLOWED_DIFFERENCE_IN_PERCENT: f64 = 0.01;

    let reference = std::cmp::max(x, y) as f64;
    let delta = (x as f64 - y as f64).abs();
    let delta_percent = delta / reference * 100.0;

    assert!(
        delta_percent < MAX_ALLOWED_DIFFERENCE_IN_PERCENT,
        "{} and {} differ by {:.4} percent but the maximally allowed difference \
        is {:.2} percent. Large differences might be caused by integer overflow.",
        x,
        y,
        delta_percent,
        MAX_ALLOWED_DIFFERENCE_IN_PERCENT
    );
}
| 786 | |
// Property-based tests: for many key/value byte widths, the table must agree
// with `FxHashMap` as a reference implementation.
mod quickchecks {
    use super::*;
    use crate::raw_table::ByteArray;
    use quickcheck::{Arbitrary, Gen};
    use rustc_hash::FxHashMap;

    // A fixed-width byte-array key/value type whose width is a const generic,
    // so the macro below can instantiate many encoded sizes.
    #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
    struct Bytes<const BYTE_COUNT: usize>([u8; BYTE_COUNT]);

    impl<const L: usize> Arbitrary for Bytes<L> {
        // Fills each byte with an independently random value.
        fn arbitrary(gen: &mut Gen) -> Self {
            let mut xs = [0; L];
            for x in xs.iter_mut() {
                *x = u8::arbitrary(gen);
            }
            Bytes(xs)
        }
    }

    impl<const L: usize> Default for Bytes<L> {
        fn default() -> Self {
            Bytes([0; L])
        }
    }

    impl<const L: usize> ByteArray for Bytes<L> {
        #[inline(always)]
        fn zeroed() -> Self {
            Bytes([0u8; L])
        }

        #[inline(always)]
        fn as_slice(&self) -> &[u8] {
            &self.0[..]
        }

        #[inline(always)]
        fn equals(&self, other: &Self) -> bool {
            self.as_slice() == other.as_slice()
        }
    }

    // Generates a test module with a `Config` whose encoded key/value widths
    // are `$key_len`/`$value_len` bytes, plus a set of quickcheck properties
    // comparing the table against FxHashMap.
    macro_rules! mk_quick_tests {
        ($name: ident, $key_len:expr, $value_len:expr) => {
            mod $name {
                use super::*;
                use quickcheck::quickcheck;

                struct Cfg;

                type Key = Bytes<$key_len>;
                type Value = Bytes<$value_len>;

                impl Config for Cfg {
                    type EncodedKey = Key;
                    type EncodedValue = Value;

                    type Key = Key;
                    type Value = Value;

                    type H = FxHashFn;

                    fn encode_key(k: &Self::Key) -> Self::EncodedKey {
                        *k
                    }

                    fn encode_value(v: &Self::Value) -> Self::EncodedValue {
                        *v
                    }

                    fn decode_key(k: &Self::EncodedKey) -> Self::Key {
                        *k
                    }

                    fn decode_value(v: &Self::EncodedValue) -> Self::Value {
                        *v
                    }
                }

                fn from_std_hashmap(m: &FxHashMap<Key, Value>) -> HashTableOwned<Cfg> {
                    HashTableOwned::<Cfg>::from_iterator(m.iter().map(|(x, y)| (*x, *y)), 87)
                }

                quickcheck! {
                    fn len(xs: FxHashMap<Key, Value>) -> bool {
                        let table = from_std_hashmap(&xs);

                        xs.len() == table.len()
                    }
                }

                quickcheck! {
                    fn lookup(xs: FxHashMap<Key, Value>) -> bool {
                        let table = from_std_hashmap(&xs);
                        xs.iter().all(|(k, v)| table.get(k) == Some(*v))
                    }
                }

                quickcheck! {
                    fn insert_with_duplicates(xs: Vec<(Key, Value)>) -> bool {
                        let mut reference = FxHashMap::default();
                        let mut table = HashTableOwned::<Cfg>::default();

                        for (k, v) in xs {
                            let expected = reference.insert(k, v);
                            let actual = table.insert(&k, &v);

                            if expected != actual {
                                return false;
                            }
                        }

                        true
                    }
                }

                quickcheck! {
                    fn bytes_deterministic(xs: FxHashMap<Key, Value>) -> bool {
                        // NOTE: We only guarantee this given the exact same
                        //       insertion order.
                        let table0 = from_std_hashmap(&xs);
                        let table1 = from_std_hashmap(&xs);

                        table0.raw_bytes() == table1.raw_bytes()
                    }
                }

                quickcheck! {
                    fn from_iterator_vs_manual_insertion(xs: Vec<(Key, Value)>) -> bool {
                        let mut table0 = HashTableOwned::<Cfg>::with_capacity(xs.len(), 87);

                        for (k, v) in xs.iter() {
                            table0.insert(k, v);
                        }

                        let table1 = HashTableOwned::<Cfg>::from_iterator(xs.into_iter(), 87);

                        // Requiring bit for bit equality might be a bit too much in this case,
                        // as long as it works ...
                        table0.raw_bytes() == table1.raw_bytes()
                    }
                }
            }
        };
    }

    // Test zero sized key and values
    mk_quick_tests!(k0_v0, 0, 0);
    mk_quick_tests!(k1_v0, 1, 0);
    mk_quick_tests!(k2_v0, 2, 0);
    mk_quick_tests!(k3_v0, 3, 0);
    mk_quick_tests!(k4_v0, 4, 0);
    mk_quick_tests!(k8_v0, 8, 0);
    mk_quick_tests!(k15_v0, 15, 0);
    mk_quick_tests!(k16_v0, 16, 0);
    mk_quick_tests!(k17_v0, 17, 0);
    mk_quick_tests!(k63_v0, 63, 0);
    mk_quick_tests!(k64_v0, 64, 0);

    // Test a few different key sizes
    mk_quick_tests!(k2_v4, 2, 4);
    mk_quick_tests!(k4_v4, 4, 4);
    mk_quick_tests!(k8_v4, 8, 4);
    mk_quick_tests!(k17_v4, 17, 4);
    mk_quick_tests!(k20_v4, 20, 4);
    mk_quick_tests!(k64_v4, 64, 4);

    // Test a few different value sizes
    mk_quick_tests!(k16_v1, 16, 1);
    mk_quick_tests!(k16_v2, 16, 2);
    mk_quick_tests!(k16_v3, 16, 3);
    mk_quick_tests!(k16_v4, 16, 4);
    mk_quick_tests!(k16_v8, 16, 8);
    mk_quick_tests!(k16_v16, 16, 16);
    mk_quick_tests!(k16_v17, 16, 17);
}
| 963 | } |
| 964 | |