1//! This crate implements a hash table that can be used as is in its binary, on-disk format.
2//! The goal is to provide a high performance data structure that can be used without any significant up-front decoding.
//! The implementation makes no assumptions about alignment or endianness of the underlying data,
4//! so a table encoded on one platform can be used on any other platform and
5//! the binary data can be mapped into memory at arbitrary addresses.
6//!
7//!
8//! ## Usage
9//!
10//! In order to use the hash table one needs to implement the `Config` trait.
11//! This trait defines how the table is encoded and what hash function is used.
12//! With a `Config` in place the `HashTableOwned` type can be used to build and serialize a hash table.
13//! The `HashTable` type can then be used to create an almost zero-cost view of the serialized hash table.
14//!
15//! ```rust
16//!
17//! use odht::{HashTable, HashTableOwned, Config, FxHashFn};
18//!
19//! struct MyConfig;
20//!
21//! impl Config for MyConfig {
22//!
23//! type Key = u64;
24//! type Value = u32;
25//!
26//! type EncodedKey = [u8; 8];
27//! type EncodedValue = [u8; 4];
28//!
29//! type H = FxHashFn;
30//!
31//! #[inline] fn encode_key(k: &Self::Key) -> Self::EncodedKey { k.to_le_bytes() }
32//! #[inline] fn encode_value(v: &Self::Value) -> Self::EncodedValue { v.to_le_bytes() }
33//! #[inline] fn decode_key(k: &Self::EncodedKey) -> Self::Key { u64::from_le_bytes(*k) }
34//! #[inline] fn decode_value(v: &Self::EncodedValue) -> Self::Value { u32::from_le_bytes(*v)}
35//! }
36//!
37//! fn main() {
38//! let mut builder = HashTableOwned::<MyConfig>::with_capacity(3, 95);
39//!
40//! builder.insert(&1, &2);
41//! builder.insert(&3, &4);
42//! builder.insert(&5, &6);
43//!
44//! let serialized = builder.raw_bytes().to_owned();
45//!
46//! let table = HashTable::<MyConfig, &[u8]>::from_raw_bytes(
47//! &serialized[..]
48//! ).unwrap();
49//!
50//! assert_eq!(table.get(&1), Some(2));
51//! assert_eq!(table.get(&3), Some(4));
52//! assert_eq!(table.get(&5), Some(6));
53//! }
54//! ```
55
56#![cfg_attr(feature = "nightly", feature(core_intrinsics))]
57
58#[cfg(test)]
59extern crate quickcheck;
60
// `likely!` gives the optimizer a branch-probability hint that the condition
// is almost always true. With the "nightly" feature it forwards to the
// compiler intrinsic.
#[cfg(feature = "nightly")]
macro_rules! likely {
    ($x:expr) => {
        std::intrinsics::likely($x)
    };
}

// On stable the hint is a no-op that just evaluates the expression.
#[cfg(not(feature = "nightly"))]
macro_rules! likely {
    ($x:expr) => {
        $x
    };
}
74
// Counterpart of `likely!`: hints that the condition is almost always false.
// Forwards to the compiler intrinsic on nightly.
#[cfg(feature = "nightly")]
macro_rules! unlikely {
    ($x:expr) => {
        std::intrinsics::unlikely($x)
    };
}

// On stable the hint is a no-op that just evaluates the expression.
#[cfg(not(feature = "nightly"))]
macro_rules! unlikely {
    ($x:expr) => {
        $x
    };
}
88
89mod error;
90mod fxhash;
91mod memory_layout;
92mod raw_table;
93mod swisstable_group_query;
94mod unhash;
95
96use error::Error;
97use memory_layout::Header;
98use std::borrow::{Borrow, BorrowMut};
99use swisstable_group_query::REFERENCE_GROUP_SIZE;
100
101pub use crate::fxhash::FxHashFn;
102pub use crate::unhash::UnHashFn;
103
104use crate::raw_table::{ByteArray, RawIter, RawTable, RawTableMut};
105
/// This trait provides a complete "configuration" for a hash table, i.e. it
/// defines the key and value types, how these are encoded and what hash
/// function is being used.
///
/// Implementations of the `encode_key` and `encode_value` methods must encode
/// the given key/value into a fixed size array. The encoding must be
/// deterministic (i.e. no random padding bytes) and must be independent of
/// platform endianness. It is always highly recommended to mark these methods
/// as `#[inline]`.
pub trait Config {
    /// The (decoded) key type of the table.
    type Key;
    /// The (decoded) value type of the table.
    type Value;

    // The EncodedKey and EncodedValue types must always be a fixed size array of bytes,
    // e.g. [u8; 4].
    type EncodedKey: ByteArray;
    type EncodedValue: ByteArray;

    /// The hash function applied to encoded keys.
    type H: HashFn;

    /// Implementations of the `encode_key` and `encode_value` methods must encode
    /// the given key/value into a fixed size array. See above for requirements.
    fn encode_key(k: &Self::Key) -> Self::EncodedKey;

    /// Implementations of the `encode_key` and `encode_value` methods must encode
    /// the given key/value into a fixed size array. See above for requirements.
    fn encode_value(v: &Self::Value) -> Self::EncodedValue;

    /// Inverse of `encode_key`.
    fn decode_key(k: &Self::EncodedKey) -> Self::Key;
    /// Inverse of `encode_value`.
    fn decode_value(v: &Self::EncodedValue) -> Self::Value;
}
137
/// This trait represents hash functions as used by HashTable and
/// HashTableOwned.
pub trait HashFn: Eq {
    /// Hashes the given byte slice (an encoded key) to a 32-bit hash value.
    fn hash(bytes: &[u8]) -> u32;
}
143
/// A [HashTableOwned] keeps the underlying data on the heap and
/// can resize itself on demand.
#[derive(Clone)]
pub struct HashTableOwned<C: Config> {
    // Backing storage: always an owned, heap-allocated byte buffer.
    allocation: memory_layout::Allocation<C, Box<[u8]>>,
}
150
151impl<C: Config> Default for HashTableOwned<C> {
152 fn default() -> Self {
153 HashTableOwned::with_capacity(max_item_count:12, max_load_factor_percent:87)
154 }
155}
156
impl<C: Config> HashTableOwned<C> {
    /// Creates a new [HashTableOwned] that can hold at least `max_item_count`
    /// items while maintaining the specified load factor.
    ///
    /// Panics if `max_load_factor_percent` is 0 or greater than 100.
    pub fn with_capacity(max_item_count: usize, max_load_factor_percent: u8) -> HashTableOwned<C> {
        assert!(max_load_factor_percent <= 100);
        assert!(max_load_factor_percent > 0);

        Self::with_capacity_internal(
            max_item_count,
            Factor::from_percent(max_load_factor_percent),
        )
    }

    // Like `with_capacity` but takes the load factor in its fixed-point form.
    fn with_capacity_internal(max_item_count: usize, max_load_factor: Factor) -> HashTableOwned<C> {
        let slots_needed = slots_needed(max_item_count, max_load_factor);
        assert!(slots_needed > 0);

        let allocation = memory_layout::allocate(slots_needed, 0, max_load_factor);

        HashTableOwned { allocation }
    }

    /// Retrieves the value for the given key. Returns `None` if no entry is found.
    #[inline]
    pub fn get(&self, key: &C::Key) -> Option<C::Value> {
        let encoded_key = C::encode_key(key);
        if let Some(encoded_value) = self.as_raw().find(&encoded_key) {
            Some(C::decode_value(encoded_value))
        } else {
            None
        }
    }

    /// Returns true if the table contains an entry for the given key.
    #[inline]
    pub fn contains_key(&self, key: &C::Key) -> bool {
        let encoded_key = C::encode_key(key);
        self.as_raw().find(&encoded_key).is_some()
    }

    /// Inserts the given key-value pair into the table.
    /// Grows the table if necessary.
    ///
    /// Returns the previous value for the key, if there was one.
    #[inline]
    pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> {
        let (item_count, max_item_count) = {
            let header = self.allocation.header();
            let max_item_count = max_item_count_for(header.slot_count(), header.max_load_factor());
            (header.item_count(), max_item_count)
        };

        // Grow eagerly once the budget is reached, even though the insertion
        // below might only overwrite an existing entry.
        if unlikely!(item_count == max_item_count) {
            self.grow();
        }

        debug_assert!(
            item_count
                < max_item_count_for(
                    self.allocation.header().slot_count(),
                    self.allocation.header().max_load_factor()
                )
        );

        let encoded_key = C::encode_key(key);
        let encoded_value = C::encode_value(value);

        with_raw_mut(&mut self.allocation, |header, mut raw_table| {
            if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) {
                Some(C::decode_value(&old_value))
            } else {
                // Only a brand-new key increases the item count.
                header.set_item_count(item_count + 1);
                None
            }
        })
    }

    /// Returns an iterator over the (decoded) key-value pairs of the table.
    #[inline]
    pub fn iter(&self) -> Iter<'_, C> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        Iter(RawIter::new(entry_metadata, entry_data))
    }

    /// Builds a table from the given key-value pairs. When the iterator
    /// reports an exact size, the table is pre-allocated so that it never
    /// has to grow during construction.
    pub fn from_iterator<I: IntoIterator<Item = (C::Key, C::Value)>>(
        it: I,
        max_load_factor_percent: u8,
    ) -> Self {
        let it = it.into_iter();

        // Only trust the size hint when it is exact (lower bound == upper bound).
        let known_size = match it.size_hint() {
            (min, Some(max)) => {
                if min == max {
                    Some(max)
                } else {
                    None
                }
            }
            _ => None,
        };

        if let Some(known_size) = known_size {
            let mut table = HashTableOwned::with_capacity(known_size, max_load_factor_percent);

            let initial_slot_count = table.allocation.header().slot_count();

            for (k, v) in it {
                table.insert(&k, &v);
            }

            // duplicates
            // (Duplicate keys can make the table smaller than `known_size`,
            // but it must never have had to grow.)
            assert!(table.len() <= known_size);
            assert_eq!(table.allocation.header().slot_count(), initial_slot_count);

            table
        } else {
            // No exact size hint: buffer the items so the recursive call
            // sees an exactly-sized iterator (a Vec's hint is exact).
            let items: Vec<_> = it.collect();
            Self::from_iterator(items, max_load_factor_percent)
        }
    }

    /// Constructs a [HashTableOwned] from its raw byte representation.
    /// The provided data must have the exact right number of bytes.
    ///
    /// This method has linear time complexity as it needs to make its own
    /// copy of the given data.
    ///
    /// The method will verify the header of the given data and return an
    /// error if the verification fails.
    pub fn from_raw_bytes(data: &[u8]) -> Result<HashTableOwned<C>, Box<dyn std::error::Error>> {
        let data = data.to_owned().into_boxed_slice();
        let allocation = memory_layout::Allocation::from_raw_bytes(data)?;

        Ok(HashTableOwned { allocation })
    }

    /// Like [HashTableOwned::from_raw_bytes] but without header verification.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `data` is a valid serialized hash table
    /// for this `Config`.
    #[inline]
    pub unsafe fn from_raw_bytes_unchecked(data: &[u8]) -> HashTableOwned<C> {
        let data = data.to_owned().into_boxed_slice();
        let allocation = memory_layout::Allocation::from_raw_bytes_unchecked(data);

        HashTableOwned { allocation }
    }

    /// Returns the number of items stored in the hash table.
    #[inline]
    pub fn len(&self) -> usize {
        self.allocation.header().item_count()
    }

    /// Returns the binary, on-disk representation of the table.
    #[inline]
    pub fn raw_bytes(&self) -> &[u8] {
        self.allocation.raw_bytes()
    }

    // Read-only view of the underlying raw table.
    #[inline]
    fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        RawTable::new(entry_metadata, entry_data)
    }

    // Replaces the allocation with one of (at least) double the slot count and
    // re-inserts all existing entries.
    #[inline(never)]
    #[cold]
    fn grow(&mut self) {
        let initial_slot_count = self.allocation.header().slot_count();
        let initial_item_count = self.allocation.header().item_count();
        let initial_max_load_factor = self.allocation.header().max_load_factor();

        let mut new_table =
            Self::with_capacity_internal(initial_item_count * 2, initial_max_load_factor);

        // Copy the entries over into the new allocation.
        // NOTE(review): an earlier comment here claimed an `insert_entry()`
        // method was used to avoid re-hashing, but the code calls `insert()`,
        // which presumably hashes each key again — confirm this is intended.
        {
            with_raw_mut(&mut new_table.allocation, |header, mut raw_table| {
                for (_, entry_data) in self.as_raw().iter() {
                    raw_table.insert(entry_data.key, entry_data.value);
                }

                header.set_item_count(initial_item_count);
            });
        }

        *self = new_table;

        assert!(
            self.allocation.header().slot_count() >= 2 * initial_slot_count,
            "Allocation did not grow properly. Slot count is {} but was expected to be \
             at least {}",
            self.allocation.header().slot_count(),
            2 * initial_slot_count
        );
        assert_eq!(self.allocation.header().item_count(), initial_item_count);
        assert_eq!(
            self.allocation.header().max_load_factor(),
            initial_max_load_factor
        );
    }
}
352
353impl<C: Config> std::fmt::Debug for HashTableOwned<C> {
354 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
355 let header: &Header = self.allocation.header();
356
357 writeln!(
358 f,
359 "(item_count={}, max_item_count={}, max_load_factor={}%)",
360 header.item_count(),
361 max_item_count_for(header.slot_count(), header.max_load_factor()),
362 header.max_load_factor().to_percent(),
363 )?;
364
365 writeln!(f, "{:?}", self.as_raw())
366 }
367}
368
/// The [HashTable] type provides a cheap way to construct a non-resizable view
/// of a persisted hash table. If the underlying data storage `D` implements
/// `BorrowMut<[u8]>` then the table can be modified in place.
#[derive(Clone, Copy)]
pub struct HashTable<C: Config, D: Borrow<[u8]>> {
    // Borrowed or owned storage holding the serialized table bytes.
    allocation: memory_layout::Allocation<C, D>,
}
376
impl<C: Config, D: Borrow<[u8]>> HashTable<C, D> {
    /// Constructs a [HashTable] from its raw byte representation.
    /// The provided data must have the exact right number of bytes.
    ///
    /// This method has constant time complexity and will only verify the header
    /// data of the hash table. It will not copy any data.
    pub fn from_raw_bytes(data: D) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
        let allocation = memory_layout::Allocation::from_raw_bytes(data)?;
        Ok(HashTable { allocation })
    }

    /// Constructs a [HashTable] from its raw byte representation without doing
    /// any verification of the underlying data. It is the user's responsibility
    /// to make sure that the underlying data is actually a valid hash table.
    ///
    /// The [HashTable::from_raw_bytes] method provides a safe alternative to this
    /// method.
    ///
    /// # Safety
    ///
    /// `data` must be a valid serialized hash table for this `Config`.
    #[inline]
    pub unsafe fn from_raw_bytes_unchecked(data: D) -> HashTable<C, D> {
        HashTable {
            allocation: memory_layout::Allocation::from_raw_bytes_unchecked(data),
        }
    }

    /// Retrieves the value for the given key. Returns `None` if no entry is found.
    #[inline]
    pub fn get(&self, key: &C::Key) -> Option<C::Value> {
        let encoded_key = C::encode_key(key);
        self.as_raw().find(&encoded_key).map(C::decode_value)
    }

    /// Returns true if the table contains an entry for the given key.
    #[inline]
    pub fn contains_key(&self, key: &C::Key) -> bool {
        let encoded_key = C::encode_key(key);
        self.as_raw().find(&encoded_key).is_some()
    }

    /// Returns an iterator over the (decoded) key-value pairs of the table.
    #[inline]
    pub fn iter(&self) -> Iter<'_, C> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        Iter(RawIter::new(entry_metadata, entry_data))
    }

    /// Returns the number of items stored in the hash table.
    #[inline]
    pub fn len(&self) -> usize {
        self.allocation.header().item_count()
    }

    /// Returns the underlying serialized bytes of the table.
    #[inline]
    pub fn raw_bytes(&self) -> &[u8] {
        self.allocation.raw_bytes()
    }

    // Read-only view of the underlying raw table.
    #[inline]
    fn as_raw(&self) -> RawTable<'_, C::EncodedKey, C::EncodedValue, C::H> {
        let (entry_metadata, entry_data) = self.allocation.data_slices();
        RawTable::new(entry_metadata, entry_data)
    }
}
436
impl<C: Config, D: Borrow<[u8]> + BorrowMut<[u8]>> HashTable<C, D> {
    /// Initializes `data` in place as an empty hash table with room for
    /// `max_item_count` items at the given load factor.
    ///
    /// `data` must be exactly [bytes_needed] bytes long; otherwise an error
    /// is returned.
    pub fn init_in_place(
        mut data: D,
        max_item_count: usize,
        max_load_factor_percent: u8,
    ) -> Result<HashTable<C, D>, Box<dyn std::error::Error>> {
        let max_load_factor = Factor::from_percent(max_load_factor_percent);
        let byte_count = bytes_needed_internal::<C>(max_item_count, max_load_factor);
        if data.borrow_mut().len() != byte_count {
            return Err(Error(format!(
                "byte slice to initialize has wrong length ({} instead of {})",
                data.borrow_mut().len(),
                byte_count
            )))?;
        }

        let slot_count = slots_needed(max_item_count, max_load_factor);
        let allocation = memory_layout::init_in_place::<C, _>(data, slot_count, 0, max_load_factor);
        Ok(HashTable { allocation })
    }

    /// Inserts the given key-value pair into the table.
    /// Unlike [HashTableOwned::insert] this method cannot grow the underlying table
    /// if there is not enough space for the new item. Instead the call will panic.
    #[inline]
    pub fn insert(&mut self, key: &C::Key, value: &C::Value) -> Option<C::Value> {
        let item_count = self.allocation.header().item_count();
        let max_load_factor = self.allocation.header().max_load_factor();
        let slot_count = self.allocation.header().slot_count();
        // FIXME: This is actually a bit too conservative because it does not account for
        //        cases where an entry is overwritten and thus the item count does not
        //        change.
        assert!(item_count < max_item_count_for(slot_count, max_load_factor));

        let encoded_key = C::encode_key(key);
        let encoded_value = C::encode_value(value);

        with_raw_mut(&mut self.allocation, |header, mut raw_table| {
            if let Some(old_value) = raw_table.insert(encoded_key, encoded_value) {
                Some(C::decode_value(&old_value))
            } else {
                // Only a brand-new key increases the item count.
                header.set_item_count(item_count + 1);
                None
            }
        })
    }
}
484
485/// Computes the exact number of bytes needed for storing a HashTable with the
486/// given max item count and load factor. The result can be used for allocating
487/// storage to be passed into [HashTable::init_in_place].
488pub fn bytes_needed<C: Config>(max_item_count: usize, max_load_factor_percent: u8) -> usize {
489 let max_load_factor: Factor = Factor::from_percent(max_load_factor_percent);
490 bytes_needed_internal::<C>(max_item_count, max_load_factor)
491}
492
493fn bytes_needed_internal<C: Config>(max_item_count: usize, max_load_factor: Factor) -> usize {
494 let slot_count: usize = slots_needed(max_item_count, max_load_factor);
495 memory_layout::bytes_needed::<C>(slot_count)
496}
497
/// Iterator over the (decoded) key-value pairs of a table.
pub struct Iter<'a, C: Config>(RawIter<'a, C::EncodedKey, C::EncodedValue>);
499
500impl<'a, C: Config> Iterator for Iter<'a, C> {
501 type Item = (C::Key, C::Value);
502
503 fn next(&mut self) -> Option<Self::Item> {
504 self.0.next().map(|(_, entry: &Entry<::EncodedKey, …>)| {
505 let key: ::Key = C::decode_key(&entry.key);
506 let value: ::Value = C::decode_value(&entry.value);
507
508 (key, value)
509 })
510 }
511}
512
513// We use integer math here as not to run into any issues with
514// platform-specific floating point math implementation.
515fn slots_needed(item_count: usize, max_load_factor: Factor) -> usize {
516 // Note: we round up here
517 let slots_needed: usize = max_load_factor.apply_inverse(item_count);
518 std::cmp::max(
519 v1:slots_needed.checked_next_power_of_two().unwrap(),
520 REFERENCE_GROUP_SIZE,
521 )
522}
523
// Maximum number of items a table with `slot_count` slots may hold without
// exceeding the given load factor.
fn max_item_count_for(slot_count: usize, max_load_factor: Factor) -> usize {
    // Note: we round down here
    max_load_factor.apply(slot_count)
}
528
529#[inline]
530fn with_raw_mut<C, M, F, R>(allocation: &mut memory_layout::Allocation<C, M>, f: F) -> R
531where
532 C: Config,
533 M: BorrowMut<[u8]>,
534 F: FnOnce(&mut Header, RawTableMut<'_, C::EncodedKey, C::EncodedValue, C::H>) -> R,
535{
536 allocation.with_mut_parts(|header: &mut Header, entry_metadata: &mut [u8], entry_data: &mut [Entry<::EncodedKey, …>]| {
537 f(header, RawTableMut::new(entry_metadata, entry_data))
538 })
539}
540
/// This type is used for computing max item counts for a given load factor
/// efficiently. We use integer math here so that things are the same on
/// all platforms and with all compiler settings.
///
/// The value is a fixed-point fraction with denominator `u16::MAX`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Factor(pub u16);

impl Factor {
    const BASE: usize = u16::MAX as usize;

    /// Converts a percentage (0..=100) into the fixed-point representation.
    #[inline]
    fn from_percent(percent: u8) -> Factor {
        Factor((usize::from(percent) * Self::BASE / 100) as u16)
    }

    /// Converts the fixed-point value back into an (approximate) percentage.
    fn to_percent(self) -> usize {
        self.0 as usize * 100 / Self::BASE
    }

    // Note: we round down here
    #[inline]
    fn apply(self, x: usize) -> usize {
        // Widening to 128 bits rules out overflow for any usize input.
        let product = (x as u128) * (self.0 as u128);
        (product >> 16) as usize
    }

    // Note: we round up here
    #[inline]
    fn apply_inverse(self, x: usize) -> usize {
        // Widening to 128 bits rules out overflow for any usize input.
        let factor = self.0 as u128;
        let numerator = (Self::BASE as u128) * (x as u128) + factor - 1;
        (numerator / factor) as usize
    }
}
581
582#[cfg(test)]
583mod tests {
584 use super::*;
585 use std::convert::TryInto;
586
    // A simple u32 -> u32 configuration (little-endian encoding) used by the
    // tests below. Uninhabited enum: the config is never instantiated.
    enum TestConfig {}

    impl Config for TestConfig {
        type EncodedKey = [u8; 4];
        type EncodedValue = [u8; 4];

        type Key = u32;
        type Value = u32;

        type H = FxHashFn;

        fn encode_key(k: &Self::Key) -> Self::EncodedKey {
            k.to_le_bytes()
        }

        fn encode_value(v: &Self::Value) -> Self::EncodedValue {
            v.to_le_bytes()
        }

        fn decode_key(k: &Self::EncodedKey) -> Self::Key {
            u32::from_le_bytes(k[..].try_into().unwrap())
        }

        fn decode_value(v: &Self::EncodedValue) -> Self::Value {
            u32::from_le_bytes(v[..].try_into().unwrap())
        }
    }
614
615 fn make_test_items(count: usize) -> Vec<(u32, u32)> {
616 if count == 0 {
617 return vec![];
618 }
619
620 let mut items = vec![];
621
622 if count > 1 {
623 let steps = (count - 1) as u32;
624 let step = u32::MAX / steps;
625
626 for i in 0..steps {
627 let x = i * step;
628 items.push((x, u32::MAX - x));
629 }
630 }
631
632 items.push((u32::MAX, 0));
633
634 items.sort();
635 items.dedup();
636 assert_eq!(items.len(), count);
637
638 items
639 }
640
    // `from_iterator` must produce a table containing exactly the given items,
    // for a range of sizes including the empty table.
    #[test]
    fn from_iterator() {
        for count in 0..33 {
            let items = make_test_items(count);
            let table = HashTableOwned::<TestConfig>::from_iterator(items.clone(), 95);
            assert_eq!(table.len(), items.len());

            let mut actual_items: Vec<_> = table.iter().collect();
            actual_items.sort();

            assert_eq!(items, actual_items);
        }
    }
654
    // A table initialized in place over a caller-provided buffer must accept
    // exactly the advertised number of items and retain all of them.
    #[test]
    fn init_in_place() {
        for count in 0..33 {
            let items = make_test_items(count);
            let byte_count = bytes_needed::<TestConfig>(items.len(), 87);
            let data = vec![0u8; byte_count];

            let mut table =
                HashTable::<TestConfig, _>::init_in_place(data, items.len(), 87).unwrap();

            for (i, (k, v)) in items.iter().enumerate() {
                assert_eq!(table.len(), i);
                assert_eq!(table.insert(k, v), None);
                assert_eq!(table.len(), i + 1);

                // Make sure we still can find all items previously inserted.
                for (k, v) in items.iter().take(i) {
                    assert_eq!(table.get(k), Some(*v));
                }
            }

            let mut actual_items: Vec<_> = table.iter().collect();
            actual_items.sort();

            assert_eq!(items, actual_items);
        }
    }
682
    // A serialized table must be readable regardless of the alignment of the
    // byte slice it is loaded from (see crate-level docs).
    #[test]
    fn hash_table_at_different_alignments() {
        let items = make_test_items(33);

        let mut serialized = {
            let table: HashTableOwned<TestConfig> =
                HashTableOwned::from_iterator(items.clone(), 95);

            assert_eq!(table.len(), items.len());

            table.raw_bytes().to_owned()
        };

        for alignment_shift in 0..4 {
            let data = &serialized[alignment_shift..];

            let table = HashTable::<TestConfig, _>::from_raw_bytes(data).unwrap();

            assert_eq!(table.len(), items.len());

            for (key, value) in items.iter() {
                assert_eq!(table.get(key), Some(*value));
            }

            // Prepend a byte so the next round starts at a shifted address.
            serialized.insert(0, 0xFFu8);
        }
    }
710
    // Spot-checks the integer math in `slots_needed` and `max_item_count_for`.
    #[test]
    fn load_factor_and_item_count() {
        assert_eq!(
            slots_needed(0, Factor::from_percent(100)),
            REFERENCE_GROUP_SIZE
        );
        assert_eq!(slots_needed(6, Factor::from_percent(60)), 16);
        assert_eq!(slots_needed(5, Factor::from_percent(50)), 16);
        assert_eq!(slots_needed(5, Factor::from_percent(49)), 16);
        assert_eq!(slots_needed(1000, Factor::from_percent(100)), 1024);

        // Factor can never be a full 100% because of the rounding involved.
        assert_eq!(max_item_count_for(10, Factor::from_percent(100)), 9);
        assert_eq!(max_item_count_for(10, Factor::from_percent(50)), 4);
        assert_eq!(max_item_count_for(11, Factor::from_percent(50)), 5);
        assert_eq!(max_item_count_for(12, Factor::from_percent(50)), 5);
    }
728
    // Inserting well past the initial capacity must trigger (repeated) growth
    // without losing the `None` return for fresh keys.
    #[test]
    fn grow() {
        let items = make_test_items(100);
        let mut table = HashTableOwned::<TestConfig>::with_capacity(10, 87);

        for (key, value) in items.iter() {
            assert_eq!(table.insert(key, value), None);
        }
    }
738
    // Fixed-point conversion endpoints and midpoint.
    #[test]
    fn factor_from_percent() {
        assert_eq!(Factor::from_percent(100), Factor(u16::MAX));
        assert_eq!(Factor::from_percent(0), Factor(0));
        assert_eq!(Factor::from_percent(50), Factor(u16::MAX / 2));
    }
745
    // `apply` rounds down; 100% is slightly below 1.0 in fixed point.
    #[test]
    fn factor_apply() {
        assert_eq!(Factor::from_percent(100).apply(12345), 12344);
        assert_eq!(Factor::from_percent(0).apply(12345), 0);
        assert_eq!(Factor::from_percent(50).apply(66), 32);

        // Make sure we can handle large numbers without overflow
        assert_basically_equal(Factor::from_percent(100).apply(usize::MAX), usize::MAX);
    }
755
    // `apply_inverse` rounds up, so it may slightly overshoot.
    #[test]
    fn factor_apply_inverse() {
        assert_eq!(Factor::from_percent(100).apply_inverse(12345), 12345);
        assert_eq!(Factor::from_percent(10).apply_inverse(100), 1001);
        assert_eq!(Factor::from_percent(50).apply_inverse(33), 67);

        // Make sure we can handle large numbers without overflow
        assert_basically_equal(
            Factor::from_percent(100).apply_inverse(usize::MAX),
            usize::MAX,
        );
    }
768
    // Asserts that two values differ by less than 0.01%, which tolerates the
    // rounding inherent in the fixed-point math while still catching overflow.
    fn assert_basically_equal(x: usize, y: usize) {
        let larger_number = std::cmp::max(x, y) as f64;
        let abs_difference = (x as f64 - y as f64).abs();
        let difference_in_percent = (abs_difference / larger_number) * 100.0;

        const MAX_ALLOWED_DIFFERENCE_IN_PERCENT: f64 = 0.01;

        assert!(
            difference_in_percent < MAX_ALLOWED_DIFFERENCE_IN_PERCENT,
            "{} and {} differ by {:.4} percent but the maximally allowed difference \
             is {:.2} percent. Large differences might be caused by integer overflow.",
            x,
            y,
            difference_in_percent,
            MAX_ALLOWED_DIFFERENCE_IN_PERCENT
        );
    }
786
    mod quickchecks {
        use super::*;
        use crate::raw_table::ByteArray;
        use quickcheck::{Arbitrary, Gen};
        use rustc_hash::FxHashMap;

        // Fixed-size byte-array type with a const-generic length, used as both
        // key and value so many size combinations can be exercised.
        #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug)]
        struct Bytes<const BYTE_COUNT: usize>([u8; BYTE_COUNT]);

        impl<const L: usize> Arbitrary for Bytes<L> {
            fn arbitrary(gen: &mut Gen) -> Self {
                let mut xs = [0; L];
                for x in xs.iter_mut() {
                    *x = u8::arbitrary(gen);
                }
                Bytes(xs)
            }
        }

        impl<const L: usize> Default for Bytes<L> {
            fn default() -> Self {
                Bytes([0; L])
            }
        }

        impl<const L: usize> ByteArray for Bytes<L> {
            #[inline(always)]
            fn zeroed() -> Self {
                Bytes([0u8; L])
            }

            #[inline(always)]
            fn as_slice(&self) -> &[u8] {
                &self.0[..]
            }

            #[inline(always)]
            fn equals(&self, other: &Self) -> bool {
                self.as_slice() == other.as_slice()
            }
        }

        // Expands to a module with a `Config` for the given key/value byte
        // lengths plus quickcheck properties comparing against `FxHashMap`.
        macro_rules! mk_quick_tests {
            ($name: ident, $key_len:expr, $value_len:expr) => {
                mod $name {
                    use super::*;
                    use quickcheck::quickcheck;

                    struct Cfg;

                    type Key = Bytes<$key_len>;
                    type Value = Bytes<$value_len>;

                    impl Config for Cfg {
                        type EncodedKey = Key;
                        type EncodedValue = Value;

                        type Key = Key;
                        type Value = Value;

                        type H = FxHashFn;

                        fn encode_key(k: &Self::Key) -> Self::EncodedKey {
                            *k
                        }

                        fn encode_value(v: &Self::Value) -> Self::EncodedValue {
                            *v
                        }

                        fn decode_key(k: &Self::EncodedKey) -> Self::Key {
                            *k
                        }

                        fn decode_value(v: &Self::EncodedValue) -> Self::Value {
                            *v
                        }
                    }

                    fn from_std_hashmap(m: &FxHashMap<Key, Value>) -> HashTableOwned<Cfg> {
                        HashTableOwned::<Cfg>::from_iterator(m.iter().map(|(x, y)| (*x, *y)), 87)
                    }

                    quickcheck! {
                        fn len(xs: FxHashMap<Key, Value>) -> bool {
                            let table = from_std_hashmap(&xs);

                            xs.len() == table.len()
                        }
                    }

                    quickcheck! {
                        fn lookup(xs: FxHashMap<Key, Value>) -> bool {
                            let table = from_std_hashmap(&xs);
                            xs.iter().all(|(k, v)| table.get(k) == Some(*v))
                        }
                    }

                    quickcheck! {
                        fn insert_with_duplicates(xs: Vec<(Key, Value)>) -> bool {
                            let mut reference = FxHashMap::default();
                            let mut table = HashTableOwned::<Cfg>::default();

                            for (k, v) in xs {
                                let expected = reference.insert(k, v);
                                let actual = table.insert(&k, &v);

                                if expected != actual {
                                    return false;
                                }
                            }

                            true
                        }
                    }

                    quickcheck! {
                        fn bytes_deterministic(xs: FxHashMap<Key, Value>) -> bool {
                            // NOTE: We only guarantee this given the exact same
                            // insertion order.
                            let table0 = from_std_hashmap(&xs);
                            let table1 = from_std_hashmap(&xs);

                            table0.raw_bytes() == table1.raw_bytes()
                        }
                    }

                    quickcheck! {
                        fn from_iterator_vs_manual_insertion(xs: Vec<(Key, Value)>) -> bool {
                            let mut table0 = HashTableOwned::<Cfg>::with_capacity(xs.len(), 87);

                            for (k, v) in xs.iter() {
                                table0.insert(k, v);
                            }

                            let table1 = HashTableOwned::<Cfg>::from_iterator(xs.into_iter(), 87);

                            // Requiring bit for bit equality might be a bit too much in this case,
                            // as long as it works ...
                            table0.raw_bytes() == table1.raw_bytes()
                        }
                    }
                }
            };
        }

        // Test zero sized key and values
        mk_quick_tests!(k0_v0, 0, 0);
        mk_quick_tests!(k1_v0, 1, 0);
        mk_quick_tests!(k2_v0, 2, 0);
        mk_quick_tests!(k3_v0, 3, 0);
        mk_quick_tests!(k4_v0, 4, 0);
        mk_quick_tests!(k8_v0, 8, 0);
        mk_quick_tests!(k15_v0, 15, 0);
        mk_quick_tests!(k16_v0, 16, 0);
        mk_quick_tests!(k17_v0, 17, 0);
        mk_quick_tests!(k63_v0, 63, 0);
        mk_quick_tests!(k64_v0, 64, 0);

        // Test a few different key sizes
        mk_quick_tests!(k2_v4, 2, 4);
        mk_quick_tests!(k4_v4, 4, 4);
        mk_quick_tests!(k8_v4, 8, 4);
        mk_quick_tests!(k17_v4, 17, 4);
        mk_quick_tests!(k20_v4, 20, 4);
        mk_quick_tests!(k64_v4, 64, 4);

        // Test a few different value sizes
        mk_quick_tests!(k16_v1, 16, 1);
        mk_quick_tests!(k16_v2, 16, 2);
        mk_quick_tests!(k16_v3, 16, 3);
        mk_quick_tests!(k16_v4, 16, 4);
        mk_quick_tests!(k16_v8, 16, 8);
        mk_quick_tests!(k16_v16, 16, 16);
        mk_quick_tests!(k16_v17, 16, 17);
    }
963}
964