//! This module implements the actual hash table logic. It works solely with
//! byte arrays and does not know anything about the unencoded key and value
//! types.
//!
//! The implementation makes sure that the encoded data contains no padding
//! bytes (which makes it deterministic) and nothing in it requires any specific
//! alignment.
//!
//! Many functions in this module are marked as `#[inline]`. This allows LLVM
//! to retain the information about byte array sizes, even though they are
//! converted to slices (of unknown size) from time to time.
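//!
//! The table data consists of two parts: an array of control bytes
//! (`EntryMetadata`), whose first `REFERENCE_GROUP_SIZE` entries are mirrored
//! at its end, and a power-of-two-sized array of `Entry<K, V>` slots.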

use crate::swisstable_group_query::{GroupQuery, GROUP_SIZE, REFERENCE_GROUP_SIZE};
use crate::{error::Error, HashFn};
use std::convert::TryInto;
use std::{fmt, marker::PhantomData, mem::size_of};

/// Values of this type represent key-value pairs *as they are stored on-disk*.
/// `#[repr(C)]` makes sure we have deterministic field order and the fields
/// being byte arrays makes sure that there are no padding bytes, alignment is
/// not restricted, and the data is endian-independent.
///
/// It is a strict requirement that any `&[Entry<K, V>]` can be transmuted
/// into a `&[u8]` and back, regardless of whether the byte array has been
/// moved in the meantime.
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub(crate) struct Entry<K: ByteArray, V: ByteArray> {
    pub key: K,
    pub value: V,
}

impl<K: ByteArray, V: ByteArray> Entry<K, V> {
    #[inline]
    fn new(key: K, value: V) -> Entry<K, V> {
        Entry { key, value }
    }
}

impl<K: ByteArray, V: ByteArray> Default for Entry<K, V> {
    #[inline]
    fn default() -> Entry<K, V> {
        Entry {
            key: K::zeroed(),
            value: V::zeroed(),
        }
    }
}
impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTable<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "RawTable (slot_count={}, hash_fn={}) {{",
            self.data.len(),
            std::any::type_name::<H>(),
        )?;
        for (index, (&metadata, entry)) in self.metadata.iter().zip(self.data.iter()).enumerate() {
            if is_empty_or_deleted(metadata) {
                writeln!(f, "slot {}: empty", index)?;
            } else {
                writeln!(
                    f,
                    "slot {}: key={:?}, value={:?}, control_byte={}",
                    index, entry.key, entry.value, metadata
                )?;
            }
        }
        writeln!(f, "}}")
    }
}

pub(crate) type EntryMetadata = u8;

type HashValue = u32;

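/// Returns the part of the hash value that is used to pick the starting
/// slot of the probe sequence.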
#[inline]
fn h1(h: HashValue) -> usize {
    h as usize
}

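/// Returns the top 7 bits of the hash value. This is what gets stored in the
/// control byte of an occupied slot; since it always fits in 7 bits, it can
/// never have the `EMPTY_CONTROL_BYTE` bit set.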
#[inline]
fn h2(h: HashValue) -> u8 {
    const SHIFT: usize = size_of::<HashValue>() * 8 - 7;
    (h >> SHIFT) as u8
}

/// This type implements the sequence in which slots are probed when resolving
/// hash value conflicts. Note that probing is always done as if `GROUP_SIZE`
/// were `REFERENCE_GROUP_SIZE` (i.e. 16), even though on targets without SSE2
/// `GROUP_SIZE` is going to be smaller. By keeping the probing pattern constant
/// (i.e. always visiting the same slots in the same order, independently of
/// `GROUP_SIZE`) we make the hash table layout target-independent. In other
/// words: a hash table created and persisted on a target with `GROUP_SIZE` x
/// can always be loaded and read on a target with a different `GROUP_SIZE` y.
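///
/// For example, in a sufficiently large table, a probe sequence starting at
/// slot 0 will visit reference-sized groups starting at slots 0, 16, 48,
/// 96, ..., regardless of the actual `GROUP_SIZE` of the current target.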
struct ProbeSeq<const GROUP_SIZE: usize> {
    index: usize,
    base: usize,
    chunk_index: usize,
    stride: usize,
}

impl<const GROUP_SIZE: usize> ProbeSeq<GROUP_SIZE> {
    #[inline]
    fn new(h1: usize, mask: usize) -> Self {
        let initial_index = h1 & mask;

        ProbeSeq {
            index: initial_index,
            base: initial_index,
            chunk_index: 0,
            stride: 0,
        }
    }

    #[inline]
    fn advance(&mut self, mask: usize) {
        debug_assert!(GROUP_SIZE <= REFERENCE_GROUP_SIZE);
        debug_assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);

        // The effect of the code in the two branches is identical if
        // GROUP_SIZE == REFERENCE_GROUP_SIZE, but the if statement should
        // make it very easy for the compiler to discard the more costly
        // version and only emit the simplified one.
        if GROUP_SIZE == REFERENCE_GROUP_SIZE {
            self.stride += REFERENCE_GROUP_SIZE;
            self.index += self.stride;
            self.index &= mask;
        } else {
            self.chunk_index += GROUP_SIZE;

            if self.chunk_index == REFERENCE_GROUP_SIZE {
                self.chunk_index = 0;
                self.stride += REFERENCE_GROUP_SIZE;
                self.base += self.stride;
            }

            self.index = (self.base + self.chunk_index) & mask;
        }
    }
}

#[inline]
fn group_starting_at<'a>(control_bytes: &'a [u8], index: usize) -> &'a [u8; GROUP_SIZE] {
    debug_assert!(index < control_bytes.len() - GROUP_SIZE);
    unsafe {
        std::slice::from_raw_parts(control_bytes.as_ptr().offset(index as isize), GROUP_SIZE)
            .try_into()
            .unwrap()
    }
}

#[inline]
fn is_empty_or_deleted(control_byte: u8) -> bool {
    (control_byte & EMPTY_CONTROL_BYTE) != 0
}

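/// Control bytes with this bit set mark an empty (or deleted) slot. Occupied
/// slots store the 7-bit `h2` value of their key, which never has this bit set.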
const EMPTY_CONTROL_BYTE: u8 = 0b1000_0000;

/// This type provides a readonly view of the given table data.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

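    /// Looks up `key` and returns a reference to its stored value, if present.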
    #[inline]
    pub(crate) fn find(&self, key: &K) -> Option<&V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());
        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

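        // Probe the table group by group: within each group, compare the key
        // at every slot whose control byte matches `h2`. If the group contains
        // an empty slot, the key cannot occur any further along the probe
        // sequence and the lookup fails.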
        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at(self.data, index);

                if likely!(entry.key.equals(key)) {
                    return Some(&entry.value);
                }
            }

            if likely!(group_query.any_empty()) {
                return None;
            }

            ps.advance(mask);
        }
    }

    #[inline]
    pub(crate) fn iter(&'a self) -> RawIter<'a, K, V> {
        RawIter::new(self.metadata, self.data)
    }

    /// Checks (for the first `slots_to_check` slots) whether the computed and
    /// the stored hash values match.
    ///
    /// A mismatch is an indication that the table has been deserialized with
    /// the wrong hash function.
    pub(crate) fn sanity_check_hashes(&self, slots_to_check: usize) -> Result<(), Error> {
        let slots_to_check = std::cmp::min(slots_to_check, self.data.len());

        for i in 0..slots_to_check {
            let metadata = self.metadata[i];
            let entry = &self.data[i];

            if is_empty_or_deleted(metadata) {
                if !entry.key.is_all_zeros() || !entry.value.is_all_zeros() {
                    let message = format!("Found empty entry with non-zero contents at {}", i);

                    return Err(Error(message));
                }
            } else {
                let hash = H::hash(entry.key.as_slice());
                let h2 = h2(hash);

                if metadata != h2 {
                    let message = format!(
                        "Control byte does not match hash value for entry {}. Expected {}, found {}.",
                        i, h2, metadata
                    );

                    return Err(Error(message));
                }
            }
        }

        Ok(())
    }
}

#[inline]
fn entry_at<K: ByteArray, V: ByteArray>(data: &[Entry<K, V>], index: usize) -> &Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked(index) }
}

#[inline]
fn metadata_at(metadata: &[EntryMetadata], index: usize) -> &EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked(index) }
}

/// This type provides a mutable view of the given table data. It allows for
/// inserting new entries but does *not* allow for growing the table.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a mut [EntryMetadata],
    data: &'a mut [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a mut [EntryMetadata], data: &'a mut [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    /// Inserts the given key-value pair into the hash table.
    ///
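    /// Returns the value that was previously stored for `key`, if any.
    ///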
    /// WARNING: This method assumes that there is free space in the hash table
    /// somewhere. If there isn't, it will end up in an infinite loop.
    #[inline]
    pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());

        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

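        // Same probing scheme as in `find`: overwrite the value if the key is
        // already present in the current group, otherwise claim the first
        // empty slot encountered.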
        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at_mut(self.data, index);

                if likely!(entry.key.equals(&key)) {
                    let ret = Some(entry.value);
                    entry.value = value;
                    return ret;
                }
            }

            if let Some(first_empty) = group_query.first_empty() {
                let index = (ps.index + first_empty) & mask;
                *entry_at_mut(self.data, index) = Entry::new(key, value);
                *metadata_at_mut(self.metadata, index) = h2;

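                // The control bytes of the first REFERENCE_GROUP_SIZE slots
                // are mirrored at the end of the metadata array, so that
                // loading a group that starts near the end of the table never
                // has to wrap around.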
                if index < REFERENCE_GROUP_SIZE {
                    let first_mirror = self.data.len();
                    *metadata_at_mut(self.metadata, first_mirror + index) = h2;
                    debug_assert_eq!(
                        self.metadata[..REFERENCE_GROUP_SIZE],
                        self.metadata[self.data.len()..]
                    );
                }

                return None;
            }

            ps.advance(mask);
        }
    }
}

#[inline]
fn entry_at_mut<K: ByteArray, V: ByteArray>(
    data: &mut [Entry<K, V>],
    index: usize,
) -> &mut Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked_mut(index) }
}

#[inline]
fn metadata_at_mut(metadata: &mut [EntryMetadata], index: usize) -> &mut EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked_mut(index) }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTableMut<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readonly = RawTable::<'_, K, V, H>::new(self.metadata, self.data);
        write!(f, "{:?}", readonly)
    }
}

pub(crate) struct RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    current_index: usize,
}

impl<'a, K, V> RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        RawIter {
            metadata,
            data,
            current_index: 0,
        }
    }
}

impl<'a, K, V> Iterator for RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    type Item = (EntryMetadata, &'a Entry<K, V>);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.current_index >= self.data.len() {
                return None;
            }

            let index = self.current_index;

            self.current_index += 1;

            let entry_metadata = *metadata_at(self.metadata, index);
            if !is_empty_or_deleted(entry_metadata) {
                return Some((entry_metadata, entry_at(self.data, index)));
            }
        }
    }
}

/// A trait that lets us abstract over different lengths of fixed size byte
/// arrays. Don't implement it for anything other than fixed size byte arrays!
pub trait ByteArray: Sized + Copy + Eq + Clone + PartialEq + fmt::Debug + 'static {
    fn zeroed() -> Self;
    fn as_slice(&self) -> &[u8];
    fn equals(&self, other: &Self) -> bool;
    fn is_all_zeros(&self) -> bool {
        self.as_slice().iter().all(|b| *b == 0)
    }
}

impl<const LEN: usize> ByteArray for [u8; LEN] {
    #[inline(always)]
    fn zeroed() -> Self {
        [0u8; LEN]
    }

    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self[..]
    }

    // This custom implementation of comparing the fixed size arrays
    // seems to make a big difference for performance (at least for
    // 16+ byte keys).
    #[inline]
    fn equals(&self, other: &Self) -> bool {
        // Most branches here are optimized away at compile time
        // because they depend on values known at compile time.

        const USIZE: usize = size_of::<usize>();

        // Special-case a few sizes we care about
        if size_of::<Self>() == USIZE {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0);
        }

        if size_of::<Self>() == USIZE * 2 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1);
        }

        if size_of::<Self>() == USIZE * 3 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2);
        }

        if size_of::<Self>() == USIZE * 4 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2)
                && read_usize(&self[..], 3) == read_usize(&other[..], 3);
        }

        // Fallback: plain slice comparison
        return &self[..] == &other[..];

        #[inline(always)]
        fn read_usize(bytes: &[u8], index: usize) -> usize {
            const STRIDE: usize = size_of::<usize>();
            usize::from_le_bytes(
                bytes[STRIDE * index..STRIDE * (index + 1)]
                    .try_into()
                    .unwrap(),
            )
        }
    }
}

#[cfg(test)]
#[rustfmt::skip]
mod tests {
    use super::*;
    use crate::FxHashFn;

    fn make_table<
        I: Iterator<Item = (K, V)> + ExactSizeIterator,
        K: ByteArray,
        V: ByteArray,
        H: HashFn,
    >(
        xs: I,
    ) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
        let size = xs.size_hint().0.next_power_of_two();
        let mut data = vec![Entry::default(); size];
        let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];

        assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));

        {
            let mut table: RawTableMut<K, V, H> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: Default::default(),
            };

            for (k, v) in xs {
                table.insert(k, v);
            }
        }

        (metadata, data)
    }

    #[test]
    fn stress() {
        let xs: Vec<[u8; 2]> = (0 ..= u16::MAX).map(|x| x.to_le_bytes()).collect();

        let (mut metadata, mut data) = make_table::<_, _, _, FxHashFn>(xs.iter().map(|x| (*x, *x)));

        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for x in xs.iter() {
                assert_eq!(table.find(x), Some(x));
            }
        }

        // overwrite all values
        {
            let mut table: RawTableMut<_, _, FxHashFn> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.insert(*x, [i as u8, (i + 1) as u8]), Some(*x));
            }
        }

        // Check if we find the new expected values
        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.find(x), Some(&[i as u8, (i + 1) as u8]));
            }
        }
    }
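
    // Additional check for `ByteArray::equals`: the word-at-a-time fast paths
    // (which the 2-byte keys used in `stress` above never hit) must agree
    // with a plain slice comparison, both for equal and for differing arrays.
    #[test]
    fn byte_array_equals() {
        fn check<const LEN: usize>() {
            let a = [0xABu8; LEN];
            let mut b = a;

            assert!(a.equals(&b));
            assert_eq!(a.equals(&b), &a[..] == &b[..]);

            // Flip a single bit in the last byte; the specialized
            // comparison has to notice the difference.
            b[LEN - 1] ^= 1;
            assert!(!a.equals(&b));
            assert_eq!(a.equals(&b), &a[..] == &b[..]);
        }

        // Multiples of the word size hit the specialized branches (8, 16,
        // 24, and 32 bytes on 64-bit targets); 5 bytes exercises the
        // fallback path.
        check::<8>();
        check::<16>();
        check::<24>();
        check::<32>();
        check::<5>();
    }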

    // This test makes sure that ProbeSeq will always visit the same slots
    // in the same order, regardless of the actual GROUP_SIZE.
    #[test]
    fn probe_seq() {
        struct ReferenceProbeSeq {
            index: usize,
            stride: usize,
        }

        impl ReferenceProbeSeq {
            fn new(h1: usize, mask: usize) -> ReferenceProbeSeq {
                ReferenceProbeSeq {
                    index: h1 & mask,
                    stride: 0,
                }
            }

            fn advance(&mut self, mask: usize) {
                self.stride += REFERENCE_GROUP_SIZE;
                self.index += self.stride;
                self.index &= mask;
            }
        }

        fn test_with_group_size<const GROUP_SIZE: usize>() {
            assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
            assert!(REFERENCE_GROUP_SIZE >= GROUP_SIZE);

            for i in 4 .. 17 {
                let item_count = 1 << i;
                assert!(item_count % REFERENCE_GROUP_SIZE == 0);
                assert!(item_count % GROUP_SIZE == 0);

                let mask = item_count - 1;

                let mut expected = Vec::with_capacity(item_count);

                let mut refseq = ReferenceProbeSeq::new(0, mask);
                for _ in 0 .. item_count / REFERENCE_GROUP_SIZE {
                    for index in refseq.index .. refseq.index + REFERENCE_GROUP_SIZE {
                        expected.push(index & mask);
                    }
                    refseq.advance(mask);
                }

                let mut actual = Vec::with_capacity(item_count);

                let mut seq = ProbeSeq::<GROUP_SIZE>::new(0, mask);
                for _ in 0 .. item_count / GROUP_SIZE {
                    for index in seq.index .. seq.index + GROUP_SIZE {
                        actual.push(index & mask);
                    }
                    seq.advance(mask);
                }

                assert_eq!(expected, actual);
            }
        }

        test_with_group_size::<4>();
        test_with_group_size::<8>();
        test_with_group_size::<16>();
    }
}