//! This module implements the actual hash table logic. It works solely with
//! byte arrays and does not know anything about the unencoded key and value
//! types.
//!
//! The implementation makes sure that the encoded data contains no padding
//! bytes (which makes it deterministic) and nothing in it requires any specific
//! alignment.
//!
//! Many functions in this module are marked as `#[inline]`. This allows
//! LLVM to retain the information about byte array sizes, even though they are
//! converted to slices (of unknown size) from time to time.
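//!
//! The following sketch shows how the two views provided by this module fit
//! together. It mirrors the pattern used by the tests at the bottom of this
//! file (`FxHashFn` is one of the hash functions defined in this crate):
//!
//! ```ignore
//! // One slot per entry (slot count must be a power of two), plus the
//! // mirrored group of control bytes. 255 has the high bit set, i.e. "empty".
//! let mut data = vec![Entry::<[u8; 4], [u8; 4]>::default(); 8];
//! let mut metadata = vec![255u8; 8 + REFERENCE_GROUP_SIZE];
//!
//! let mut table: RawTableMut<_, _, FxHashFn> =
//!     RawTableMut::new(&mut metadata[..], &mut data[..]);
//! assert_eq!(table.insert([1, 2, 3, 4], [5, 6, 7, 8]), None);
//!
//! let table: RawTable<_, _, FxHashFn> = RawTable::new(&metadata[..], &data[..]);
//! assert_eq!(table.find(&[1, 2, 3, 4]), Some(&[5, 6, 7, 8]));
//! ```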

use crate::swisstable_group_query::{GroupQuery, GROUP_SIZE, REFERENCE_GROUP_SIZE};
use crate::{error::Error, HashFn};
use std::convert::TryInto;
use std::{fmt, marker::PhantomData, mem::size_of};

/// Values of this type represent key-value pairs *as they are stored on-disk*.
/// `#[repr(C)]` makes sure we have deterministic field order and the fields
/// being byte arrays makes sure that there are no padding bytes, alignment is
/// not restricted, and the data is endian-independent.
///
/// It is a strict requirement that any `&[Entry<K, V>]` can be transmuted
/// into a `&[u8]` and back, regardless of whether the byte array has been
/// moved in the meantime.
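///
/// For example (illustrative, with 4-byte keys and 2-byte values), the
/// following holds:
///
/// ```ignore
/// use std::mem::{align_of, size_of};
/// assert_eq!(size_of::<Entry<[u8; 4], [u8; 2]>>(), 6); // no padding bytes
/// assert_eq!(align_of::<Entry<[u8; 4], [u8; 2]>>(), 1); // no alignment requirement
/// ```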
#[repr(C)]
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub(crate) struct Entry<K: ByteArray, V: ByteArray> {
    pub key: K,
    pub value: V,
}

impl<K: ByteArray, V: ByteArray> Entry<K, V> {
    #[inline]
    fn new(key: K, value: V) -> Entry<K, V> {
        Entry { key, value }
    }
}

impl<K: ByteArray, V: ByteArray> Default for Entry<K, V> {
    #[inline]
    fn default() -> Entry<K, V> {
        Entry {
            key: K::zeroed(),
            value: V::zeroed(),
        }
    }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTable<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "RawTable (slot_count={}, hash_fn={}) {{",
            self.data.len(),
            std::any::type_name::<H>(),
        )?;

        for (index, (&metadata, entry)) in self.metadata.iter().zip(self.data.iter()).enumerate() {
            if is_empty_or_deleted(metadata) {
                writeln!(f, "slot {}: empty", index)?;
            } else {
                writeln!(
                    f,
                    "slot {}: key={:?}, value={:?}, control_byte={}",
                    index, entry.key, entry.value, metadata
                )?;
            }
        }

        writeln!(f, "}}")
    }
}

pub(crate) type EntryMetadata = u8;

type HashValue = u32;

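/// The part of the hash value that is used to select the slot at which probing
/// starts. The callers mask it with the table's power-of-two slot count.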
#[inline]
fn h1(h: HashValue) -> usize {
    h as usize
}

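/// The top 7 bits of the hash value, used as the control byte of an occupied
/// slot. The result is always in `0 ..= 0x7F`, so it can never be confused
/// with `EMPTY_CONTROL_BYTE`, which has the high bit set.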
#[inline]
fn h2(h: HashValue) -> u8 {
    const SHIFT: usize = size_of::<HashValue>() * 8 - 7;
    (h >> SHIFT) as u8
}

/// This type implements the sequence in which slots are probed when resolving
/// hash value conflicts. Note that probing is always done as if `GROUP_SIZE`
/// were 16, even though on targets without SSE2 `GROUP_SIZE` is going to be
/// smaller. By keeping the probing pattern constant (i.e. always visiting
/// the same slots in the same order, independently of `GROUP_SIZE`) we enable
/// the hash table layout to be target-independent. In other words: a hash
/// table created and persisted on a target with `GROUP_SIZE` x can always
/// be loaded and read on a target with a different `GROUP_SIZE` y.
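///
/// Concretely (with `REFERENCE_GROUP_SIZE` being 16), the sequence visits the
/// 16 slots of each reference group in order, while the group bases follow
/// the quadratic offsets 0, 16, 48, 96, 160, ... from the initial index,
/// because the stride grows by 16 on every full-group advance.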
struct ProbeSeq<const GROUP_SIZE: usize> {
    index: usize,
    base: usize,
    chunk_index: usize,
    stride: usize,
}

impl<const GROUP_SIZE: usize> ProbeSeq<GROUP_SIZE> {
    #[inline]
    fn new(h1: usize, mask: usize) -> Self {
        let initial_index = h1 & mask;

        ProbeSeq {
            index: initial_index,
            base: initial_index,
            chunk_index: 0,
            stride: 0,
        }
    }

    #[inline]
    fn advance(&mut self, mask: usize) {
        debug_assert!(GROUP_SIZE <= REFERENCE_GROUP_SIZE);
        debug_assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);

        // The effect of the code in the two branches is
        // identical if GROUP_SIZE==REFERENCE_GROUP_SIZE
        // but the if statement should make it very easy
        // for the compiler to discard the more costly
        // version and only emit the simplified version.

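        // For example, with REFERENCE_GROUP_SIZE == 16 and GROUP_SIZE == 4,
        // the second branch visits the four chunks of each reference group at
        // base + 0, base + 4, base + 8, and base + 12 before `base` itself
        // advances by the next stride (16, then 32, then 48, ...).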
        if GROUP_SIZE == REFERENCE_GROUP_SIZE {
            self.stride += REFERENCE_GROUP_SIZE;
            self.index += self.stride;
            self.index &= mask;
        } else {
            self.chunk_index += GROUP_SIZE;

            if self.chunk_index == REFERENCE_GROUP_SIZE {
                self.chunk_index = 0;
                self.stride += REFERENCE_GROUP_SIZE;
                self.base += self.stride;
            }

            self.index = (self.base + self.chunk_index) & mask;
        }
    }
}

#[inline]
fn group_starting_at<'a>(control_bytes: &'a [u8], index: usize) -> &'a [u8; GROUP_SIZE] {
    debug_assert!(index < control_bytes.len() - GROUP_SIZE);
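    // SAFETY: The assertion above checks that reading `GROUP_SIZE` bytes
    // starting at `index` stays in bounds. The mirrored control bytes at the
    // end of the metadata array (see `RawTable::new`) make this hold for every
    // probe position the callers can produce.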
    unsafe {
        std::slice::from_raw_parts(control_bytes.as_ptr().offset(index as isize), GROUP_SIZE)
            .try_into()
            .unwrap()
    }
}

#[inline]
fn is_empty_or_deleted(control_byte: u8) -> bool {
    (control_byte & EMPTY_CONTROL_BYTE) != 0
}

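/// Control bytes with the high bit set mark empty slots. Since `h2` values
/// always have the high bit clear, occupied and empty slots can never be
/// confused.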
const EMPTY_CONTROL_BYTE: u8 = 0b1000_0000;

/// This type provides a readonly view of the given table data.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTable<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

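        // The metadata array is `REFERENCE_GROUP_SIZE` entries longer than the
        // data array: the control bytes of the first group are mirrored at the
        // end (see `RawTableMut::insert`), so that group reads starting near
        // the end of the table never have to wrap around.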
        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    #[inline]
    pub(crate) fn find(&self, key: &K) -> Option<&V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());
        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at(self.data, index);

                if likely!(entry.key.equals(key)) {
                    return Some(&entry.value);
                }
            }

            if likely!(group_query.any_empty()) {
                return None;
            }

            ps.advance(mask);
        }
    }

    #[inline]
    pub(crate) fn iter(&'a self) -> RawIter<'a, K, V> {
        RawIter::new(self.metadata, self.data)
    }

    /// Check (for the first `slots_to_check` slots) if the computed and
    /// the stored hash value match.
    ///
    /// A mismatch is an indication that the table has been deserialized with
    /// the wrong hash function.
    pub(crate) fn sanity_check_hashes(&self, slots_to_check: usize) -> Result<(), Error> {
        let slots_to_check = std::cmp::min(slots_to_check, self.data.len());

        for i in 0..slots_to_check {
            let metadata = self.metadata[i];
            let entry = &self.data[i];

            if is_empty_or_deleted(metadata) {
                if !entry.key.is_all_zeros() || !entry.value.is_all_zeros() {
                    let message = format!("Found empty entry with non-zero contents at {}", i);

                    return Err(Error(message));
                }
            } else {
                let hash = H::hash(entry.key.as_slice());
                let h2 = h2(hash);

                if metadata != h2 {
                    let message = format!(
                        "Control byte does not match hash value for entry {}. Expected {}, found {}.",
                        i, h2, metadata
                    );

                    return Err(Error(message));
                }
            }
        }

        Ok(())
    }
}

#[inline]
fn entry_at<K: ByteArray, V: ByteArray>(data: &[Entry<K, V>], index: usize) -> &Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked(index) }
}

#[inline]
fn metadata_at(metadata: &[EntryMetadata], index: usize) -> &EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked(index) }
}

/// This type provides a mutable view of the given table data. It allows for
/// inserting new entries but does *not* allow for growing the table.
#[derive(PartialEq, Eq)]
pub(crate) struct RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    metadata: &'a mut [EntryMetadata],
    data: &'a mut [Entry<K, V>],
    _hash_fn: PhantomData<H>,
}

impl<'a, K, V, H> RawTableMut<'a, K, V, H>
where
    K: ByteArray,
    V: ByteArray,
    H: HashFn,
{
    #[inline]
    pub(crate) fn new(metadata: &'a mut [EntryMetadata], data: &'a mut [Entry<K, V>]) -> Self {
        // Make sure Entry<K, V> does not contain any padding bytes and can be
        // stored at arbitrary addresses.
        assert!(size_of::<Entry<K, V>>() == size_of::<K>() + size_of::<V>());
        assert!(std::mem::align_of::<Entry<K, V>>() == 1);

        debug_assert!(data.len().is_power_of_two());
        debug_assert_eq!(metadata.len(), data.len() + REFERENCE_GROUP_SIZE);

        Self {
            metadata,
            data,
            _hash_fn: PhantomData::default(),
        }
    }

    /// Inserts the given key-value pair into the hash table.
    ///
    /// WARNING: This method assumes that there is free space in the hash table
    /// somewhere. If there isn't, it will end up in an infinite loop.
    #[inline]
    pub(crate) fn insert(&mut self, key: K, value: V) -> Option<V> {
        debug_assert!(self.data.len().is_power_of_two());
        debug_assert!(self.metadata.len() == self.data.len() + REFERENCE_GROUP_SIZE);

        let mask = self.data.len() - 1;
        let hash = H::hash(key.as_slice());

        let mut ps = ProbeSeq::<GROUP_SIZE>::new(h1(hash), mask);
        let h2 = h2(hash);

        loop {
            let mut group_query = GroupQuery::from(group_starting_at(self.metadata, ps.index), h2);

            for m in &mut group_query {
                let index = (ps.index + m) & mask;

                let entry = entry_at_mut(self.data, index);

                if likely!(entry.key.equals(&key)) {
                    let ret = Some(entry.value);
                    entry.value = value;
                    return ret;
                }
            }

            if let Some(first_empty) = group_query.first_empty() {
                let index = (ps.index + first_empty) & mask;
                *entry_at_mut(self.data, index) = Entry::new(key, value);
                *metadata_at_mut(self.metadata, index) = h2;

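                // Keep the mirrored copy of the first group in sync: control
                // bytes of slots at the beginning of the table are duplicated
                // past the end so that group reads never have to wrap around.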
                if index < REFERENCE_GROUP_SIZE {
                    let first_mirror = self.data.len();
                    *metadata_at_mut(self.metadata, first_mirror + index) = h2;
                    debug_assert_eq!(
                        self.metadata[..REFERENCE_GROUP_SIZE],
                        self.metadata[self.data.len()..]
                    );
                }

                return None;
            }

            ps.advance(mask);
        }
    }
}

#[inline]
fn entry_at_mut<K: ByteArray, V: ByteArray>(
    data: &mut [Entry<K, V>],
    index: usize,
) -> &mut Entry<K, V> {
    debug_assert!(index < data.len());
    unsafe { data.get_unchecked_mut(index) }
}

#[inline]
fn metadata_at_mut(metadata: &mut [EntryMetadata], index: usize) -> &mut EntryMetadata {
    debug_assert!(index < metadata.len());
    unsafe { metadata.get_unchecked_mut(index) }
}

impl<'a, K: ByteArray, V: ByteArray, H: HashFn> fmt::Debug for RawTableMut<'a, K, V, H> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let readonly = RawTable::<'_, K, V, H>::new(self.metadata, self.data);
        write!(f, "{:?}", readonly)
    }
}

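/// Iterates over all occupied slots of the given table data, yielding the
/// control byte and the stored entry for each.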
pub(crate) struct RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    metadata: &'a [EntryMetadata],
    data: &'a [Entry<K, V>],
    current_index: usize,
}

impl<'a, K, V> RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    pub(crate) fn new(metadata: &'a [EntryMetadata], data: &'a [Entry<K, V>]) -> RawIter<'a, K, V> {
        debug_assert!(data.len().is_power_of_two());
        debug_assert!(metadata.len() == data.len() + REFERENCE_GROUP_SIZE);

        RawIter {
            metadata,
            data,
            current_index: 0,
        }
    }
}

impl<'a, K, V> Iterator for RawIter<'a, K, V>
where
    K: ByteArray,
    V: ByteArray,
{
    type Item = (EntryMetadata, &'a Entry<K, V>);

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.current_index >= self.data.len() {
                return None;
            }

            let index = self.current_index;
            self.current_index += 1;

            let entry_metadata = *metadata_at(self.metadata, index);
            if !is_empty_or_deleted(entry_metadata) {
                return Some((entry_metadata, entry_at(self.data, index)));
            }
        }
    }
}

/// A trait that lets us abstract over different lengths of fixed size byte
/// arrays. Don't implement it for anything other than fixed size byte arrays!
pub trait ByteArray: Sized + Copy + Eq + Clone + PartialEq + fmt::Debug + 'static {
    fn zeroed() -> Self;
    fn as_slice(&self) -> &[u8];
    fn equals(&self, other: &Self) -> bool;
    fn is_all_zeros(&self) -> bool {
        self.as_slice().iter().all(|b| *b == 0)
    }
}

impl<const LEN: usize> ByteArray for [u8; LEN] {
    #[inline(always)]
    fn zeroed() -> Self {
        [0u8; LEN]
    }

    #[inline(always)]
    fn as_slice(&self) -> &[u8] {
        &self[..]
    }

    // This custom implementation of comparing the fixed size arrays
    // seems to make a big difference for performance (at least for
    // 16+ byte keys).
    #[inline]
    fn equals(&self, other: &Self) -> bool {
        // Most branches here are optimized away at compile time
        // because they depend on values known at compile time.

        const USIZE: usize = size_of::<usize>();

        // Special case a few cases we care about
        if size_of::<Self>() == USIZE {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0);
        }

        if size_of::<Self>() == USIZE * 2 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1);
        }

        if size_of::<Self>() == USIZE * 3 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2);
        }

        if size_of::<Self>() == USIZE * 4 {
            return read_usize(&self[..], 0) == read_usize(&other[..], 0)
                && read_usize(&self[..], 1) == read_usize(&other[..], 1)
                && read_usize(&self[..], 2) == read_usize(&other[..], 2)
                && read_usize(&self[..], 3) == read_usize(&other[..], 3);
        }

        // fallback
        return &self[..] == &other[..];

        #[inline(always)]
        fn read_usize(bytes: &[u8], index: usize) -> usize {
            const STRIDE: usize = size_of::<usize>();
            usize::from_le_bytes(
                bytes[STRIDE * index..STRIDE * (index + 1)]
                    .try_into()
                    .unwrap(),
            )
        }
    }
}

#[cfg(test)]
#[rustfmt::skip]
mod tests {
    use super::*;
    use crate::FxHashFn;

    fn make_table<
        I: Iterator<Item = (K, V)> + ExactSizeIterator,
        K: ByteArray,
        V: ByteArray,
        H: HashFn,
    >(
        xs: I,
    ) -> (Vec<EntryMetadata>, Vec<Entry<K, V>>) {
        let size = xs.size_hint().0.next_power_of_two();
        let mut data = vec![Entry::default(); size];
        let mut metadata = vec![255; size + REFERENCE_GROUP_SIZE];

        assert!(metadata.iter().all(|b| is_empty_or_deleted(*b)));

        {
            let mut table: RawTableMut<K, V, H> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: Default::default(),
            };

            for (k, v) in xs {
                table.insert(k, v);
            }
        }

        (metadata, data)
    }

    #[test]
    fn stress() {
        let xs: Vec<[u8; 2]> = (0 ..= u16::MAX).map(|x| x.to_le_bytes()).collect();

        let (mut metadata, mut data) = make_table::<_, _, _, FxHashFn>(xs.iter().map(|x| (*x, *x)));

        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for x in xs.iter() {
                assert_eq!(table.find(x), Some(x));
            }
        }

        // overwrite all values
        {
            let mut table: RawTableMut<_, _, FxHashFn> = RawTableMut {
                metadata: &mut metadata[..],
                data: &mut data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.insert(*x, [i as u8, (i + 1) as u8]), Some(*x));
            }
        }

        // Check if we find the new expected values
        {
            let table: RawTable<_, _, FxHashFn> = RawTable {
                metadata: &metadata[..],
                data: &data[..],
                _hash_fn: PhantomData::default(),
            };

            for (i, x) in xs.iter().enumerate() {
                assert_eq!(table.find(x), Some(&[i as u8, (i + 1) as u8]));
            }
        }
    }

    // This test makes sure that ProbeSeq will always visit the same slots
    // in the same order, regardless of the actual GROUP_SIZE.
    #[test]
    fn probe_seq() {
        struct ReferenceProbeSeq {
            index: usize,
            stride: usize,
        }

        impl ReferenceProbeSeq {
            fn new(h1: usize, mask: usize) -> ReferenceProbeSeq {
                ReferenceProbeSeq {
                    index: h1 & mask,
                    stride: 0,
                }
            }

            fn advance(&mut self, mask: usize) {
                self.stride += REFERENCE_GROUP_SIZE;
                self.index += self.stride;
                self.index &= mask;
            }
        }

        fn test_with_group_size<const GROUP_SIZE: usize>() {
            assert!(REFERENCE_GROUP_SIZE % GROUP_SIZE == 0);
            assert!(REFERENCE_GROUP_SIZE >= GROUP_SIZE);

            for i in 4 .. 17 {
                let item_count = 1 << i;
                assert!(item_count % REFERENCE_GROUP_SIZE == 0);
                assert!(item_count % GROUP_SIZE == 0);

                let mask = item_count - 1;

                let mut expected = Vec::with_capacity(item_count);

                let mut refseq = ReferenceProbeSeq::new(0, mask);
                for _ in 0 .. item_count / REFERENCE_GROUP_SIZE {
                    for index in refseq.index .. refseq.index + REFERENCE_GROUP_SIZE {
                        expected.push(index & mask);
                    }
                    refseq.advance(mask);
                }

                let mut actual = Vec::with_capacity(item_count);

                let mut seq = ProbeSeq::<GROUP_SIZE>::new(0, mask);
                for _ in 0 .. item_count / GROUP_SIZE {
                    for index in seq.index .. seq.index + GROUP_SIZE {
                        actual.push(index & mask);
                    }
                    seq.advance(mask);
                }

                assert_eq!(expected, actual);
            }
        }

        test_with_group_size::<4>();
        test_with_group_size::<8>();
        test_with_group_size::<16>();
    }
}