| 1 | use std::{ |
|---|---|
| 2 | borrow::{Borrow, BorrowMut}, |
| 3 | convert::TryInto, |
| 4 | marker::PhantomData, |
| 5 | mem::{align_of, size_of}, |
| 6 | }; |
| 7 | |
| 8 | use crate::{ |
| 9 | error::Error, |
| 10 | raw_table::{Entry, EntryMetadata, RawTable}, |
| 11 | Factor, |
| 12 | }; |
| 13 | use crate::{swisstable_group_query::REFERENCE_GROUP_SIZE, Config}; |
| 14 | |
| 15 | const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 2]; |
| 16 | |
| 17 | #[repr(C)] |
| 18 | #[derive(Clone)] |
| 19 | pub(crate) struct Header { |
| 20 | tag: [u8; 4], |
| 21 | size_of_metadata: u8, |
| 22 | size_of_key: u8, |
| 23 | size_of_value: u8, |
| 24 | size_of_header: u8, |
| 25 | |
| 26 | item_count: [u8; 8], |
| 27 | slot_count: [u8; 8], |
| 28 | |
| 29 | file_format_version: [u8; 4], |
| 30 | max_load_factor: [u8; 2], |
| 31 | |
| 32 | // Let's keep things at least 8 byte aligned |
| 33 | padding: [u8; 2], |
| 34 | } |
| 35 | |
| 36 | const HEADER_TAG: [u8; 4] = *b"ODHT"; |
| 37 | const HEADER_SIZE: usize = size_of::<Header>(); |
| 38 | |
| 39 | impl Header { |
| 40 | pub fn sanity_check<C: Config>(&self, raw_bytes_len: usize) -> Result<(), Error> { |
| 41 | assert!(align_of::<Header>() == 1); |
| 42 | assert!(HEADER_SIZE % 8 == 0); |
| 43 | |
| 44 | if self.tag != HEADER_TAG { |
| 45 | return Err(Error(format!( |
| 46 | "Expected header tag{:?} but found{:?} ", |
| 47 | HEADER_TAG, self.tag |
| 48 | ))); |
| 49 | } |
| 50 | |
| 51 | if self.file_format_version != CURRENT_FILE_FORMAT_VERSION { |
| 52 | return Err(Error(format!( |
| 53 | "Expected file format version{:?} but found{:?} ", |
| 54 | CURRENT_FILE_FORMAT_VERSION, self.file_format_version |
| 55 | ))); |
| 56 | } |
| 57 | |
| 58 | check_expected_size::<EntryMetadata>("EntryMetadata", self.size_of_metadata)?; |
| 59 | check_expected_size::<C::EncodedKey>("Config::EncodedKey", self.size_of_key)?; |
| 60 | check_expected_size::<C::EncodedValue>("Config::EncodedValue", self.size_of_value)?; |
| 61 | check_expected_size::<Header>("Header", self.size_of_header)?; |
| 62 | |
| 63 | if raw_bytes_len != bytes_needed::<C>(self.slot_count()) { |
| 64 | return Err(Error(format!( |
| 65 | "Provided allocation has wrong size for slot count{} . \ |
| 66 | The allocation's size is{} but the expected size is{} .", |
| 67 | self.slot_count(), |
| 68 | raw_bytes_len, |
| 69 | bytes_needed::<C>(self.slot_count()), |
| 70 | ))); |
| 71 | } |
| 72 | |
| 73 | // This should never actually be a problem because it should be impossible to |
| 74 | // create the underlying memory slice in the first place: |
| 75 | assert!(u64::from_le_bytes(self.slot_count) <= usize::MAX as u64); |
| 76 | |
| 77 | if !self.slot_count().is_power_of_two() { |
| 78 | return Err(Error(format!( |
| 79 | "Slot count of hashtable should be a power of two but is{} ", |
| 80 | self.slot_count() |
| 81 | ))); |
| 82 | } |
| 83 | |
| 84 | return Ok(()); |
| 85 | |
| 86 | fn check_expected_size<T>(name: &str, expected_size: u8) -> Result<(), Error> { |
| 87 | if expected_size as usize != size_of::<T>() { |
| 88 | Err(Error(format!( |
| 89 | "Expected size of{} to be{} but the encoded \ |
| 90 | table specifies{} . This indicates an encoding mismatch.", |
| 91 | name, |
| 92 | size_of::<T>(), |
| 93 | expected_size |
| 94 | ))) |
| 95 | } else { |
| 96 | Ok(()) |
| 97 | } |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | #[inline] |
| 102 | pub fn item_count(&self) -> usize { |
| 103 | u64::from_le_bytes(self.item_count) as usize |
| 104 | } |
| 105 | |
| 106 | #[inline] |
| 107 | pub fn set_item_count(&mut self, item_count: usize) { |
| 108 | self.item_count = (item_count as u64).to_le_bytes(); |
| 109 | } |
| 110 | |
| 111 | #[inline] |
| 112 | pub fn slot_count(&self) -> usize { |
| 113 | u64::from_le_bytes(self.slot_count) as usize |
| 114 | } |
| 115 | |
| 116 | #[inline] |
| 117 | pub fn max_load_factor(&self) -> Factor { |
| 118 | Factor(u16::from_le_bytes(self.max_load_factor)) |
| 119 | } |
| 120 | |
| 121 | #[inline] |
| 122 | fn metadata_offset<C: Config>(&self) -> isize { |
| 123 | self.entry_data_offset() + self.entry_data_size_in_bytes::<C>() as isize |
| 124 | } |
| 125 | |
| 126 | #[inline] |
| 127 | fn entry_data_size_in_bytes<C: Config>(&self) -> usize { |
| 128 | let slot_count = self.slot_count(); |
| 129 | let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>(); |
| 130 | slot_count * size_of_entry |
| 131 | } |
| 132 | |
| 133 | #[inline] |
| 134 | fn entry_data_offset(&self) -> isize { |
| 135 | HEADER_SIZE as isize |
| 136 | } |
| 137 | |
| 138 | fn initialize<C: Config>( |
| 139 | raw_bytes: &mut [u8], |
| 140 | slot_count: usize, |
| 141 | item_count: usize, |
| 142 | max_load_factor: Factor, |
| 143 | ) { |
| 144 | assert_eq!(raw_bytes.len(), bytes_needed::<C>(slot_count)); |
| 145 | |
| 146 | let header = Header { |
| 147 | tag: HEADER_TAG, |
| 148 | size_of_metadata: size_of::<EntryMetadata>().try_into().unwrap(), |
| 149 | size_of_key: size_of::<C::EncodedKey>().try_into().unwrap(), |
| 150 | size_of_value: size_of::<C::EncodedValue>().try_into().unwrap(), |
| 151 | size_of_header: size_of::<Header>().try_into().unwrap(), |
| 152 | item_count: (item_count as u64).to_le_bytes(), |
| 153 | slot_count: (slot_count as u64).to_le_bytes(), |
| 154 | file_format_version: CURRENT_FILE_FORMAT_VERSION, |
| 155 | max_load_factor: max_load_factor.0.to_le_bytes(), |
| 156 | padding: [0u8; 2], |
| 157 | }; |
| 158 | |
| 159 | assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(())); |
| 160 | |
| 161 | unsafe { |
| 162 | *(raw_bytes.as_mut_ptr() as *mut Header) = header; |
| 163 | } |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | /// An allocation holds a byte array that is guaranteed to conform to the |
| 168 | /// hash table's binary layout. |
| 169 | #[derive(Clone, Copy)] |
| 170 | pub(crate) struct Allocation<C, M> |
| 171 | where |
| 172 | C: Config, |
| 173 | { |
| 174 | bytes: M, |
| 175 | _config: PhantomData<C>, |
| 176 | } |
| 177 | |
| 178 | impl<C, M> Allocation<C, M> |
| 179 | where |
| 180 | C: Config, |
| 181 | M: Borrow<[u8]>, |
| 182 | { |
| 183 | pub fn from_raw_bytes(raw_bytes: M) -> Result<Allocation<C, M>, Error> { |
| 184 | let allocation = Allocation { |
| 185 | bytes: raw_bytes, |
| 186 | _config: PhantomData::default(), |
| 187 | }; |
| 188 | |
| 189 | allocation |
| 190 | .header() |
| 191 | .sanity_check::<C>(allocation.bytes.borrow().len())?; |
| 192 | |
| 193 | // Check that the hash function provides the expected hash values. |
| 194 | { |
| 195 | let (entry_metadata, entry_data) = allocation.data_slices(); |
| 196 | RawTable::<C::EncodedKey, C::EncodedValue, C::H>::new(entry_metadata, entry_data) |
| 197 | .sanity_check_hashes(10)?; |
| 198 | } |
| 199 | |
| 200 | Ok(allocation) |
| 201 | } |
| 202 | |
| 203 | #[inline] |
| 204 | pub unsafe fn from_raw_bytes_unchecked(raw_bytes: M) -> Allocation<C, M> { |
| 205 | Allocation { |
| 206 | bytes: raw_bytes, |
| 207 | _config: PhantomData::default(), |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | #[inline] |
| 212 | pub fn header(&self) -> &Header { |
| 213 | let raw_bytes = self.bytes.borrow(); |
| 214 | debug_assert!(raw_bytes.len() >= HEADER_SIZE); |
| 215 | |
| 216 | let header: &Header = unsafe { &*(raw_bytes.as_ptr() as *const Header) }; |
| 217 | |
| 218 | debug_assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(())); |
| 219 | |
| 220 | header |
| 221 | } |
| 222 | |
| 223 | #[inline] |
| 224 | pub fn data_slices(&self) -> (&[EntryMetadata], &[Entry<C::EncodedKey, C::EncodedValue>]) { |
| 225 | let raw_bytes = self.bytes.borrow(); |
| 226 | let slot_count = self.header().slot_count(); |
| 227 | let entry_data_offset = self.header().entry_data_offset(); |
| 228 | let metadata_offset = self.header().metadata_offset::<C>(); |
| 229 | |
| 230 | let entry_metadata = unsafe { |
| 231 | std::slice::from_raw_parts( |
| 232 | raw_bytes.as_ptr().offset(metadata_offset) as *const EntryMetadata, |
| 233 | slot_count + REFERENCE_GROUP_SIZE, |
| 234 | ) |
| 235 | }; |
| 236 | |
| 237 | let entry_data = unsafe { |
| 238 | std::slice::from_raw_parts( |
| 239 | raw_bytes.as_ptr().offset(entry_data_offset) |
| 240 | as *const Entry<C::EncodedKey, C::EncodedValue>, |
| 241 | slot_count, |
| 242 | ) |
| 243 | }; |
| 244 | |
| 245 | debug_assert_eq!( |
| 246 | entry_data.as_ptr_range().start as usize, |
| 247 | raw_bytes.as_ptr_range().start as usize + HEADER_SIZE, |
| 248 | ); |
| 249 | |
| 250 | debug_assert_eq!( |
| 251 | entry_data.as_ptr_range().end as usize, |
| 252 | entry_metadata.as_ptr_range().start as usize, |
| 253 | ); |
| 254 | |
| 255 | debug_assert_eq!( |
| 256 | raw_bytes.as_ptr_range().end as usize, |
| 257 | entry_metadata.as_ptr_range().end as usize, |
| 258 | ); |
| 259 | |
| 260 | (entry_metadata, entry_data) |
| 261 | } |
| 262 | |
| 263 | #[inline] |
| 264 | pub fn raw_bytes(&self) -> &[u8] { |
| 265 | self.bytes.borrow() |
| 266 | } |
| 267 | } |
| 268 | |
| 269 | impl<C, M> Allocation<C, M> |
| 270 | where |
| 271 | C: Config, |
| 272 | M: BorrowMut<[u8]>, |
| 273 | { |
| 274 | #[inline] |
| 275 | pub fn with_mut_parts<R>( |
| 276 | &mut self, |
| 277 | f: impl FnOnce( |
| 278 | &mut Header, |
| 279 | &mut [EntryMetadata], |
| 280 | &mut [Entry<C::EncodedKey, C::EncodedValue>], |
| 281 | ) -> R, |
| 282 | ) -> R { |
| 283 | let raw_bytes = self.bytes.borrow_mut(); |
| 284 | |
| 285 | // Copy the address as an integer so we can use it for the debug_assertion |
| 286 | // below without accessing `raw_bytes` again. |
| 287 | let _raw_bytes_end_addr = raw_bytes.as_ptr_range().end as usize; |
| 288 | |
| 289 | let (header, rest) = raw_bytes.split_at_mut(HEADER_SIZE); |
| 290 | let header: &mut Header = unsafe { &mut *(header.as_mut_ptr() as *mut Header) }; |
| 291 | |
| 292 | let slot_count = header.slot_count(); |
| 293 | let entry_data_size_in_bytes = header.entry_data_size_in_bytes::<C>(); |
| 294 | |
| 295 | let (entry_data_bytes, metadata_bytes) = rest.split_at_mut(entry_data_size_in_bytes); |
| 296 | |
| 297 | let entry_metadata = unsafe { |
| 298 | std::slice::from_raw_parts_mut( |
| 299 | metadata_bytes.as_mut_ptr() as *mut EntryMetadata, |
| 300 | slot_count + REFERENCE_GROUP_SIZE, |
| 301 | ) |
| 302 | }; |
| 303 | |
| 304 | let entry_data = unsafe { |
| 305 | std::slice::from_raw_parts_mut( |
| 306 | entry_data_bytes.as_mut_ptr() as *mut Entry<C::EncodedKey, C::EncodedValue>, |
| 307 | slot_count, |
| 308 | ) |
| 309 | }; |
| 310 | |
| 311 | debug_assert_eq!( |
| 312 | entry_data.as_ptr_range().start as usize, |
| 313 | header as *mut Header as usize + HEADER_SIZE, |
| 314 | ); |
| 315 | |
| 316 | debug_assert_eq!( |
| 317 | entry_data.as_ptr_range().end as usize, |
| 318 | entry_metadata.as_ptr_range().start as usize, |
| 319 | ); |
| 320 | |
| 321 | debug_assert_eq!( |
| 322 | _raw_bytes_end_addr, |
| 323 | entry_metadata.as_ptr_range().end as usize, |
| 324 | ); |
| 325 | |
| 326 | f(header, entry_metadata, entry_data) |
| 327 | } |
| 328 | } |
| 329 | |
| 330 | #[inline] |
| 331 | pub(crate) fn bytes_needed<C: Config>(slot_count: usize) -> usize { |
| 332 | assert!(slot_count.is_power_of_two()); |
| 333 | let size_of_entry: usize = size_of::<Entry<C::EncodedKey, C::EncodedValue>>(); |
| 334 | let size_of_metadata: usize = size_of::<EntryMetadata>(); |
| 335 | |
| 336 | HEADER_SIZE |
| 337 | + slot_count * size_of_entry |
| 338 | + (slot_count + REFERENCE_GROUP_SIZE) * size_of_metadata |
| 339 | } |
| 340 | |
| 341 | pub(crate) fn allocate<C: Config>( |
| 342 | slot_count: usize, |
| 343 | item_count: usize, |
| 344 | max_load_factor: Factor, |
| 345 | ) -> Allocation<C, Box<[u8]>> { |
| 346 | let bytes: Box<[u8]> = vec![0u8; bytes_needed::<C>(slot_count)].into_boxed_slice(); |
| 347 | init_in_place::<C, _>(bytes, slot_count, item_count, max_load_factor) |
| 348 | } |
| 349 | |
| 350 | pub(crate) fn init_in_place<C: Config, M: BorrowMut<[u8]>>( |
| 351 | mut bytes: M, |
| 352 | slot_count: usize, |
| 353 | item_count: usize, |
| 354 | max_load_factor: Factor, |
| 355 | ) -> Allocation<C, M> { |
| 356 | Header::initialize::<C>(raw_bytes:bytes.borrow_mut(), slot_count, item_count, max_load_factor); |
| 357 | |
| 358 | let mut allocation: Allocation |
| 359 | bytes, |
| 360 | _config: PhantomData::default(), |
| 361 | }; |
| 362 | |
| 363 | allocation.with_mut_parts(|_, metadata: &mut [u8], data: &mut [Entry< |
| 364 | metadata.fill(0xFF); |
| 365 | data.fill(Default::default()); |
| 366 | }); |
| 367 | |
| 368 | allocation |
| 369 | } |
| 370 |
Definitions
- Header
- tag
- size_of_metadata
- size_of_key
- size_of_value
- size_of_header
- item_count
- slot_count
- file_format_version
- max_load_factor
- padding
- sanity_check
- check_expected_size
- item_count
- set_item_count
- slot_count
- max_load_factor
- metadata_offset
- entry_data_size_in_bytes
- entry_data_offset
- initialize
- Allocation
- bytes
- _config
- from_raw_bytes
- from_raw_bytes_unchecked
- header
- data_slices
- raw_bytes
- with_mut_parts
- bytes_needed
- allocate
Learn Rust with the experts
Find out more
