| 1 | use std::{ | 
| 2 | borrow::{Borrow, BorrowMut}, | 
|---|
| 3 | convert::TryInto, | 
|---|
| 4 | marker::PhantomData, | 
|---|
| 5 | mem::{align_of, size_of}, | 
|---|
| 6 | }; | 
|---|
| 7 |  | 
|---|
| 8 | use crate::{ | 
|---|
| 9 | error::Error, | 
|---|
| 10 | raw_table::{Entry, EntryMetadata, RawTable}, | 
|---|
| 11 | Factor, | 
|---|
| 12 | }; | 
|---|
| 13 | use crate::{swisstable_group_query::REFERENCE_GROUP_SIZE, Config}; | 
|---|
| 14 |  | 
|---|
| 15 | const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 2]; | 
|---|
| 16 |  | 
|---|
| 17 | #[ repr(C)] | 
|---|
| 18 | #[ derive(Clone)] | 
|---|
| 19 | pub(crate) struct Header { | 
|---|
| 20 | tag: [u8; 4], | 
|---|
| 21 | size_of_metadata: u8, | 
|---|
| 22 | size_of_key: u8, | 
|---|
| 23 | size_of_value: u8, | 
|---|
| 24 | size_of_header: u8, | 
|---|
| 25 |  | 
|---|
| 26 | item_count: [u8; 8], | 
|---|
| 27 | slot_count: [u8; 8], | 
|---|
| 28 |  | 
|---|
| 29 | file_format_version: [u8; 4], | 
|---|
| 30 | max_load_factor: [u8; 2], | 
|---|
| 31 |  | 
|---|
| 32 | // Let's keep things at least 8 byte aligned | 
|---|
| 33 | padding: [u8; 2], | 
|---|
| 34 | } | 
|---|
| 35 |  | 
|---|
| 36 | const HEADER_TAG: [u8; 4] = * b"ODHT"; | 
|---|
| 37 | const HEADER_SIZE: usize = size_of::<Header>(); | 
|---|
| 38 |  | 
|---|
| 39 | impl Header { | 
|---|
| 40 | pub fn sanity_check<C: Config>(&self, raw_bytes_len: usize) -> Result<(), Error> { | 
|---|
| 41 | assert!(align_of::<Header>() == 1); | 
|---|
| 42 | assert!(HEADER_SIZE % 8 == 0); | 
|---|
| 43 |  | 
|---|
| 44 | if self.tag != HEADER_TAG { | 
|---|
| 45 | return Err(Error(format!( | 
|---|
| 46 | "Expected header tag {:?}  but found {:?} ", | 
|---|
| 47 | HEADER_TAG, self.tag | 
|---|
| 48 | ))); | 
|---|
| 49 | } | 
|---|
| 50 |  | 
|---|
| 51 | if self.file_format_version != CURRENT_FILE_FORMAT_VERSION { | 
|---|
| 52 | return Err(Error(format!( | 
|---|
| 53 | "Expected file format version {:?}  but found {:?} ", | 
|---|
| 54 | CURRENT_FILE_FORMAT_VERSION, self.file_format_version | 
|---|
| 55 | ))); | 
|---|
| 56 | } | 
|---|
| 57 |  | 
|---|
| 58 | check_expected_size::<EntryMetadata>( "EntryMetadata", self.size_of_metadata)?; | 
|---|
| 59 | check_expected_size::<C::EncodedKey>( "Config::EncodedKey", self.size_of_key)?; | 
|---|
| 60 | check_expected_size::<C::EncodedValue>( "Config::EncodedValue", self.size_of_value)?; | 
|---|
| 61 | check_expected_size::<Header>( "Header", self.size_of_header)?; | 
|---|
| 62 |  | 
|---|
| 63 | if raw_bytes_len != bytes_needed::<C>(self.slot_count()) { | 
|---|
| 64 | return Err(Error(format!( | 
|---|
| 65 | "Provided allocation has wrong size for slot count {} . \ | 
|---|
| 66 |                  The allocation's size is {}  but the expected size is {} .", | 
|---|
| 67 | self.slot_count(), | 
|---|
| 68 | raw_bytes_len, | 
|---|
| 69 | bytes_needed::<C>(self.slot_count()), | 
|---|
| 70 | ))); | 
|---|
| 71 | } | 
|---|
| 72 |  | 
|---|
| 73 | // This should never actually be a problem because it should be impossible to | 
|---|
| 74 | // create the underlying memory slice in the first place: | 
|---|
| 75 | assert!(u64::from_le_bytes(self.slot_count) <= usize::MAX as u64); | 
|---|
| 76 |  | 
|---|
| 77 | if !self.slot_count().is_power_of_two() { | 
|---|
| 78 | return Err(Error(format!( | 
|---|
| 79 | "Slot count of hashtable should be a power of two but is {} ", | 
|---|
| 80 | self.slot_count() | 
|---|
| 81 | ))); | 
|---|
| 82 | } | 
|---|
| 83 |  | 
|---|
| 84 | return Ok(()); | 
|---|
| 85 |  | 
|---|
| 86 | fn check_expected_size<T>(name: &str, expected_size: u8) -> Result<(), Error> { | 
|---|
| 87 | if expected_size as usize != size_of::<T>() { | 
|---|
| 88 | Err(Error(format!( | 
|---|
| 89 | "Expected size of {}  to be {}  but the encoded \ | 
|---|
| 90 |                      table specifies {} . This indicates an encoding mismatch.", | 
|---|
| 91 | name, | 
|---|
| 92 | size_of::<T>(), | 
|---|
| 93 | expected_size | 
|---|
| 94 | ))) | 
|---|
| 95 | } else { | 
|---|
| 96 | Ok(()) | 
|---|
| 97 | } | 
|---|
| 98 | } | 
|---|
| 99 | } | 
|---|
| 100 |  | 
|---|
| 101 | #[ inline] | 
|---|
| 102 | pub fn item_count(&self) -> usize { | 
|---|
| 103 | u64::from_le_bytes(self.item_count) as usize | 
|---|
| 104 | } | 
|---|
| 105 |  | 
|---|
| 106 | #[ inline] | 
|---|
| 107 | pub fn set_item_count(&mut self, item_count: usize) { | 
|---|
| 108 | self.item_count = (item_count as u64).to_le_bytes(); | 
|---|
| 109 | } | 
|---|
| 110 |  | 
|---|
| 111 | #[ inline] | 
|---|
| 112 | pub fn slot_count(&self) -> usize { | 
|---|
| 113 | u64::from_le_bytes(self.slot_count) as usize | 
|---|
| 114 | } | 
|---|
| 115 |  | 
|---|
| 116 | #[ inline] | 
|---|
| 117 | pub fn max_load_factor(&self) -> Factor { | 
|---|
| 118 | Factor(u16::from_le_bytes(self.max_load_factor)) | 
|---|
| 119 | } | 
|---|
| 120 |  | 
|---|
| 121 | #[ inline] | 
|---|
| 122 | fn metadata_offset<C: Config>(&self) -> isize { | 
|---|
| 123 | self.entry_data_offset() + self.entry_data_size_in_bytes::<C>() as isize | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 | #[ inline] | 
|---|
| 127 | fn entry_data_size_in_bytes<C: Config>(&self) -> usize { | 
|---|
| 128 | let slot_count = self.slot_count(); | 
|---|
| 129 | let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>(); | 
|---|
| 130 | slot_count * size_of_entry | 
|---|
| 131 | } | 
|---|
| 132 |  | 
|---|
| 133 | #[ inline] | 
|---|
| 134 | fn entry_data_offset(&self) -> isize { | 
|---|
| 135 | HEADER_SIZE as isize | 
|---|
| 136 | } | 
|---|
| 137 |  | 
|---|
| 138 | fn initialize<C: Config>( | 
|---|
| 139 | raw_bytes: &mut [u8], | 
|---|
| 140 | slot_count: usize, | 
|---|
| 141 | item_count: usize, | 
|---|
| 142 | max_load_factor: Factor, | 
|---|
| 143 | ) { | 
|---|
| 144 | assert_eq!(raw_bytes.len(), bytes_needed::<C>(slot_count)); | 
|---|
| 145 |  | 
|---|
| 146 | let header = Header { | 
|---|
| 147 | tag: HEADER_TAG, | 
|---|
| 148 | size_of_metadata: size_of::<EntryMetadata>().try_into().unwrap(), | 
|---|
| 149 | size_of_key: size_of::<C::EncodedKey>().try_into().unwrap(), | 
|---|
| 150 | size_of_value: size_of::<C::EncodedValue>().try_into().unwrap(), | 
|---|
| 151 | size_of_header: size_of::<Header>().try_into().unwrap(), | 
|---|
| 152 | item_count: (item_count as u64).to_le_bytes(), | 
|---|
| 153 | slot_count: (slot_count as u64).to_le_bytes(), | 
|---|
| 154 | file_format_version: CURRENT_FILE_FORMAT_VERSION, | 
|---|
| 155 | max_load_factor: max_load_factor.0.to_le_bytes(), | 
|---|
| 156 | padding: [0u8; 2], | 
|---|
| 157 | }; | 
|---|
| 158 |  | 
|---|
| 159 | assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(())); | 
|---|
| 160 |  | 
|---|
| 161 | unsafe { | 
|---|
| 162 | *(raw_bytes.as_mut_ptr() as *mut Header) = header; | 
|---|
| 163 | } | 
|---|
| 164 | } | 
|---|
| 165 | } | 
|---|
| 166 |  | 
|---|
| 167 | /// An allocation holds a byte array that is guaranteed to conform to the | 
|---|
| 168 | /// hash table's binary layout. | 
|---|
| 169 | #[ derive(Clone, Copy)] | 
|---|
| 170 | pub(crate) struct Allocation<C, M> | 
|---|
| 171 | where | 
|---|
| 172 | C: Config, | 
|---|
| 173 | { | 
|---|
| 174 | bytes: M, | 
|---|
| 175 | _config: PhantomData<C>, | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | impl<C, M> Allocation<C, M> | 
|---|
| 179 | where | 
|---|
| 180 | C: Config, | 
|---|
| 181 | M: Borrow<[u8]>, | 
|---|
| 182 | { | 
|---|
| 183 | pub fn from_raw_bytes(raw_bytes: M) -> Result<Allocation<C, M>, Error> { | 
|---|
| 184 | let allocation = Allocation { | 
|---|
| 185 | bytes: raw_bytes, | 
|---|
| 186 | _config: PhantomData::default(), | 
|---|
| 187 | }; | 
|---|
| 188 |  | 
|---|
| 189 | allocation | 
|---|
| 190 | .header() | 
|---|
| 191 | .sanity_check::<C>(allocation.bytes.borrow().len())?; | 
|---|
| 192 |  | 
|---|
| 193 | // Check that the hash function provides the expected hash values. | 
|---|
| 194 | { | 
|---|
| 195 | let (entry_metadata, entry_data) = allocation.data_slices(); | 
|---|
| 196 | RawTable::<C::EncodedKey, C::EncodedValue, C::H>::new(entry_metadata, entry_data) | 
|---|
| 197 | .sanity_check_hashes(10)?; | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | Ok(allocation) | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | #[ inline] | 
|---|
| 204 | pub unsafe fn from_raw_bytes_unchecked(raw_bytes: M) -> Allocation<C, M> { | 
|---|
| 205 | Allocation { | 
|---|
| 206 | bytes: raw_bytes, | 
|---|
| 207 | _config: PhantomData::default(), | 
|---|
| 208 | } | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | #[ inline] | 
|---|
| 212 | pub fn header(&self) -> &Header { | 
|---|
| 213 | let raw_bytes = self.bytes.borrow(); | 
|---|
| 214 | debug_assert!(raw_bytes.len() >= HEADER_SIZE); | 
|---|
| 215 |  | 
|---|
| 216 | let header: &Header = unsafe { &*(raw_bytes.as_ptr() as *const Header) }; | 
|---|
| 217 |  | 
|---|
| 218 | debug_assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(())); | 
|---|
| 219 |  | 
|---|
| 220 | header | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | #[ inline] | 
|---|
| 224 | pub fn data_slices(&self) -> (&[EntryMetadata], &[Entry<C::EncodedKey, C::EncodedValue>]) { | 
|---|
| 225 | let raw_bytes = self.bytes.borrow(); | 
|---|
| 226 | let slot_count = self.header().slot_count(); | 
|---|
| 227 | let entry_data_offset = self.header().entry_data_offset(); | 
|---|
| 228 | let metadata_offset = self.header().metadata_offset::<C>(); | 
|---|
| 229 |  | 
|---|
| 230 | let entry_metadata = unsafe { | 
|---|
| 231 | std::slice::from_raw_parts( | 
|---|
| 232 | raw_bytes.as_ptr().offset(metadata_offset) as *const EntryMetadata, | 
|---|
| 233 | slot_count + REFERENCE_GROUP_SIZE, | 
|---|
| 234 | ) | 
|---|
| 235 | }; | 
|---|
| 236 |  | 
|---|
| 237 | let entry_data = unsafe { | 
|---|
| 238 | std::slice::from_raw_parts( | 
|---|
| 239 | raw_bytes.as_ptr().offset(entry_data_offset) | 
|---|
| 240 | as *const Entry<C::EncodedKey, C::EncodedValue>, | 
|---|
| 241 | slot_count, | 
|---|
| 242 | ) | 
|---|
| 243 | }; | 
|---|
| 244 |  | 
|---|
| 245 | debug_assert_eq!( | 
|---|
| 246 | entry_data.as_ptr_range().start as usize, | 
|---|
| 247 | raw_bytes.as_ptr_range().start as usize + HEADER_SIZE, | 
|---|
| 248 | ); | 
|---|
| 249 |  | 
|---|
| 250 | debug_assert_eq!( | 
|---|
| 251 | entry_data.as_ptr_range().end as usize, | 
|---|
| 252 | entry_metadata.as_ptr_range().start as usize, | 
|---|
| 253 | ); | 
|---|
| 254 |  | 
|---|
| 255 | debug_assert_eq!( | 
|---|
| 256 | raw_bytes.as_ptr_range().end as usize, | 
|---|
| 257 | entry_metadata.as_ptr_range().end as usize, | 
|---|
| 258 | ); | 
|---|
| 259 |  | 
|---|
| 260 | (entry_metadata, entry_data) | 
|---|
| 261 | } | 
|---|
| 262 |  | 
|---|
| 263 | #[ inline] | 
|---|
| 264 | pub fn raw_bytes(&self) -> &[u8] { | 
|---|
| 265 | self.bytes.borrow() | 
|---|
| 266 | } | 
|---|
| 267 | } | 
|---|
| 268 |  | 
|---|
| 269 | impl<C, M> Allocation<C, M> | 
|---|
| 270 | where | 
|---|
| 271 | C: Config, | 
|---|
| 272 | M: BorrowMut<[u8]>, | 
|---|
| 273 | { | 
|---|
| 274 | #[ inline] | 
|---|
| 275 | pub fn with_mut_parts<R>( | 
|---|
| 276 | &mut self, | 
|---|
| 277 | f: impl FnOnce( | 
|---|
| 278 | &mut Header, | 
|---|
| 279 | &mut [EntryMetadata], | 
|---|
| 280 | &mut [Entry<C::EncodedKey, C::EncodedValue>], | 
|---|
| 281 | ) -> R, | 
|---|
| 282 | ) -> R { | 
|---|
| 283 | let raw_bytes = self.bytes.borrow_mut(); | 
|---|
| 284 |  | 
|---|
| 285 | // Copy the address as an integer so we can use it for the debug_assertion | 
|---|
| 286 | // below without accessing `raw_bytes` again. | 
|---|
| 287 | let _raw_bytes_end_addr = raw_bytes.as_ptr_range().end as usize; | 
|---|
| 288 |  | 
|---|
| 289 | let (header, rest) = raw_bytes.split_at_mut(HEADER_SIZE); | 
|---|
| 290 | let header: &mut Header = unsafe { &mut *(header.as_mut_ptr() as *mut Header) }; | 
|---|
| 291 |  | 
|---|
| 292 | let slot_count = header.slot_count(); | 
|---|
| 293 | let entry_data_size_in_bytes = header.entry_data_size_in_bytes::<C>(); | 
|---|
| 294 |  | 
|---|
| 295 | let (entry_data_bytes, metadata_bytes) = rest.split_at_mut(entry_data_size_in_bytes); | 
|---|
| 296 |  | 
|---|
| 297 | let entry_metadata = unsafe { | 
|---|
| 298 | std::slice::from_raw_parts_mut( | 
|---|
| 299 | metadata_bytes.as_mut_ptr() as *mut EntryMetadata, | 
|---|
| 300 | slot_count + REFERENCE_GROUP_SIZE, | 
|---|
| 301 | ) | 
|---|
| 302 | }; | 
|---|
| 303 |  | 
|---|
| 304 | let entry_data = unsafe { | 
|---|
| 305 | std::slice::from_raw_parts_mut( | 
|---|
| 306 | entry_data_bytes.as_mut_ptr() as *mut Entry<C::EncodedKey, C::EncodedValue>, | 
|---|
| 307 | slot_count, | 
|---|
| 308 | ) | 
|---|
| 309 | }; | 
|---|
| 310 |  | 
|---|
| 311 | debug_assert_eq!( | 
|---|
| 312 | entry_data.as_ptr_range().start as usize, | 
|---|
| 313 | header as *mut Header as usize + HEADER_SIZE, | 
|---|
| 314 | ); | 
|---|
| 315 |  | 
|---|
| 316 | debug_assert_eq!( | 
|---|
| 317 | entry_data.as_ptr_range().end as usize, | 
|---|
| 318 | entry_metadata.as_ptr_range().start as usize, | 
|---|
| 319 | ); | 
|---|
| 320 |  | 
|---|
| 321 | debug_assert_eq!( | 
|---|
| 322 | _raw_bytes_end_addr, | 
|---|
| 323 | entry_metadata.as_ptr_range().end as usize, | 
|---|
| 324 | ); | 
|---|
| 325 |  | 
|---|
| 326 | f(header, entry_metadata, entry_data) | 
|---|
| 327 | } | 
|---|
| 328 | } | 
|---|
| 329 |  | 
|---|
| 330 | #[ inline] | 
|---|
| 331 | pub(crate) fn bytes_needed<C: Config>(slot_count: usize) -> usize { | 
|---|
| 332 | assert!(slot_count.is_power_of_two()); | 
|---|
| 333 | let size_of_entry: usize = size_of::<Entry<C::EncodedKey, C::EncodedValue>>(); | 
|---|
| 334 | let size_of_metadata: usize = size_of::<EntryMetadata>(); | 
|---|
| 335 |  | 
|---|
| 336 | HEADER_SIZE | 
|---|
| 337 | + slot_count * size_of_entry | 
|---|
| 338 | + (slot_count + REFERENCE_GROUP_SIZE) * size_of_metadata | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | pub(crate) fn allocate<C: Config>( | 
|---|
| 342 | slot_count: usize, | 
|---|
| 343 | item_count: usize, | 
|---|
| 344 | max_load_factor: Factor, | 
|---|
| 345 | ) -> Allocation<C, Box<[u8]>> { | 
|---|
| 346 | let bytes: Box<[u8]> = vec![0u8; bytes_needed::<C>(slot_count)].into_boxed_slice(); | 
|---|
| 347 | init_in_place::<C, _>(bytes, slot_count, item_count, max_load_factor) | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | pub(crate) fn init_in_place<C: Config, M: BorrowMut<[u8]>>( | 
|---|
| 351 | mut bytes: M, | 
|---|
| 352 | slot_count: usize, | 
|---|
| 353 | item_count: usize, | 
|---|
| 354 | max_load_factor: Factor, | 
|---|
| 355 | ) -> Allocation<C, M> { | 
|---|
| 356 | Header::initialize::<C>(raw_bytes:bytes.borrow_mut(), slot_count, item_count, max_load_factor); | 
|---|
| 357 |  | 
|---|
| 358 | let mut allocation: Allocation = Allocation { | 
|---|
| 359 | bytes, | 
|---|
| 360 | _config: PhantomData::default(), | 
|---|
| 361 | }; | 
|---|
| 362 |  | 
|---|
| 363 | allocation.with_mut_parts(|_, metadata: &mut [u8], data: &mut [Entry<::EncodedKey, …>]| { | 
|---|
| 364 | metadata.fill(0xFF); | 
|---|
| 365 | data.fill(Default::default()); | 
|---|
| 366 | }); | 
|---|
| 367 |  | 
|---|
| 368 | allocation | 
|---|
| 369 | } | 
|---|
| 370 |  | 
|---|