use std::{
    borrow::{Borrow, BorrowMut},
    convert::TryInto,
    marker::PhantomData,
    mem::{align_of, size_of},
};

use crate::{
    error::Error,
    raw_table::{Entry, EntryMetadata, RawTable},
    Factor,
};
use crate::{swisstable_group_query::REFERENCE_GROUP_SIZE, Config};

const CURRENT_FILE_FORMAT_VERSION: [u8; 4] = [0, 0, 0, 2];

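/// The `Header` is stored at the very start of every table allocation. All
/// multi-byte fields are encoded as little-endian byte arrays so that the
/// struct has an alignment of 1 and the same in-memory representation on
/// every platform.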
#[repr(C)]
#[derive(Clone)]
pub(crate) struct Header {
    tag: [u8; 4],
    size_of_metadata: u8,
    size_of_key: u8,
    size_of_value: u8,
    size_of_header: u8,

    item_count: [u8; 8],
    slot_count: [u8; 8],

    file_format_version: [u8; 4],
    max_load_factor: [u8; 2],

    // Let's keep things at least 8-byte aligned
    padding: [u8; 2],
}

const HEADER_TAG: [u8; 4] = *b"ODHT";
const HEADER_SIZE: usize = size_of::<Header>();

impl Header {
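    /// Checks that this header is compatible with the compile-time `Config`
    /// and that it describes a table that fits exactly into an allocation of
    /// `raw_bytes_len` bytes.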
    pub fn sanity_check<C: Config>(&self, raw_bytes_len: usize) -> Result<(), Error> {
        assert!(align_of::<Header>() == 1);
        assert!(HEADER_SIZE % 8 == 0);

        if self.tag != HEADER_TAG {
            return Err(Error(format!(
                "Expected header tag {:?} but found {:?}",
                HEADER_TAG, self.tag
            )));
        }

        if self.file_format_version != CURRENT_FILE_FORMAT_VERSION {
            return Err(Error(format!(
                "Expected file format version {:?} but found {:?}",
                CURRENT_FILE_FORMAT_VERSION, self.file_format_version
            )));
        }

        check_expected_size::<EntryMetadata>("EntryMetadata", self.size_of_metadata)?;
        check_expected_size::<C::EncodedKey>("Config::EncodedKey", self.size_of_key)?;
        check_expected_size::<C::EncodedValue>("Config::EncodedValue", self.size_of_value)?;
        check_expected_size::<Header>("Header", self.size_of_header)?;

        if raw_bytes_len != bytes_needed::<C>(self.slot_count()) {
            return Err(Error(format!(
                "Provided allocation has wrong size for slot count {}. \
                 The allocation's size is {} but the expected size is {}.",
                self.slot_count(),
                raw_bytes_len,
                bytes_needed::<C>(self.slot_count()),
            )));
        }

        // This should never actually be a problem because it should be impossible to
        // create the underlying memory slice in the first place:
        assert!(u64::from_le_bytes(self.slot_count) <= usize::MAX as u64);

        if !self.slot_count().is_power_of_two() {
            return Err(Error(format!(
                "Slot count of hashtable should be a power of two but is {}",
                self.slot_count()
            )));
        }

        return Ok(());

        fn check_expected_size<T>(name: &str, expected_size: u8) -> Result<(), Error> {
            if expected_size as usize != size_of::<T>() {
                Err(Error(format!(
                    "Expected size of {} to be {} but the encoded \
                     table specifies {}. This indicates an encoding mismatch.",
                    name,
                    size_of::<T>(),
                    expected_size
                )))
            } else {
                Ok(())
            }
        }
    }

    #[inline]
    pub fn item_count(&self) -> usize {
        u64::from_le_bytes(self.item_count) as usize
    }

    #[inline]
    pub fn set_item_count(&mut self, item_count: usize) {
        self.item_count = (item_count as u64).to_le_bytes();
    }

    #[inline]
    pub fn slot_count(&self) -> usize {
        u64::from_le_bytes(self.slot_count) as usize
    }

    #[inline]
    pub fn max_load_factor(&self) -> Factor {
        Factor(u16::from_le_bytes(self.max_load_factor))
    }

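    /// Offset (in bytes) of the metadata array within the allocation. The
    /// layout is: header first, then entry data, then metadata.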
    #[inline]
    fn metadata_offset<C: Config>(&self) -> isize {
        self.entry_data_offset() + self.entry_data_size_in_bytes::<C>() as isize
    }

    #[inline]
    fn entry_data_size_in_bytes<C: Config>(&self) -> usize {
        let slot_count = self.slot_count();
        let size_of_entry = size_of::<Entry<C::EncodedKey, C::EncodedValue>>();
        slot_count * size_of_entry
    }

    #[inline]
    fn entry_data_offset(&self) -> isize {
        HEADER_SIZE as isize
    }

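    /// Writes a fresh `Header` to the beginning of `raw_bytes`. The buffer
    /// must have exactly the size reported by `bytes_needed::<C>(slot_count)`.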
    fn initialize<C: Config>(
        raw_bytes: &mut [u8],
        slot_count: usize,
        item_count: usize,
        max_load_factor: Factor,
    ) {
        assert_eq!(raw_bytes.len(), bytes_needed::<C>(slot_count));

        let header = Header {
            tag: HEADER_TAG,
            size_of_metadata: size_of::<EntryMetadata>().try_into().unwrap(),
            size_of_key: size_of::<C::EncodedKey>().try_into().unwrap(),
            size_of_value: size_of::<C::EncodedValue>().try_into().unwrap(),
            size_of_header: size_of::<Header>().try_into().unwrap(),
            item_count: (item_count as u64).to_le_bytes(),
            slot_count: (slot_count as u64).to_le_bytes(),
            file_format_version: CURRENT_FILE_FORMAT_VERSION,
            max_load_factor: max_load_factor.0.to_le_bytes(),
            padding: [0u8; 2],
        };

        assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(()));

        unsafe {
            *(raw_bytes.as_mut_ptr() as *mut Header) = header;
        }
    }
}

/// An allocation holds a byte array that is guaranteed to conform to the
/// hash table's binary layout.
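///
/// `M` is the memory backing the table, e.g. an owned `Box<[u8]>` (as used by
/// `allocate` below) or, presumably, any other byte container implementing
/// `Borrow<[u8]>`/`BorrowMut<[u8]>`.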
#[derive(Clone, Copy)]
pub(crate) struct Allocation<C, M>
where
    C: Config,
{
    bytes: M,
    _config: PhantomData<C>,
}

impl<C, M> Allocation<C, M>
where
    C: Config,
    M: Borrow<[u8]>,
{
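    /// Creates an `Allocation` from the given bytes, validating the header
    /// and a small sample of stored hashes before handing the data out.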
    pub fn from_raw_bytes(raw_bytes: M) -> Result<Allocation<C, M>, Error> {
        let allocation = Allocation {
            bytes: raw_bytes,
            _config: PhantomData::default(),
        };

        allocation
            .header()
            .sanity_check::<C>(allocation.bytes.borrow().len())?;

        // Check that the hash function provides the expected hash values.
        {
            let (entry_metadata, entry_data) = allocation.data_slices();
            RawTable::<C::EncodedKey, C::EncodedValue, C::H>::new(entry_metadata, entry_data)
                .sanity_check_hashes(10)?;
        }

        Ok(allocation)
    }

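    /// Like `from_raw_bytes` but skips all validation.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that `raw_bytes` conforms to the binary
    /// layout that `from_raw_bytes` would otherwise check.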
    #[inline]
    pub unsafe fn from_raw_bytes_unchecked(raw_bytes: M) -> Allocation<C, M> {
        Allocation {
            bytes: raw_bytes,
            _config: PhantomData::default(),
        }
    }

    #[inline]
    pub fn header(&self) -> &Header {
        let raw_bytes = self.bytes.borrow();
        debug_assert!(raw_bytes.len() >= HEADER_SIZE);

        let header: &Header = unsafe { &*(raw_bytes.as_ptr() as *const Header) };

        debug_assert_eq!(header.sanity_check::<C>(raw_bytes.len()), Ok(()));

        header
    }

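    /// Returns the metadata and entry slices that follow the header. Note
    /// that the metadata slice holds `slot_count + REFERENCE_GROUP_SIZE`
    /// entries; presumably the extra group lets probing near the end of the
    /// table read a full group without wrapping.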
    #[inline]
    pub fn data_slices(&self) -> (&[EntryMetadata], &[Entry<C::EncodedKey, C::EncodedValue>]) {
        let raw_bytes = self.bytes.borrow();
        let slot_count = self.header().slot_count();
        let entry_data_offset = self.header().entry_data_offset();
        let metadata_offset = self.header().metadata_offset::<C>();

        let entry_metadata = unsafe {
            std::slice::from_raw_parts(
                raw_bytes.as_ptr().offset(metadata_offset) as *const EntryMetadata,
                slot_count + REFERENCE_GROUP_SIZE,
            )
        };

        let entry_data = unsafe {
            std::slice::from_raw_parts(
                raw_bytes.as_ptr().offset(entry_data_offset)
                    as *const Entry<C::EncodedKey, C::EncodedValue>,
                slot_count,
            )
        };

        debug_assert_eq!(
            entry_data.as_ptr_range().start as usize,
            raw_bytes.as_ptr_range().start as usize + HEADER_SIZE,
        );

        debug_assert_eq!(
            entry_data.as_ptr_range().end as usize,
            entry_metadata.as_ptr_range().start as usize,
        );

        debug_assert_eq!(
            raw_bytes.as_ptr_range().end as usize,
            entry_metadata.as_ptr_range().end as usize,
        );

        (entry_metadata, entry_data)
    }

    #[inline]
    pub fn raw_bytes(&self) -> &[u8] {
        self.bytes.borrow()
    }
}

impl<C, M> Allocation<C, M>
where
    C: Config,
    M: BorrowMut<[u8]>,
{
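    /// Splits the underlying bytes into header, metadata, and entry data and
    /// passes mutable references to all three to the given closure.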
    #[inline]
    pub fn with_mut_parts<R>(
        &mut self,
        f: impl FnOnce(
            &mut Header,
            &mut [EntryMetadata],
            &mut [Entry<C::EncodedKey, C::EncodedValue>],
        ) -> R,
    ) -> R {
        let raw_bytes = self.bytes.borrow_mut();

        // Copy the address as an integer so we can use it for the debug assertion
        // below without accessing `raw_bytes` again.
        let _raw_bytes_end_addr = raw_bytes.as_ptr_range().end as usize;

        let (header, rest) = raw_bytes.split_at_mut(HEADER_SIZE);
        let header: &mut Header = unsafe { &mut *(header.as_mut_ptr() as *mut Header) };

        let slot_count = header.slot_count();
        let entry_data_size_in_bytes = header.entry_data_size_in_bytes::<C>();

        let (entry_data_bytes, metadata_bytes) = rest.split_at_mut(entry_data_size_in_bytes);

        let entry_metadata = unsafe {
            std::slice::from_raw_parts_mut(
                metadata_bytes.as_mut_ptr() as *mut EntryMetadata,
                slot_count + REFERENCE_GROUP_SIZE,
            )
        };

        let entry_data = unsafe {
            std::slice::from_raw_parts_mut(
                entry_data_bytes.as_mut_ptr() as *mut Entry<C::EncodedKey, C::EncodedValue>,
                slot_count,
            )
        };

        debug_assert_eq!(
            entry_data.as_ptr_range().start as usize,
            header as *mut Header as usize + HEADER_SIZE,
        );

        debug_assert_eq!(
            entry_data.as_ptr_range().end as usize,
            entry_metadata.as_ptr_range().start as usize,
        );

        debug_assert_eq!(
            _raw_bytes_end_addr,
            entry_metadata.as_ptr_range().end as usize,
        );

        f(header, entry_metadata, entry_data)
    }
}

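/// Returns the total number of bytes needed for a table with `slot_count`
/// slots: the header, one `Entry` per slot, and one `EntryMetadata` per slot
/// plus `REFERENCE_GROUP_SIZE` trailing metadata entries.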
#[inline]
pub(crate) fn bytes_needed<C: Config>(slot_count: usize) -> usize {
    assert!(slot_count.is_power_of_two());
    let size_of_entry: usize = size_of::<Entry<C::EncodedKey, C::EncodedValue>>();
    let size_of_metadata: usize = size_of::<EntryMetadata>();

    HEADER_SIZE
        + slot_count * size_of_entry
        + (slot_count + REFERENCE_GROUP_SIZE) * size_of_metadata
}

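/// Allocates a zero-filled, boxed byte buffer of the required size and
/// initializes it in place.
///
/// A rough usage sketch (assuming a hypothetical `MyConfig: Config`
/// implementation and some previously obtained `max_load_factor: Factor`):
///
/// ```ignore
/// let allocation = allocate::<MyConfig>(64, 0, max_load_factor);
/// assert_eq!(allocation.header().slot_count(), 64);
/// assert_eq!(allocation.header().item_count(), 0);
/// ```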
pub(crate) fn allocate<C: Config>(
    slot_count: usize,
    item_count: usize,
    max_load_factor: Factor,
) -> Allocation<C, Box<[u8]>> {
    let bytes: Box<[u8]> = vec![0u8; bytes_needed::<C>(slot_count)].into_boxed_slice();
    init_in_place::<C, _>(bytes, slot_count, item_count, max_load_factor)
}

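/// Initializes the given byte buffer as an empty table: writes the header and
/// resets the metadata and entry data to their initial values.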
pub(crate) fn init_in_place<C: Config, M: BorrowMut<[u8]>>(
    mut bytes: M,
    slot_count: usize,
    item_count: usize,
    max_load_factor: Factor,
) -> Allocation<C, M> {
    Header::initialize::<C>(bytes.borrow_mut(), slot_count, item_count, max_load_factor);

    let mut allocation = Allocation {
        bytes,
        _config: PhantomData::default(),
    };

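    // Presumably, 0xFF is the metadata value that marks an empty slot; entry
    // data starts out as its default value.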
    allocation.with_mut_parts(|_, metadata, data| {
        metadata.fill(0xFF);
        data.fill(Default::default());
    });

    allocation
}
370 | |