// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::FlexZeroVec;
use crate::ZeroVecError;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::fmt;
use core::mem;
use core::ops::Range;

const USIZE_WIDTH: usize = mem::size_of::<usize>();

/// A zero-copy "slice" that efficiently represents `[usize]`.
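///
/// # Examples
///
/// ```
/// use zerovec::vecs::FlexZeroSlice;
///
/// // The first byte is the width; the rest are little-endian values of that width:
/// let fzs = FlexZeroSlice::parse_byte_slice(&[2, 0x34, 0x12]).expect("valid bytes");
/// assert_eq!(fzs.get(0), Some(0x1234));
/// ```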
#[repr(C, packed)]
pub struct FlexZeroSlice {
    // Hard Invariant: 1 <= width <= USIZE_WIDTH (which is target_pointer_width)
    // Soft Invariant: width == the width of the largest element
    width: u8,
    // Hard Invariant: data.len() % width == 0
    data: [u8],
}

impl fmt::Debug for FlexZeroSlice {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.to_vec().fmt(f)
    }
}

impl PartialEq for FlexZeroSlice {
    fn eq(&self, other: &Self) -> bool {
        self.width == other.width && self.data == other.data
    }
}
impl Eq for FlexZeroSlice {}

/// Helper function to decode a little-endian "chunk" (byte slice of a specific length)
/// into a `usize`. We cannot call `usize::from_le_bytes` directly because that function
/// requires an array of exactly `USIZE_WIDTH` bytes, so we zero-extend the chunk first.
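///
/// For example, `chunk_to_usize(&[0x34, 0x12], 2)` returns `0x1234`.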
#[inline]
pub(crate) fn chunk_to_usize(chunk: &[u8], width: usize) -> usize {
    debug_assert_eq!(chunk.len(), width);
    let mut bytes = [0; USIZE_WIDTH];
    #[allow(clippy::indexing_slicing)] // protected by debug_assert above
    bytes[0..width].copy_from_slice(chunk);
    usize::from_le_bytes(bytes)
}

impl FlexZeroSlice {
    /// Constructs a new empty [`FlexZeroSlice`].
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// const EMPTY_SLICE: &FlexZeroSlice = FlexZeroSlice::new_empty();
    ///
    /// assert!(EMPTY_SLICE.is_empty());
    /// assert_eq!(EMPTY_SLICE.len(), 0);
    /// assert_eq!(EMPTY_SLICE.first(), None);
    /// ```
    #[inline]
    pub const fn new_empty() -> &'static Self {
        const ARR: &[u8] = &[1u8];
        // Safety: The slice is a valid empty `FlexZeroSlice`
        unsafe { Self::from_byte_slice_unchecked(ARR) }
    }

    /// Safely constructs a [`FlexZeroSlice`] from a byte array.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// const FZS: &FlexZeroSlice = match FlexZeroSlice::parse_byte_slice(&[
    ///     2,          // width
    ///     0x42, 0x00, // first value
    ///     0x07, 0x09, // second value
    ///     0xFF, 0xFF, // third value
    /// ]) {
    ///     Ok(v) => v,
    ///     Err(_) => panic!("invalid bytes"),
    /// };
    ///
    /// assert!(!FZS.is_empty());
    /// assert_eq!(FZS.len(), 3);
    /// assert_eq!(FZS.first(), Some(0x0042));
    /// assert_eq!(FZS.get(0), Some(0x0042));
    /// assert_eq!(FZS.get(1), Some(0x0907));
    /// assert_eq!(FZS.get(2), Some(0xFFFF));
    /// assert_eq!(FZS.get(3), None);
    /// assert_eq!(FZS.last(), Some(0xFFFF));
    /// ```
    pub const fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
        let (width_u8, data) = match bytes.split_first() {
            Some(v) => v,
            None => {
                return Err(ZeroVecError::InvalidLength {
                    ty: "FlexZeroSlice",
                    len: 0,
                })
            }
        };
        let width = *width_u8 as usize;
        if width < 1 || width > USIZE_WIDTH {
            return Err(ZeroVecError::ParseError {
                ty: "FlexZeroSlice",
            });
        }
        if data.len() % width != 0 {
            return Err(ZeroVecError::InvalidLength {
                ty: "FlexZeroSlice",
                len: bytes.len(),
            });
        }
        // Safety: All hard invariants have been checked.
        // Note: The soft invariant requires a linear search that we don't do here.
        Ok(unsafe { Self::from_byte_slice_unchecked(bytes) })
    }

    /// Constructs a [`FlexZeroSlice`] without checking invariants.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is empty.
    ///
    /// # Safety
    ///
    /// Must be called on a valid [`FlexZeroSlice`] byte array.
    #[inline]
    pub const unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
        // Safety: The DST of FlexZeroSlice is a pointer to the `width` element with metadata
        // equal to the length of the `data` field, which is one less than the length of the
        // overall array.
        #[allow(clippy::panic)] // panic is documented in function contract
        if bytes.is_empty() {
            panic!("from_byte_slice_unchecked called with empty slice")
        }
        let slice = core::ptr::slice_from_raw_parts(bytes.as_ptr(), bytes.len() - 1);
        &*(slice as *const Self)
    }

    #[inline]
    pub(crate) unsafe fn from_byte_slice_mut_unchecked(bytes: &mut [u8]) -> &mut Self {
        // Safety: See comments in `from_byte_slice_unchecked`
        let remainder = core::ptr::slice_from_raw_parts_mut(bytes.as_mut_ptr(), bytes.len() - 1);
        &mut *(remainder as *mut Self)
    }

    /// Returns this slice as its underlying `&[u8]` byte buffer representation.
    ///
    /// Useful for serialization.
    ///
    /// # Example
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroSlice;
    ///
    /// let bytes: &[u8] = &[2, 0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x80];
    /// let fzv = FlexZeroSlice::parse_byte_slice(bytes).expect("valid bytes");
    ///
    /// assert_eq!(bytes, fzv.as_bytes());
    /// ```
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        // Safety: See comments in `from_byte_slice_unchecked`
        unsafe {
            core::slice::from_raw_parts(self as *const Self as *const u8, self.data.len() + 1)
        }
    }

    /// Borrows this `FlexZeroSlice` as a [`FlexZeroVec::Borrowed`].
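    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::{FlexZeroSlice, FlexZeroVec};
    ///
    /// let fzv: FlexZeroVec = FlexZeroSlice::new_empty().as_flexzerovec();
    /// assert!(fzv.is_empty());
    /// ```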
    #[inline]
    pub const fn as_flexzerovec(&self) -> FlexZeroVec {
        FlexZeroVec::Borrowed(self)
    }

    /// Returns the number of elements in the `FlexZeroSlice`.
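    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [22, 33, 44].iter().copied().collect();
    /// assert_eq!(fzv.len(), 3);
    /// ```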
    #[inline]
    pub fn len(&self) -> usize {
        self.data.len() / self.get_width()
    }

    #[inline]
    pub(crate) fn get_width(&self) -> usize {
        usize::from(self.width)
    }

    /// Returns whether there are zero elements in the `FlexZeroSlice`.
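    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::{FlexZeroSlice, FlexZeroVec};
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// assert!(!fzv.is_empty());
    /// assert!(FlexZeroSlice::new_empty().is_empty());
    /// ```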
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.data.len() == 0
    }

    /// Gets the element at `index`, or `None` if `index >= self.len()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// assert_eq!(fzv.get(0), Some(22));
    /// assert_eq!(fzv.get(1), Some(33));
    /// assert_eq!(fzv.get(2), None);
    /// ```
    #[inline]
    pub fn get(&self, index: usize) -> Option<usize> {
        if index >= self.len() {
            None
        } else {
            Some(unsafe { self.get_unchecked(index) })
        }
    }

    /// Gets the element at `index` as a chunk of bytes, or `None` if `index >= self.len()`.
    #[inline]
    pub(crate) fn get_chunk(&self, index: usize) -> Option<&[u8]> {
        let w = self.get_width();
        self.data.get(index * w..index * w + w)
    }

    /// Gets the element at `index` without checking bounds.
    ///
    /// # Safety
    ///
    /// `index` must be in-range.
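    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// // Safety: index 1 is in bounds (fzv.len() == 2)
    /// assert_eq!(unsafe { fzv.get_unchecked(1) }, 33);
    /// ```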
    #[inline]
    pub unsafe fn get_unchecked(&self, index: usize) -> usize {
        match self.width {
            1 => *self.data.get_unchecked(index) as usize,
            2 => {
                let ptr = self.data.as_ptr().add(index * 2);
                u16::from_le_bytes(core::ptr::read(ptr as *const [u8; 2])) as usize
            }
            _ => {
                let mut bytes = [0; USIZE_WIDTH];
                let w = self.get_width();
                assert!(w <= USIZE_WIDTH);
                let ptr = self.data.as_ptr().add(index * w);
                core::ptr::copy_nonoverlapping(ptr, bytes.as_mut_ptr(), w);
                usize::from_le_bytes(bytes)
            }
        }
    }

    /// Gets the first element of the slice, or `None` if the slice is empty.
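    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::{FlexZeroSlice, FlexZeroVec};
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// assert_eq!(fzv.first(), Some(22));
    /// assert_eq!(FlexZeroSlice::new_empty().first(), None);
    /// ```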
    #[inline]
    pub fn first(&self) -> Option<usize> {
        let w = self.get_width();
        self.data.get(0..w).map(|chunk| chunk_to_usize(chunk, w))
    }

    /// Gets the last element of the slice, or `None` if the slice is empty.
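    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::{FlexZeroSlice, FlexZeroVec};
    ///
    /// let fzv: FlexZeroVec = [22, 33].iter().copied().collect();
    /// assert_eq!(fzv.last(), Some(33));
    /// assert_eq!(FlexZeroSlice::new_empty().last(), None);
    /// ```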
    #[inline]
    pub fn last(&self) -> Option<usize> {
        let l = self.data.len();
        if l == 0 {
            None
        } else {
            let w = self.get_width();
            self.data
                .get(l - w..l)
                .map(|chunk| chunk_to_usize(chunk, w))
        }
    }

    /// Gets an iterator over the elements of the slice as `usize`.
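    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [22, 33, 44].iter().copied().collect();
    /// let mut it = fzv.iter();
    /// assert_eq!(it.next(), Some(22));
    /// assert_eq!(it.next_back(), Some(44));
    /// assert_eq!(it.next(), Some(33));
    /// assert_eq!(it.next(), None);
    /// ```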
    #[inline]
    pub fn iter(
        &self,
    ) -> impl DoubleEndedIterator<Item = usize> + '_ + ExactSizeIterator<Item = usize> {
        let w = self.get_width();
        self.data
            .chunks_exact(w)
            .map(move |chunk| chunk_to_usize(chunk, w))
    }

    /// Gets an iterator over pairs of elements.
    ///
    /// The second element of the final pair is `None`.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// let mut pairs_it = fzv.iter_pairs();
    ///
    /// assert_eq!(pairs_it.next(), Some((211, Some(281))));
    /// assert_eq!(pairs_it.next(), Some((281, Some(421))));
    /// assert_eq!(pairs_it.next(), Some((421, Some(461))));
    /// assert_eq!(pairs_it.next(), Some((461, None)));
    /// assert_eq!(pairs_it.next(), None);
    /// ```
    pub fn iter_pairs(&self) -> impl Iterator<Item = (usize, Option<usize>)> + '_ {
        self.iter().zip(self.iter().skip(1).map(Some).chain([None]))
    }

    /// Creates a `Vec<usize>` from a [`FlexZeroSlice`] (or `FlexZeroVec`).
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    /// let vec: Vec<usize> = fzv.to_vec();
    ///
    /// assert_eq!(nums, vec.as_slice());
    /// ```
    #[inline]
    pub fn to_vec(&self) -> Vec<usize> {
        self.iter().collect()
    }

    /// Binary searches a sorted `FlexZeroSlice` for the given `usize` value.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let nums: &[usize] = &[211, 281, 421, 461];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// assert_eq!(fzv.binary_search(0), Err(0));
    /// assert_eq!(fzv.binary_search(211), Ok(0));
    /// assert_eq!(fzv.binary_search(250), Err(1));
    /// assert_eq!(fzv.binary_search(281), Ok(1));
    /// assert_eq!(fzv.binary_search(300), Err(2));
    /// assert_eq!(fzv.binary_search(421), Ok(2));
    /// assert_eq!(fzv.binary_search(450), Err(3));
    /// assert_eq!(fzv.binary_search(461), Ok(3));
    /// assert_eq!(fzv.binary_search(462), Err(4));
    /// ```
    #[inline]
    pub fn binary_search(&self, needle: usize) -> Result<usize, usize> {
        self.binary_search_by(|probe| probe.cmp(&needle))
    }

    /// Binary searches a sorted range of a `FlexZeroSlice` for the given `usize` value.
    ///
    /// The indices in the return value are relative to the start of the range.
    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// // Make a FlexZeroVec with two sorted ranges: 0..3 and 3..5
    /// let nums: &[usize] = &[111, 222, 444, 333, 555];
    /// let fzv: FlexZeroVec = nums.iter().copied().collect();
    ///
    /// // Search in the first range:
    /// assert_eq!(fzv.binary_search_in_range(0, 0..3), Some(Err(0)));
    /// assert_eq!(fzv.binary_search_in_range(111, 0..3), Some(Ok(0)));
    /// assert_eq!(fzv.binary_search_in_range(199, 0..3), Some(Err(1)));
    /// assert_eq!(fzv.binary_search_in_range(222, 0..3), Some(Ok(1)));
    /// assert_eq!(fzv.binary_search_in_range(399, 0..3), Some(Err(2)));
    /// assert_eq!(fzv.binary_search_in_range(444, 0..3), Some(Ok(2)));
    /// assert_eq!(fzv.binary_search_in_range(999, 0..3), Some(Err(3)));
    ///
    /// // Search in the second range:
    /// assert_eq!(fzv.binary_search_in_range(0, 3..5), Some(Err(0)));
    /// assert_eq!(fzv.binary_search_in_range(333, 3..5), Some(Ok(0)));
    /// assert_eq!(fzv.binary_search_in_range(399, 3..5), Some(Err(1)));
    /// assert_eq!(fzv.binary_search_in_range(555, 3..5), Some(Ok(1)));
    /// assert_eq!(fzv.binary_search_in_range(999, 3..5), Some(Err(2)));
    ///
    /// // Out-of-bounds range:
    /// assert_eq!(fzv.binary_search_in_range(0, 4..6), None);
    /// ```
    #[inline]
    pub fn binary_search_in_range(
        &self,
        needle: usize,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        self.binary_search_in_range_by(|probe| probe.cmp(&needle), range)
    }

    /// Binary searches a sorted `FlexZeroSlice` according to a predicate function.
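    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [11, 22, 33].iter().copied().collect();
    /// assert_eq!(fzv.binary_search_by(|probe| probe.cmp(&22)), Ok(1));
    /// assert_eq!(fzv.binary_search_by(|probe| probe.cmp(&25)), Err(2));
    /// ```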
    #[inline]
    pub fn binary_search_by(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
    ) -> Result<usize, usize> {
        debug_assert!(self.len() <= self.data.len());
        // Safety: self.len() <= self.data.len()
        let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) };
        self.binary_search_impl(predicate, scaled_slice)
    }

    /// Binary searches a sorted range of a `FlexZeroSlice` according to a predicate function.
    ///
    /// The indices in the return value are relative to the start of the range.
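    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// // Two sorted ranges: 0..3 and 3..5
    /// let fzv: FlexZeroVec = [111, 222, 444, 333, 555].iter().copied().collect();
    /// assert_eq!(
    ///     fzv.binary_search_in_range_by(|probe| probe.cmp(&333), 3..5),
    ///     Some(Ok(0))
    /// );
    /// ```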
    #[inline]
    pub fn binary_search_in_range_by(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        // Note: We need to check bounds separately, because `range` is in units of elements
        // while `self.data.get(range)` indexes directly into the upscaled byte array, so it
        // does not catch all out-of-bounds element ranges on its own.
        if range.start > self.len() || range.end > self.len() {
            return None;
        }
        let scaled_slice = self.data.get(range)?;
        Some(self.binary_search_impl(predicate, scaled_slice))
    }

    /// Binary searches a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`.
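    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// let fzv: FlexZeroVec = [11, 22, 33].iter().copied().collect();
    /// // Compare through `get`, using the index handed to the predicate:
    /// let result = fzv.binary_search_with_index(|i| fzv.get(i).unwrap().cmp(&22));
    /// assert_eq!(result, Ok(1));
    /// ```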
    #[inline]
    pub fn binary_search_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
    ) -> Result<usize, usize> {
        debug_assert!(self.len() <= self.data.len());
        // Safety: self.len() <= self.data.len()
        let scaled_slice = unsafe { self.data.get_unchecked(0..self.len()) };
        self.binary_search_with_index_impl(predicate, scaled_slice)
    }

    /// Binary searches a range of a `FlexZeroSlice` by its indices.
    ///
    /// The `predicate` function is passed in-bounds indices into the `FlexZeroSlice`, which are
    /// relative to the start of the entire slice.
    ///
    /// The indices in the return value are relative to the start of the range.
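    ///
    /// # Examples
    ///
    /// ```
    /// use zerovec::vecs::FlexZeroVec;
    ///
    /// // Two sorted ranges: 0..3 and 3..5
    /// let fzv: FlexZeroVec = [111, 222, 444, 333, 555].iter().copied().collect();
    /// // The predicate receives absolute indices; the result is relative to the range:
    /// let result = fzv
    ///     .binary_search_in_range_with_index(|i| fzv.get(i).unwrap().cmp(&555), 3..5);
    /// assert_eq!(result, Some(Ok(1)));
    /// ```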
    #[inline]
    pub fn binary_search_in_range_with_index(
        &self,
        predicate: impl FnMut(usize) -> Ordering,
        range: Range<usize>,
    ) -> Option<Result<usize, usize>> {
        // Note: We need to check bounds separately, because `range` is in units of elements
        // while `self.data.get(range)` indexes directly into the upscaled byte array, so it
        // does not catch all out-of-bounds element ranges on its own.
        if range.start > self.len() || range.end > self.len() {
            return None;
        }
        let scaled_slice = self.data.get(range)?;
        Some(self.binary_search_with_index_impl(predicate, scaled_slice))
    }

    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    #[inline]
    fn binary_search_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        self.binary_search_with_index_impl(
            |index| {
                // Safety: The contract of `binary_search_with_index_impl` says `index` is in bounds
                let actual_probe = unsafe { self.get_unchecked(index) };
                predicate(actual_probe)
            },
            scaled_slice,
        )
    }

    /// `predicate` is passed a valid index as an argument.
    ///
    /// # Safety
    ///
    /// `scaled_slice` must be a subslice of `self.data`
    fn binary_search_with_index_impl(
        &self,
        mut predicate: impl FnMut(usize) -> Ordering,
        scaled_slice: &[u8],
    ) -> Result<usize, usize> {
        // This code is an absolute atrocity. This code is not a place of honor. This
        // code is known to the State of California to cause cancer.
        //
        // Unfortunately, the stdlib's `binary_search*` functions can only operate on slices.
        // We do not have a slice. We have something we can .get() and index on, but that is not
        // a slice.
        //
        // The `binary_search*` functions also do not have a variant where they give you the
        // element's index, which we could otherwise use to directly index `self`.
        // We do have byte offsets into `self.data`, but a byte offset cannot in isolation
        // be used to recoup the logical index of the element it refers to.
        //
        // However, `binary_search_by()` provides references to the elements of the slice being
        // iterated. Since the layout of Rust slices is well-defined, we can do pointer arithmetic
        // on these references to obtain the index being used by the search.
        //
        // It's worth noting that the slice we choose to search is irrelevant, as long as it has
        // the appropriate length. `scaled_slice` is a subrange of `self.data` with one byte per
        // logical element, so it is convenient to use here and does not require additional
        // allocations.
        //
        // The alternative to doing this is to implement our own binary search. This is
        // significantly less fun.

        // Note: We always compute zero_index relative to the whole data array, even if we are
        // only searching a subslice of it.
        let zero_index = self.data.as_ptr() as *const _ as usize;
        scaled_slice.binary_search_by(|probe| {
            // Note: `scaled_slice` is a slice of u8
            let index = probe as *const _ as usize - zero_index;
            predicate(index)
        })
    }
}
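/// Computes the number of bytes needed to store `item_bytes` in little-endian form:
/// the index of its most significant nonzero byte, plus one. Returns 0 if every byte is zero.
///
/// For example, `get_item_width(&1000usize.to_le_bytes())` is 2, since 1000 == 0x03E8.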
#[inline]
pub(crate) fn get_item_width(item_bytes: &[u8; USIZE_WIDTH]) -> usize {
    USIZE_WIDTH - item_bytes.iter().rev().take_while(|b| **b == 0).count()
}

/// Pre-computed information about a pending insertion operation.
///
/// Do not create one of these directly; call `get_insert_info()`.
pub(crate) struct InsertInfo {
    /// The bytes to be inserted, with zero-fill.
    pub item_bytes: [u8; USIZE_WIDTH],
    /// The new item width after insertion.
    pub new_width: usize,
    /// The new number of items in the vector: self.len() after insertion.
    pub new_count: usize,
    /// The new number of bytes required for the entire slice (self.data.len() + 1).
    pub new_bytes_len: usize,
}

impl FlexZeroSlice {
    /// Compute the [`InsertInfo`] for inserting the specified item anywhere into the vector.
    ///
    /// # Panics
    ///
    /// Panics if inserting the element would require allocating more than `usize::MAX` bytes.
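    ///
    /// For example, inserting `0x1234` into a width-1 slice holding 3 elements gives
    /// `item_width == 2`, so `new_width == 2`, `new_count == 4`, and
    /// `new_bytes_len == 4 * 2 + 1 == 9`.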
    pub(crate) fn get_insert_info(&self, new_item: usize) -> InsertInfo {
        let item_bytes = new_item.to_le_bytes();
        let item_width = get_item_width(&item_bytes);
        let old_width = self.get_width();
        let new_width = core::cmp::max(old_width, item_width);
        let new_count = 1 + (self.data.len() / old_width);
        #[allow(clippy::unwrap_used)] // panic is documented in function contract
        let new_bytes_len = new_count
            .checked_mul(new_width)
            .unwrap()
            .checked_add(1)
            .unwrap();
        InsertInfo {
            item_bytes,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// This function should be called on a slice with a data array `new_bytes_len - 1` bytes
    /// long, which previously held `new_count - 1` elements.
    ///
    /// After calling this function, all bytes in the slice will have been written.
    pub(crate) fn insert_impl(&mut self, insert_info: InsertInfo, insert_index: usize) {
        let InsertInfo {
            item_bytes,
            new_width,
            new_count,
            new_bytes_len,
        } = insert_info;
        debug_assert!(new_width <= USIZE_WIDTH);
        debug_assert!(new_width >= self.get_width());
        debug_assert!(insert_index < new_count);
        debug_assert_eq!(new_bytes_len, new_count * new_width + 1);
        debug_assert_eq!(new_bytes_len, self.data.len() + 1);
        // For efficiency, calculate how many items we can skip copying.
        let lower_i = if new_width == self.get_width() {
            insert_index
        } else {
            0
        };
        // Copy elements starting from the end into the new empty section of the vector.
        // Note: We could copy fully in place, but we need to set 0 bytes for the high bytes,
        // so we stage the new value on the stack.
        for i in (lower_i..new_count).rev() {
            let bytes_to_write = if i == insert_index {
                item_bytes
            } else {
                let j = if i > insert_index { i - 1 } else { i };
                debug_assert!(j < new_count - 1);
                // Safety: j is in range (assertion on previous line), and it has not been
                // overwritten yet since we are walking backwards.
                unsafe { self.get_unchecked(j).to_le_bytes() }
            };
            // Safety: The vector has capacity for `new_width` items at the new index, which is
            // later in the array than the bytes that we read above.
            unsafe {
                core::ptr::copy_nonoverlapping(
                    bytes_to_write.as_ptr(),
                    self.data.as_mut_ptr().add(new_width * i),
                    new_width,
                );
            }
        }
        self.width = new_width as u8;
    }
}

/// Pre-computed information about a pending removal operation.
///
/// Do not create one of these directly; call `get_remove_info()` or `get_sorted_pop_info()`.
pub(crate) struct RemoveInfo {
    /// The index of the item to be removed.
    pub remove_index: usize,
    /// The new item width after removal.
    pub new_width: usize,
    /// The new number of items in the vector: self.len() after removal.
    pub new_count: usize,
    /// The new number of bytes required for the entire slice (self.data.len() + 1).
    pub new_bytes_len: usize,
}

impl FlexZeroSlice {
    /// Compute the [`RemoveInfo`] for removing the item at the specified index.
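    ///
    /// For example, removing the value `0x1234` from a width-2 slice holding
    /// `[1, 2, 0x1234]` lets the remaining elements scale down to width 1, giving
    /// `new_width == 1`, `new_count == 2`, and `new_bytes_len == 2 * 1 + 1 == 3`.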
    pub(crate) fn get_remove_info(&self, remove_index: usize) -> RemoveInfo {
        debug_assert!(remove_index < self.len());
        // Safety: remove_index is in range (assertion on previous line)
        let item_bytes = unsafe { self.get_unchecked(remove_index).to_le_bytes() };
        let item_width = get_item_width(&item_bytes);
        let old_width = self.get_width();
        let old_count = self.data.len() / old_width;
        let new_width = if item_width < old_width {
            old_width
        } else {
            debug_assert_eq!(old_width, item_width);
            // We might be removing the widest element. If so, we need to scale down.
            let mut largest_width = 1;
            for i in 0..old_count {
                if i == remove_index {
                    continue;
                }
                // Safety: i is in range (between 0 and old_count)
                let curr_bytes = unsafe { self.get_unchecked(i).to_le_bytes() };
                let curr_width = get_item_width(&curr_bytes);
                largest_width = core::cmp::max(curr_width, largest_width);
            }
            largest_width
        };
        let new_count = old_count - 1;
        // Note: the following line won't overflow because we are making the slice shorter.
        let new_bytes_len = new_count * new_width + 1;
        RemoveInfo {
            remove_index,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// Returns the [`RemoveInfo`] for removing the last element. Should be called
    /// on a slice sorted in ascending order.
    ///
    /// This is more efficient than `get_remove_info()` because it doesn't require a
    /// linear traversal of the vector in order to calculate `new_width`.
    pub(crate) fn get_sorted_pop_info(&self) -> RemoveInfo {
        debug_assert!(!self.is_empty());
        let remove_index = self.len() - 1;
        let old_count = self.len();
        let new_width = if old_count == 1 {
            1
        } else {
            // Safety: the FlexZeroSlice has at least two elements
            let largest_item = unsafe { self.get_unchecked(remove_index - 1).to_le_bytes() };
            get_item_width(&largest_item)
        };
        let new_count = old_count - 1;
        // Note: the following line won't overflow because we are making the slice shorter.
        let new_bytes_len = new_count * new_width + 1;
        RemoveInfo {
            remove_index,
            new_width,
            new_count,
            new_bytes_len,
        }
    }

    /// This function should be called on a valid slice.
    ///
    /// After calling this function, the slice data should be truncated to `new_bytes_len - 1` bytes.
    pub(crate) fn remove_impl(&mut self, remove_info: RemoveInfo) {
        let RemoveInfo {
            remove_index,
            new_width,
            new_count,
            ..
        } = remove_info;
        debug_assert!(new_width <= self.get_width());
        debug_assert!(new_count < self.len());
        // For efficiency, calculate how many items we can skip copying.
        let lower_i = if new_width == self.get_width() {
            remove_index
        } else {
            0
        };
        // Copy elements starting from the beginning to compress the vector to fewer bytes.
        for i in lower_i..new_count {
            let j = if i < remove_index { i } else { i + 1 };
            // Safety: j is in range because j <= new_count < self.len()
            let bytes_to_write = unsafe { self.get_unchecked(j).to_le_bytes() };
            // Safety: The bytes are being copied to a section of the array that is not after
            // the section of the array that currently holds the bytes.
            unsafe {
                core::ptr::copy_nonoverlapping(
                    bytes_to_write.as_ptr(),
                    self.data.as_mut_ptr().add(new_width * i),
                    new_width,
                );
            }
        }
        self.width = new_width as u8;
    }
}