1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::components::VarZeroVecComponents;
6use super::*;
7use crate::ule::*;
8use alloc::boxed::Box;
9use alloc::vec::Vec;
10use core::cmp::{Ord, Ordering, PartialOrd};
11use core::fmt;
12use core::marker::PhantomData;
13use core::mem;
14
15use core::ops::Index;
16use core::ops::Range;
17
18/// A zero-copy "slice", that works for unsized types, i.e. the zero-copy version of `[T]`
19/// where `T` is not `Sized`.
20///
21/// This behaves similarly to [`VarZeroVec<T>`], however [`VarZeroVec<T>`] is allowed to contain
22/// owned data and as such is ideal for deserialization since most human readable
23/// serialization formats cannot unconditionally deserialize zero-copy.
24///
25/// This type can be used inside [`VarZeroVec<T>`](crate::VarZeroVec) and [`ZeroMap`](crate::ZeroMap):
26/// This essentially allows for the construction of zero-copy types isomorphic to `Vec<Vec<T>>` by instead
27/// using `VarZeroVec<ZeroSlice<T>>`.
28///
29/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
30/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
31///
32/// This type can be nested within itself to allow for multi-level nested `Vec`s.
33///
34/// # Examples
35///
36/// ## Nested Slices
37///
38/// The following code constructs the conceptual zero-copy equivalent of `Vec<Vec<Vec<str>>>`
39///
40/// ```rust
41/// use zerovec::ule::*;
42/// use zerovec::{VarZeroSlice, VarZeroVec, ZeroVec};
43/// let strings_1: Vec<&str> = vec!["foo", "bar", "baz"];
44/// let strings_2: Vec<&str> = vec!["twelve", "seventeen", "forty two"];
45/// let strings_3: Vec<&str> = vec!["我", "喜歡", "烏龍茶"];
46/// let strings_4: Vec<&str> = vec!["w", "ω", "文", "𑄃"];
47/// let strings_12 = vec![&*strings_1, &*strings_2];
48/// let strings_34 = vec![&*strings_3, &*strings_4];
49/// let all_strings = vec![strings_12, strings_34];
50///
51/// let vzv_1: VarZeroVec<str> = VarZeroVec::from(&strings_1);
52/// let vzv_2: VarZeroVec<str> = VarZeroVec::from(&strings_2);
53/// let vzv_3: VarZeroVec<str> = VarZeroVec::from(&strings_3);
54/// let vzv_4: VarZeroVec<str> = VarZeroVec::from(&strings_4);
55/// let vzv_12 = VarZeroVec::from(&[vzv_1.as_slice(), vzv_2.as_slice()]);
56/// let vzv_34 = VarZeroVec::from(&[vzv_3.as_slice(), vzv_4.as_slice()]);
57/// let vzv_all = VarZeroVec::from(&[vzv_12.as_slice(), vzv_34.as_slice()]);
58///
59/// let reconstructed: Vec<Vec<Vec<String>>> = vzv_all
60/// .iter()
61/// .map(|v: &VarZeroSlice<VarZeroSlice<str>>| {
62/// v.iter()
63/// .map(|x: &VarZeroSlice<_>| {
64/// x.as_varzerovec()
65/// .iter()
66/// .map(|s| s.to_owned())
67/// .collect::<Vec<String>>()
68/// })
69/// .collect::<Vec<_>>()
70/// })
71/// .collect::<Vec<_>>();
72/// assert_eq!(reconstructed, all_strings);
73///
74/// let bytes = vzv_all.as_bytes();
75/// let vzv_from_bytes: VarZeroVec<VarZeroSlice<VarZeroSlice<str>>> =
76/// VarZeroVec::parse_byte_slice(bytes).unwrap();
77/// assert_eq!(vzv_from_bytes, vzv_all);
78/// ```
79///
80/// ## Iterate over Windows
81///
82/// Although [`VarZeroSlice`] does not itself have a `.windows` iterator like
83/// [core::slice::Windows], this behavior can be easily modeled using an iterator:
84///
85/// ```
86/// use zerovec::VarZeroVec;
87///
88/// let vzv = VarZeroVec::<str>::from(&["a", "b", "c", "d"]);
89/// # let mut pairs: Vec<(&str, &str)> = Vec::new();
90///
91/// let mut it = vzv.iter().peekable();
92/// while let (Some(x), Some(y)) = (it.next(), it.peek()) {
93/// // Evaluate (x, y) here.
94/// # pairs.push((x, y));
95/// }
96/// # assert_eq!(pairs, &[("a", "b"), ("b", "c"), ("c", "d")]);
97/// ```
98//
99// safety invariant: The slice MUST be one which parses to
100// a valid VarZeroVecComponents<T>
101#[repr(transparent)]
102pub struct VarZeroSlice<T: ?Sized, F = Index16> {
103 marker: PhantomData<(F, T)>,
104 /// The original slice this was constructed from
105 entire_slice: [u8],
106}
107
108impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
109 /// Construct a new empty VarZeroSlice
110 pub const fn new_empty() -> &'static Self {
111 // The empty VZV is special-cased to the empty slice
112 unsafe { mem::transmute(&[] as &[u8]) }
113 }
114
115 /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer
116 #[inline]
117 pub(crate) fn as_components<'a>(&'a self) -> VarZeroVecComponents<'a, T, F> {
118 unsafe {
119 // safety: VarZeroSlice is guaranteed to parse here
120 VarZeroVecComponents::from_bytes_unchecked(&self.entire_slice)
121 }
122 }
123
124 /// Uses a `&[u8]` buffer as a `VarZeroSlice<T>` without any verification.
125 ///
126 /// # Safety
127 ///
128 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
129 pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
130 // self is really just a wrapper around a byte slice
131 mem::transmute(bytes)
132 }
133
134 /// Get the number of elements in this slice
135 ///
136 /// # Example
137 ///
138 /// ```rust
139 /// # use std::str::Utf8Error;
140 /// # use zerovec::ule::ZeroVecError;
141 /// # use zerovec::VarZeroVec;
142 ///
143 /// let strings = vec!["foo", "bar", "baz", "quux"];
144 /// let vec = VarZeroVec::<str>::from(&strings);
145 ///
146 /// assert_eq!(vec.len(), 4);
147 /// # Ok::<(), ZeroVecError>(())
148 /// ```
149 pub fn len(&self) -> usize {
150 self.as_components().len()
151 }
152
153 /// Returns `true` if the slice contains no elements.
154 ///
155 /// # Examples
156 ///
157 /// ```
158 /// # use std::str::Utf8Error;
159 /// # use zerovec::ule::ZeroVecError;
160 /// # use zerovec::VarZeroVec;
161 ///
162 /// let strings: Vec<String> = vec![];
163 /// let vec = VarZeroVec::<str>::from(&strings);
164 ///
165 /// assert!(vec.is_empty());
166 /// # Ok::<(), ZeroVecError>(())
167 /// ```
168 pub fn is_empty(&self) -> bool {
169 self.as_components().is_empty()
170 }
171
172 /// Obtain an iterator over this slice's elements
173 ///
174 /// # Example
175 ///
176 /// ```rust
177 /// # use std::str::Utf8Error;
178 /// # use zerovec::ule::ZeroVecError;
179 /// # use zerovec::VarZeroVec;
180 ///
181 /// let strings = vec!["foo", "bar", "baz", "quux"];
182 /// let vec = VarZeroVec::<str>::from(&strings);
183 ///
184 /// let mut iter_results: Vec<&str> = vec.iter().collect();
185 /// assert_eq!(iter_results[0], "foo");
186 /// assert_eq!(iter_results[1], "bar");
187 /// assert_eq!(iter_results[2], "baz");
188 /// assert_eq!(iter_results[3], "quux");
189 /// # Ok::<(), ZeroVecError>(())
190 /// ```
191 pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
192 self.as_components().iter()
193 }
194
195 /// Get one of this slice's elements, returning `None` if the index is out of bounds
196 ///
197 /// # Example
198 ///
199 /// ```rust
200 /// # use std::str::Utf8Error;
201 /// # use zerovec::ule::ZeroVecError;
202 /// # use zerovec::VarZeroVec;
203 ///
204 /// let strings = vec!["foo", "bar", "baz", "quux"];
205 /// let vec = VarZeroVec::<str>::from(&strings);
206 ///
207 /// let mut iter_results: Vec<&str> = vec.iter().collect();
208 /// assert_eq!(vec.get(0), Some("foo"));
209 /// assert_eq!(vec.get(1), Some("bar"));
210 /// assert_eq!(vec.get(2), Some("baz"));
211 /// assert_eq!(vec.get(3), Some("quux"));
212 /// assert_eq!(vec.get(4), None);
213 /// # Ok::<(), ZeroVecError>(())
214 /// ```
215 pub fn get(&self, idx: usize) -> Option<&T> {
216 self.as_components().get(idx)
217 }
218
219 /// Get one of this slice's elements
220 ///
221 /// # Safety
222 ///
223 /// `index` must be in range
224 ///
225 /// # Example
226 ///
227 /// ```rust
228 /// # use std::str::Utf8Error;
229 /// # use zerovec::ule::ZeroVecError;
230 /// # use zerovec::VarZeroVec;
231 ///
232 /// let strings = vec!["foo", "bar", "baz", "quux"];
233 /// let vec = VarZeroVec::<str>::from(&strings);
234 ///
235 /// let mut iter_results: Vec<&str> = vec.iter().collect();
236 /// unsafe {
237 /// assert_eq!(vec.get_unchecked(0), "foo");
238 /// assert_eq!(vec.get_unchecked(1), "bar");
239 /// assert_eq!(vec.get_unchecked(2), "baz");
240 /// assert_eq!(vec.get_unchecked(3), "quux");
241 /// }
242 /// # Ok::<(), ZeroVecError>(())
243 /// ```
244 pub unsafe fn get_unchecked(&self, idx: usize) -> &T {
245 self.as_components().get_unchecked(idx)
246 }
247
248 /// Obtain an owned `Vec<Box<T>>` out of this
249 pub fn to_vec(&self) -> Vec<Box<T>> {
250 self.as_components().to_vec()
251 }
252
253 /// Get a reference to the entire encoded backing buffer of this slice
254 ///
255 /// The bytes can be passed back to [`Self::parse_byte_slice()`].
256 ///
257 /// To take the bytes as a vector, see [`VarZeroVec::into_bytes()`].
258 ///
259 /// # Example
260 ///
261 /// ```rust
262 /// # use std::str::Utf8Error;
263 /// # use zerovec::ule::ZeroVecError;
264 /// # use zerovec::VarZeroVec;
265 ///
266 /// let strings = vec!["foo", "bar", "baz"];
267 /// let vzv = VarZeroVec::<str>::from(&strings);
268 ///
269 /// assert_eq!(vzv, VarZeroVec::parse_byte_slice(vzv.as_bytes()).unwrap());
270 ///
271 /// # Ok::<(), ZeroVecError>(())
272 /// ```
273 #[inline]
274 pub const fn as_bytes(&self) -> &[u8] {
275 &self.entire_slice
276 }
277
278 /// Get this [`VarZeroSlice`] as a borrowed [`VarZeroVec`]
279 ///
280 /// If you wish to repeatedly call methods on this [`VarZeroSlice`],
281 /// it is more efficient to perform this conversion first
282 pub const fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T, F> {
283 VarZeroVec::Borrowed(self)
284 }
285
286 /// Parse a VarZeroSlice from a slice of the appropriate format
287 ///
288 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]
289 pub fn parse_byte_slice<'a>(slice: &'a [u8]) -> Result<&'a Self, ZeroVecError> {
290 <Self as VarULE>::parse_byte_slice(slice)
291 }
292
293 /// Convert a `bytes` array known to represent a `VarZeroSlice` to a mutable reference to a `VarZeroSlice`
294 ///
295 /// # Safety
296 /// - `bytes` must be a valid sequence of bytes for this VarZeroVec
297 pub(crate) unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut Self {
298 // self is really just a wrapper around a byte slice
299 mem::transmute(bytes)
300 }
301
302 pub(crate) unsafe fn get_bytes_at_mut(&mut self, idx: usize) -> &mut [u8] {
303 let range = self.as_components().get_range(idx);
304 #[allow(clippy::indexing_slicing)] // get_range() is known to return in-bounds ranges
305 &mut self.entire_slice[range]
306 }
307}
308
309impl<T, F> VarZeroSlice<T, F>
310where
311 T: VarULE,
312 T: ?Sized,
313 T: Ord,
314 F: VarZeroVecFormat,
315{
316 /// Binary searches a sorted `VarZeroVec<T>` for the given element. For more information, see
317 /// the standard library function [`binary_search`].
318 ///
319 /// # Example
320 ///
321 /// ```
322 /// # use std::str::Utf8Error;
323 /// # use zerovec::ule::ZeroVecError;
324 /// # use zerovec::VarZeroVec;
325 ///
326 /// let strings = vec!["a", "b", "f", "g"];
327 /// let vec = VarZeroVec::<str>::from(&strings);
328 ///
329 /// assert_eq!(vec.binary_search("f"), Ok(2));
330 /// assert_eq!(vec.binary_search("e"), Err(2));
331 /// # Ok::<(), ZeroVecError>(())
332 /// ```
333 ///
334 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
335 #[inline]
336 pub fn binary_search(&self, x: &T) -> Result<usize, usize> {
337 self.as_components().binary_search(x)
338 }
339
340 /// Binary searches a `VarZeroVec<T>` for the given element within a certain sorted range.
341 ///
342 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
343 /// to the behavior of the standard library function [`binary_search`].
344 ///
345 /// The index is returned relative to the start of the range.
346 ///
347 /// # Example
348 ///
349 /// ```
350 /// # use std::str::Utf8Error;
351 /// # use zerovec::ule::ZeroVecError;
352 /// # use zerovec::VarZeroVec;
353 ///
354 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
355 /// let vec = VarZeroVec::<str>::from(&strings);
356 ///
357 /// // Same behavior as binary_search when the range covers the whole slice:
358 /// assert_eq!(vec.binary_search_in_range("g", 0..7), Some(Ok(3)));
359 /// assert_eq!(vec.binary_search_in_range("h", 0..7), Some(Err(4)));
360 ///
361 /// // Will not look outside of the range:
362 /// assert_eq!(vec.binary_search_in_range("g", 0..1), Some(Err(1)));
363 /// assert_eq!(vec.binary_search_in_range("g", 6..7), Some(Err(0)));
364 ///
365 /// // Will return indices relative to the start of the range:
366 /// assert_eq!(vec.binary_search_in_range("g", 1..6), Some(Ok(2)));
367 /// assert_eq!(vec.binary_search_in_range("h", 1..6), Some(Err(3)));
368 ///
369 /// // Will return `None` if the range is out of bounds:
370 /// assert_eq!(vec.binary_search_in_range("x", 100..200), None);
371 /// assert_eq!(vec.binary_search_in_range("x", 0..200), None);
372 /// # Ok::<(), ZeroVecError>(())
373 /// ```
374 ///
375 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
376 #[inline]
377 pub fn binary_search_in_range(
378 &self,
379 x: &T,
380 range: Range<usize>,
381 ) -> Option<Result<usize, usize>> {
382 self.as_components().binary_search_in_range(x, range)
383 }
384}
385
386impl<T, F> VarZeroSlice<T, F>
387where
388 T: VarULE,
389 T: ?Sized,
390 F: VarZeroVecFormat,
391{
392 /// Binary searches a sorted `VarZeroVec<T>` for the given predicate. For more information, see
393 /// the standard library function [`binary_search_by`].
394 ///
395 /// # Example
396 ///
397 /// ```
398 /// # use std::str::Utf8Error;
399 /// # use zerovec::ule::ZeroVecError;
400 /// # use zerovec::VarZeroVec;
401 ///
402 /// let strings = vec!["a", "b", "f", "g"];
403 /// let vec = VarZeroVec::<str>::from(&strings);
404 ///
405 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("f")), Ok(2));
406 /// assert_eq!(vec.binary_search_by(|probe| probe.cmp("e")), Err(2));
407 /// # Ok::<(), ZeroVecError>(())
408 /// ```
409 ///
410 /// [`binary_search_by`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search_by
411 #[inline]
412 pub fn binary_search_by(&self, predicate: impl FnMut(&T) -> Ordering) -> Result<usize, usize> {
413 self.as_components().binary_search_by(predicate)
414 }
415
416 /// Binary searches a `VarZeroVec<T>` for the given predicate within a certain sorted range.
417 ///
418 /// If the range is out of bounds, returns `None`. Otherwise, returns a `Result` according
419 /// to the behavior of the standard library function [`binary_search`].
420 ///
421 /// The index is returned relative to the start of the range.
422 ///
423 /// # Example
424 ///
425 /// ```
426 /// # use std::str::Utf8Error;
427 /// # use zerovec::ule::ZeroVecError;
428 /// # use zerovec::VarZeroVec;
429 ///
430 /// let strings = vec!["a", "b", "f", "g", "m", "n", "q"];
431 /// let vec = VarZeroVec::<str>::from(&strings);
432 ///
433 /// // Same behavior as binary_search when the range covers the whole slice:
434 /// assert_eq!(
435 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..7),
436 /// Some(Ok(3))
437 /// );
438 /// assert_eq!(
439 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 0..7),
440 /// Some(Err(4))
441 /// );
442 ///
443 /// // Will not look outside of the range:
444 /// assert_eq!(
445 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 0..1),
446 /// Some(Err(1))
447 /// );
448 /// assert_eq!(
449 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 6..7),
450 /// Some(Err(0))
451 /// );
452 ///
453 /// // Will return indices relative to the start of the range:
454 /// assert_eq!(
455 /// vec.binary_search_in_range_by(|v| v.cmp("g"), 1..6),
456 /// Some(Ok(2))
457 /// );
458 /// assert_eq!(
459 /// vec.binary_search_in_range_by(|v| v.cmp("h"), 1..6),
460 /// Some(Err(3))
461 /// );
462 ///
463 /// // Will return `None` if the range is out of bounds:
464 /// assert_eq!(
465 /// vec.binary_search_in_range_by(|v| v.cmp("x"), 100..200),
466 /// None
467 /// );
468 /// assert_eq!(vec.binary_search_in_range_by(|v| v.cmp("x"), 0..200), None);
469 /// # Ok::<(), ZeroVecError>(())
470 /// ```
471 ///
472 /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search
473 pub fn binary_search_in_range_by(
474 &self,
475 predicate: impl FnMut(&T) -> Ordering,
476 range: Range<usize>,
477 ) -> Option<Result<usize, usize>> {
478 self.as_components()
479 .binary_search_in_range_by(predicate, range)
480 }
481}
482// Safety (based on the safety checklist on the VarULE trait):
483// 1. VarZeroSlice does not include any uninitialized or padding bytes (achieved by `#[repr(transparent)]` on a
484// `[u8]` slice which satisfies this invariant)
485// 2. VarZeroSlice is aligned to 1 byte (achieved by `#[repr(transparent)]` on a
486// `[u8]` slice which satisfies this invariant)
487// 3. The impl of `validate_byte_slice()` returns an error if any byte is not valid.
488// 4. The impl of `validate_byte_slice()` returns an error if the slice cannot be used in its entirety
489// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data.
490// 6. `as_byte_slice()` is equivalent to a regular transmute of the underlying data
491// 7. VarZeroSlice byte equality is semantic equality (relying on the guideline of the underlying VarULE type)
492unsafe impl<T: VarULE + ?Sized + 'static, F: VarZeroVecFormat> VarULE for VarZeroSlice<T, F> {
493 fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
494 let _: VarZeroVecComponents<T, F> = VarZeroVecComponents::parse_byte_slice(bytes)?;
495 Ok(())
496 }
497
498 unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self {
499 // self is really just a wrapper around a byte slice
500 mem::transmute(src:bytes)
501 }
502
503 fn as_byte_slice(&self) -> &[u8] {
504 &self.entire_slice
505 }
506}
507
508impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Index<usize> for VarZeroSlice<T, F> {
509 type Output = T;
510 fn index(&self, index: usize) -> &Self::Output {
511 #[allow(clippy::panic)] // documented
512 match self.get(idx:index) {
513 Some(x: &T) => x,
514 None => panic!(
515 "index out of bounds: the len is {} but the index is {index}",
516 self.len()
517 ),
518 }
519 }
520}
521
522impl<T, F> PartialEq<VarZeroSlice<T, F>> for VarZeroSlice<T, F>
523where
524 T: VarULE,
525 T: ?Sized,
526 T: PartialEq,
527 F: VarZeroVecFormat,
528{
529 #[inline]
530 fn eq(&self, other: &VarZeroSlice<T, F>) -> bool {
531 // VarULE has an API guarantee that this is equivalent
532 // to `T::VarULE::eq()`
533 self.entire_slice.eq(&other.entire_slice)
534 }
535}
536
537impl<T, F> Eq for VarZeroSlice<T, F>
538where
539 T: VarULE,
540 T: ?Sized,
541 T: Eq,
542 F: VarZeroVecFormat,
543{
544}
545
546impl<T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroSlice<T, F> {
547 #[inline]
548 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
549 self.iter().partial_cmp(other.iter())
550 }
551}
552
553impl<T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroSlice<T, F> {
554 #[inline]
555 fn cmp(&self, other: &Self) -> Ordering {
556 self.iter().cmp(other.iter())
557 }
558}
559
560impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroSlice<T, F>
561where
562 T: fmt::Debug,
563{
564 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
565 f.debug_list().entries(self.iter()).finish()
566 }
567}
568
569impl<T: ?Sized, F: VarZeroVecFormat> AsRef<VarZeroSlice<T, F>> for VarZeroSlice<T, F> {
570 fn as_ref(&self) -> &VarZeroSlice<T, F> {
571 self
572 }
573}
574