1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6
7use alloc::vec::Vec;
8use core::cmp::{Ord, Ordering, PartialOrd};
9use core::fmt;
10use core::ops::Deref;
11
12use super::*;
13
14/// A zero-copy, byte-aligned vector for variable-width types.
15///
16/// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is
17/// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and
18/// where `T`'s data is variable-length (e.g. `String`)
19///
20/// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more
21/// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>`
22/// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate
23/// a dynamically-sized [`VarULE`] type and conversions to and from a custom type.
24///
25/// For example, here are some owned types and their zero-copy equivalents:
26///
27/// - `Vec<String>`: `VarZeroVec<'a, str>`
28/// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>`
29/// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>`
30/// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>`
31///
32/// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice).
33///
34/// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec).
35///
36/// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from
37/// owned data (and then mutated!) but can also borrow from some buffer.
38///
39/// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the
40/// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`].
41///
42/// # Bytes and Equality
43///
44/// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait
45/// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format
46/// across major SemVer releases.
47///
48/// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use
49/// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a
50/// [`VarZeroVec<T>`] or vice-versa.
51///
52/// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still
53/// under consideration, with different options along the space-time spectrum. See
54/// [#1410](https://github.com/unicode-org/icu4x/issues/1410).
55///
56/// # Example
57///
58/// ```rust
59/// # use std::str::Utf8Error;
60/// # use zerovec::ule::ZeroVecError;
61/// use zerovec::VarZeroVec;
62///
63/// // The little-endian bytes correspond to the list of strings.
64/// let strings = vec!["w", "ω", "文", "𑄃"];
65///
66/// #[derive(serde::Serialize, serde::Deserialize)]
67/// struct Data<'a> {
68/// #[serde(borrow)]
69/// strings: VarZeroVec<'a, str>,
70/// }
71///
72/// let data = Data {
73/// strings: VarZeroVec::from(&strings),
74/// };
75///
76/// let bincode_bytes =
77/// bincode::serialize(&data).expect("Serialization should be successful");
78///
79/// // Will deserialize without allocations
80/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
81/// .expect("Deserialization should be successful");
82///
83/// assert_eq!(deserialized.strings.get(2), Some("文"));
84/// assert_eq!(deserialized.strings, &*strings);
85/// # Ok::<(), ZeroVecError>(())
86/// ```
87///
88/// Here's another example with `ZeroSlice<T>` (similar to `[T]`):
89///
90/// ```rust
91/// # use std::str::Utf8Error;
92/// # use zerovec::ule::ZeroVecError;
93/// use zerovec::ule::*;
94/// use zerovec::VarZeroVec;
95/// use zerovec::ZeroSlice;
96/// use zerovec::ZeroVec;
97///
98/// // The structured list correspond to the list of integers.
99/// let numbers: &[&[u32]] = &[
100/// &[12, 25, 38],
101/// &[39179, 100],
102/// &[42, 55555],
103/// &[12345, 54321, 9],
104/// ];
105///
106/// #[derive(serde::Serialize, serde::Deserialize)]
107/// struct Data<'a> {
108/// #[serde(borrow)]
109/// vecs: VarZeroVec<'a, ZeroSlice<u32>>,
110/// }
111///
112/// let data = Data {
113/// vecs: VarZeroVec::from(numbers),
114/// };
115///
116/// let bincode_bytes =
117/// bincode::serialize(&data).expect("Serialization should be successful");
118///
119/// let deserialized: Data = bincode::deserialize(&bincode_bytes)
120/// .expect("Deserialization should be successful");
121///
122/// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25);
123/// assert_eq!(deserialized.vecs[1], *numbers[1]);
124///
125/// # Ok::<(), ZeroVecError>(())
126/// ```
127///
128/// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`]
129/// for more information.
130///
131/// # How it Works
132///
133/// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will
134/// serialize to a specially formatted list of bytes. The format is:
135///
136/// - 4 bytes for `length` (interpreted as a little-endian u32)
137/// - `4 * length` bytes of `indices` (interpreted as little-endian u32)
138/// - Remaining bytes for actual `data`
139///
140/// Each element in the `indices` array points to the starting index of its corresponding
141/// data part in the `data` list. The ending index can be calculated from the starting index
142/// of the next element (or the length of the slice if dealing with the last element).
143///
144/// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details.
145///
146/// [`ule`]: crate::ule
147#[non_exhaustive]
148pub enum VarZeroVec<'a, T: ?Sized, F = Index16> {
149 /// An allocated VarZeroVec, allowing for mutations.
150 ///
151 /// # Examples
152 ///
153 /// ```
154 /// use zerovec::VarZeroVec;
155 ///
156 /// let mut vzv = VarZeroVec::<str>::default();
157 /// vzv.make_mut().push("foo");
158 /// vzv.make_mut().push("bar");
159 /// assert!(matches!(vzv, VarZeroVec::Owned(_)));
160 /// ```
161 Owned(VarZeroVecOwned<T, F>),
162 /// A borrowed VarZeroVec, requiring no allocations.
163 ///
164 /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned.
165 ///
166 /// # Examples
167 ///
168 /// ```
169 /// use zerovec::VarZeroVec;
170 ///
171 /// let bytes = &[
172 /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240,
173 /// 145, 132, 131,
174 /// ];
175 ///
176 /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap();
177 /// assert!(matches!(vzv, VarZeroVec::Borrowed(_)));
178 /// ```
179 Borrowed(&'a VarZeroSlice<T, F>),
180}
181
182impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> {
183 fn clone(&self) -> Self {
184 match *self {
185 VarZeroVec::Owned(ref o: &VarZeroVecOwned) => o.clone().into(),
186 VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(),
187 }
188 }
189}
190
191impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F>
192where
193 T: fmt::Debug,
194{
195 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
196 VarZeroSlice::fmt(self, f)
197 }
198}
199
200impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> {
201 #[inline]
202 fn from(other: VarZeroVecOwned<T, F>) -> Self {
203 VarZeroVec::Owned(other)
204 }
205}
206
207impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> {
208 fn from(other: &'a VarZeroSlice<T, F>) -> Self {
209 VarZeroVec::Borrowed(other)
210 }
211}
212
213impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>>
214 for VarZeroVecOwned<T, F>
215{
216 #[inline]
217 fn from(other: VarZeroVec<'a, T, F>) -> Self {
218 match other {
219 VarZeroVec::Owned(o: VarZeroVecOwned) => o,
220 VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(),
221 }
222 }
223}
224
225impl<T: VarULE + ?Sized> Default for VarZeroVec<'_, T> {
226 #[inline]
227 fn default() -> Self {
228 Self::new()
229 }
230}
231
232impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> {
233 type Target = VarZeroSlice<T, F>;
234 fn deref(&self) -> &VarZeroSlice<T, F> {
235 self.as_slice()
236 }
237}
238
239impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> {
240 /// Creates a new, empty `VarZeroVec<T>`.
241 ///
242 /// # Examples
243 ///
244 /// ```
245 /// use zerovec::VarZeroVec;
246 ///
247 /// let vzv: VarZeroVec<str> = VarZeroVec::new();
248 /// assert!(vzv.is_empty());
249 /// ```
250 #[inline]
251 pub const fn new() -> Self {
252 Self::Borrowed(VarZeroSlice::new_empty())
253 }
254
255 /// Parse a VarZeroVec from a slice of the appropriate format
256 ///
257 /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`].
258 ///
259 /// # Example
260 ///
261 /// ```rust
262 /// # use std::str::Utf8Error;
263 /// # use zerovec::ule::ZeroVecError;
264 /// # use zerovec::VarZeroVec;
265 ///
266 /// let strings = vec!["foo", "bar", "baz", "quux"];
267 /// let vec = VarZeroVec::<str>::from(&strings);
268 ///
269 /// assert_eq!(&vec[0], "foo");
270 /// assert_eq!(&vec[1], "bar");
271 /// assert_eq!(&vec[2], "baz");
272 /// assert_eq!(&vec[3], "quux");
273 /// # Ok::<(), ZeroVecError>(())
274 /// ```
275 pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> {
276 let borrowed = VarZeroSlice::<T, F>::parse_byte_slice(slice)?;
277
278 Ok(VarZeroVec::Borrowed(borrowed))
279 }
280
281 /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification.
282 ///
283 /// # Safety
284 ///
285 /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`].
286 pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self {
287 Self::Borrowed(core::mem::transmute(bytes))
288 }
289
290 /// Convert this into a mutable vector of the owned `T` type, cloning if necessary.
291 ///
292 ///
293 /// # Example
294 ///
295 /// ```rust,ignore
296 /// # use std::str::Utf8Error;
297 /// # use zerovec::ule::ZeroVecError;
298 /// # use zerovec::VarZeroVec;
299 ///
300 /// let strings = vec!["foo", "bar", "baz", "quux"];
301 /// let mut vec = VarZeroVec::<str>::from(&strings);
302 ///
303 /// assert_eq!(vec.len(), 4);
304 /// let mutvec = vec.make_mut();
305 /// mutvec.push("lorem ipsum".into());
306 /// mutvec[2] = "dolor sit".into();
307 /// assert_eq!(&vec[0], "foo");
308 /// assert_eq!(&vec[1], "bar");
309 /// assert_eq!(&vec[2], "dolor sit");
310 /// assert_eq!(&vec[3], "quux");
311 /// assert_eq!(&vec[4], "lorem ipsum");
312 /// # Ok::<(), ZeroVecError>(())
313 /// ```
314 //
315 // This function is crate-public for now since we don't yet want to stabilize
316 // the internal implementation details
317 pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> {
318 match self {
319 VarZeroVec::Owned(ref mut vec) => vec,
320 VarZeroVec::Borrowed(slice) => {
321 let new_self = VarZeroVecOwned::from_slice(slice);
322 *self = new_self.into();
323 // recursion is limited since we are guaranteed to hit the Owned branch
324 self.make_mut()
325 }
326 }
327 }
328
329 /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned.
330 ///
331 /// # Example
332 ///
333 /// ```
334 /// # use std::str::Utf8Error;
335 /// # use zerovec::ule::ZeroVecError;
336 /// # use zerovec::VarZeroVec;
337 ///
338 /// let strings = vec!["foo", "bar", "baz", "quux"];
339 /// let vec = VarZeroVec::<str>::from(&strings);
340 ///
341 /// assert_eq!(vec.len(), 4);
342 /// // has 'static lifetime
343 /// let owned = vec.into_owned();
344 /// # Ok::<(), ZeroVecError>(())
345 /// ```
346 pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> {
347 self.make_mut();
348 match self {
349 VarZeroVec::Owned(vec) => vec.into(),
350 _ => unreachable!(),
351 }
352 }
353
354 /// Obtain this `VarZeroVec` as a [`VarZeroSlice`]
355 pub fn as_slice(&self) -> &VarZeroSlice<T, F> {
356 match *self {
357 VarZeroVec::Owned(ref owned) => owned,
358 VarZeroVec::Borrowed(b) => b,
359 }
360 }
361
362 /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed,
363 /// this function allocates a byte vector and copies the borrowed bytes into it.
364 ///
365 /// The bytes can be passed back to [`Self::parse_byte_slice()`].
366 ///
367 /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`].
368 ///
369 /// # Example
370 ///
371 /// ```rust
372 /// # use std::str::Utf8Error;
373 /// # use zerovec::ule::ZeroVecError;
374 /// # use zerovec::VarZeroVec;
375 ///
376 /// let strings = vec!["foo", "bar", "baz"];
377 /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes();
378 ///
379 /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?;
380 /// assert_eq!(borrowed, &*strings);
381 ///
382 /// # Ok::<(), ZeroVecError>(())
383 /// ```
384 pub fn into_bytes(self) -> Vec<u8> {
385 match self {
386 VarZeroVec::Owned(vec) => vec.into_bytes(),
387 VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(),
388 }
389 }
390
391 /// Return whether the [`VarZeroVec`] is operating on owned or borrowed
392 /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can
393 /// be used to force it into an owned type
394 pub fn is_owned(&self) -> bool {
395 match self {
396 VarZeroVec::Owned(..) => true,
397 VarZeroVec::Borrowed(..) => false,
398 }
399 }
400
401 #[cfg(feature = "bench")]
402 #[doc(hidden)]
403 pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> {
404 self.as_slice().as_components()
405 }
406}
407
408impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F>
409where
410 T: VarULE + ?Sized,
411 A: EncodeAsVarULE<T>,
412 F: VarZeroVecFormat,
413{
414 #[inline]
415 fn from(elements: &Vec<A>) -> Self {
416 Self::from(elements.as_slice())
417 }
418}
419
420impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F>
421where
422 T: VarULE + ?Sized,
423 A: EncodeAsVarULE<T>,
424 F: VarZeroVecFormat,
425{
426 #[inline]
427 fn from(elements: &[A]) -> Self {
428 if elements.is_empty() {
429 VarZeroSlice::new_empty().into()
430 } else {
431 #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility
432 VarZeroVecOwned::try_from_elements(elements).unwrap().into()
433 }
434 }
435}
436
437impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F>
438where
439 T: VarULE + ?Sized,
440 A: EncodeAsVarULE<T>,
441 F: VarZeroVecFormat,
442{
443 #[inline]
444 fn from(elements: &[A; N]) -> Self {
445 Self::from(elements.as_slice())
446 }
447}
448
449impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F>
450where
451 T: VarULE,
452 T: ?Sized,
453 T: PartialEq,
454 F: VarZeroVecFormat,
455{
456 #[inline]
457 fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool {
458 // VZV::from_elements used to produce a non-canonical representation of the
459 // empty VZV, so we cannot use byte equality for empty vecs.
460 if self.is_empty() || other.is_empty() {
461 return self.is_empty() && other.is_empty();
462 }
463 // VarULE has an API guarantee that byte equality is semantic equality.
464 // For non-empty VZVs, there's only a single metadata representation,
465 // so this guarantee extends to the whole VZV representation.
466 self.as_bytes().eq(other.as_bytes())
467 }
468}
469
470impl<'a, T, F> Eq for VarZeroVec<'a, T, F>
471where
472 T: VarULE,
473 T: ?Sized,
474 T: Eq,
475 F: VarZeroVecFormat,
476{
477}
478
479impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F>
480where
481 T: VarULE + ?Sized,
482 T: PartialEq,
483 A: AsRef<T>,
484 F: VarZeroVecFormat,
485{
486 #[inline]
487 fn eq(&self, other: &&[A]) -> bool {
488 self.iter().eq(other.iter().map(|t: &A| t.as_ref()))
489 }
490}
491
492impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F>
493where
494 T: VarULE + ?Sized,
495 T: PartialEq,
496 A: AsRef<T>,
497 F: VarZeroVecFormat,
498{
499 #[inline]
500 fn eq(&self, other: &[A; N]) -> bool {
501 self.iter().eq(other.iter().map(|t: &A| t.as_ref()))
502 }
503}
504
505impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> {
506 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
507 self.iter().partial_cmp(other.iter())
508 }
509}
510
511impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> {
512 fn cmp(&self, other: &Self) -> Ordering {
513 self.iter().cmp(other.iter())
514 }
515}
516
517#[test]
518fn assert_single_empty_representation() {
519 assert_eq!(
520 VarZeroVec::<str>::new().as_bytes(),
521 VarZeroVec::<str>::from(&[] as &[&str]).as_bytes()
522 );
523}
524
525#[test]
526fn weird_empty_representation_equality() {
527 assert_eq!(
528 VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(),
529 VarZeroVec::<str>::parse_byte_slice(&[]).unwrap()
530 );
531}
532