1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::ule::*; |
6 | |
7 | use alloc::vec::Vec; |
8 | use core::cmp::{Ord, Ordering, PartialOrd}; |
9 | use core::fmt; |
10 | use core::ops::Deref; |
11 | |
12 | use super::*; |
13 | |
14 | /// A zero-copy, byte-aligned vector for variable-width types. |
15 | /// |
16 | /// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is |
17 | /// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and |
18 | /// where `T`'s data is variable-length (e.g. `String`) |
19 | /// |
20 | /// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more |
21 | /// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>` |
22 | /// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate |
23 | /// a dynamically-sized [`VarULE`] type and conversions to and from a custom type. |
24 | /// |
25 | /// For example, here are some owned types and their zero-copy equivalents: |
26 | /// |
27 | /// - `Vec<String>`: `VarZeroVec<'a, str>` |
28 | /// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>` |
29 | /// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>` |
30 | /// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>` |
31 | /// |
32 | /// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice). |
33 | /// |
34 | /// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec). |
35 | /// |
36 | /// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from |
37 | /// owned data (and then mutated!) but can also borrow from some buffer. |
38 | /// |
39 | /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the |
40 | /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. |
41 | /// |
42 | /// # Bytes and Equality |
43 | /// |
44 | /// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait |
45 | /// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format |
46 | /// across major SemVer releases. |
47 | /// |
48 | /// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use |
49 | /// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a |
50 | /// [`VarZeroVec<T>`] or vice-versa. |
51 | /// |
52 | /// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still |
53 | /// under consideration, with different options along the space-time spectrum. See |
54 | /// [#1410](https://github.com/unicode-org/icu4x/issues/1410). |
55 | /// |
56 | /// # Example |
57 | /// |
58 | /// ```rust |
59 | /// # use zerovec::ule::ZeroVecError; |
60 | /// use zerovec::VarZeroVec; |
61 | /// |
62 | /// // The little-endian bytes correspond to the list of strings. |
63 | /// let strings = vec!["w" , "ω" , "文" , "𑄃" ]; |
64 | /// |
65 | /// #[derive(serde::Serialize, serde::Deserialize)] |
66 | /// struct Data<'a> { |
67 | /// #[serde(borrow)] |
68 | /// strings: VarZeroVec<'a, str>, |
69 | /// } |
70 | /// |
71 | /// let data = Data { |
72 | /// strings: VarZeroVec::from(&strings), |
73 | /// }; |
74 | /// |
75 | /// let bincode_bytes = |
76 | /// bincode::serialize(&data).expect("Serialization should be successful" ); |
77 | /// |
78 | /// // Will deserialize without allocations |
79 | /// let deserialized: Data = bincode::deserialize(&bincode_bytes) |
80 | /// .expect("Deserialization should be successful" ); |
81 | /// |
82 | /// assert_eq!(deserialized.strings.get(2), Some("文" )); |
83 | /// assert_eq!(deserialized.strings, &*strings); |
84 | /// # Ok::<(), ZeroVecError>(()) |
85 | /// ``` |
86 | /// |
87 | /// Here's another example with `ZeroSlice<T>` (similar to `[T]`): |
88 | /// |
89 | /// ```rust |
90 | /// # use zerovec::ule::ZeroVecError; |
91 | /// use zerovec::VarZeroVec; |
92 | /// use zerovec::ZeroSlice; |
93 | /// |
94 | /// // The structured list correspond to the list of integers. |
95 | /// let numbers: &[&[u32]] = &[ |
96 | /// &[12, 25, 38], |
97 | /// &[39179, 100], |
98 | /// &[42, 55555], |
99 | /// &[12345, 54321, 9], |
100 | /// ]; |
101 | /// |
102 | /// #[derive(serde::Serialize, serde::Deserialize)] |
103 | /// struct Data<'a> { |
104 | /// #[serde(borrow)] |
105 | /// vecs: VarZeroVec<'a, ZeroSlice<u32>>, |
106 | /// } |
107 | /// |
108 | /// let data = Data { |
109 | /// vecs: VarZeroVec::from(numbers), |
110 | /// }; |
111 | /// |
112 | /// let bincode_bytes = |
113 | /// bincode::serialize(&data).expect("Serialization should be successful" ); |
114 | /// |
115 | /// let deserialized: Data = bincode::deserialize(&bincode_bytes) |
116 | /// .expect("Deserialization should be successful" ); |
117 | /// |
118 | /// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25); |
119 | /// assert_eq!(deserialized.vecs[1], *numbers[1]); |
120 | /// |
121 | /// # Ok::<(), ZeroVecError>(()) |
122 | /// ``` |
123 | /// |
124 | /// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`] |
125 | /// for more information. |
126 | /// |
127 | /// # How it Works |
128 | /// |
129 | /// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will |
130 | /// serialize to a specially formatted list of bytes. The format is: |
131 | /// |
132 | /// - 4 bytes for `length` (interpreted as a little-endian u32) |
133 | /// - `4 * length` bytes of `indices` (interpreted as little-endian u32) |
134 | /// - Remaining bytes for actual `data` |
135 | /// |
136 | /// Each element in the `indices` array points to the starting index of its corresponding |
137 | /// data part in the `data` list. The ending index can be calculated from the starting index |
138 | /// of the next element (or the length of the slice if dealing with the last element). |
139 | /// |
140 | /// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. |
141 | /// |
142 | /// [`ule`]: crate::ule |
143 | #[non_exhaustive ] |
144 | pub enum VarZeroVec<'a, T: ?Sized, F = Index16> { |
145 | /// An allocated VarZeroVec, allowing for mutations. |
146 | /// |
147 | /// # Examples |
148 | /// |
149 | /// ``` |
150 | /// use zerovec::VarZeroVec; |
151 | /// |
152 | /// let mut vzv = VarZeroVec::<str>::default(); |
153 | /// vzv.make_mut().push("foo" ); |
154 | /// vzv.make_mut().push("bar" ); |
155 | /// assert!(matches!(vzv, VarZeroVec::Owned(_))); |
156 | /// ``` |
157 | Owned(VarZeroVecOwned<T, F>), |
158 | /// A borrowed VarZeroVec, requiring no allocations. |
159 | /// |
160 | /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned. |
161 | /// |
162 | /// # Examples |
163 | /// |
164 | /// ``` |
165 | /// use zerovec::VarZeroVec; |
166 | /// |
167 | /// let bytes = &[ |
168 | /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, |
169 | /// 145, 132, 131, |
170 | /// ]; |
171 | /// |
172 | /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap(); |
173 | /// assert!(matches!(vzv, VarZeroVec::Borrowed(_))); |
174 | /// ``` |
175 | Borrowed(&'a VarZeroSlice<T, F>), |
176 | } |
177 | |
178 | impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> { |
179 | fn clone(&self) -> Self { |
180 | match *self { |
181 | VarZeroVec::Owned(ref o: &VarZeroVecOwned) => o.clone().into(), |
182 | VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(), |
183 | } |
184 | } |
185 | } |
186 | |
187 | impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F> |
188 | where |
189 | T: fmt::Debug, |
190 | { |
191 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
192 | VarZeroSlice::fmt(self, f) |
193 | } |
194 | } |
195 | |
196 | impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> { |
197 | #[inline ] |
198 | fn from(other: VarZeroVecOwned<T, F>) -> Self { |
199 | VarZeroVec::Owned(other) |
200 | } |
201 | } |
202 | |
203 | impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> { |
204 | fn from(other: &'a VarZeroSlice<T, F>) -> Self { |
205 | VarZeroVec::Borrowed(other) |
206 | } |
207 | } |
208 | |
209 | impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>> |
210 | for VarZeroVecOwned<T, F> |
211 | { |
212 | #[inline ] |
213 | fn from(other: VarZeroVec<'a, T, F>) -> Self { |
214 | match other { |
215 | VarZeroVec::Owned(o: VarZeroVecOwned) => o, |
216 | VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(), |
217 | } |
218 | } |
219 | } |
220 | |
221 | impl<T: VarULE + ?Sized> Default for VarZeroVec<'_, T> { |
222 | #[inline ] |
223 | fn default() -> Self { |
224 | Self::new() |
225 | } |
226 | } |
227 | |
228 | impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> { |
229 | type Target = VarZeroSlice<T, F>; |
230 | fn deref(&self) -> &VarZeroSlice<T, F> { |
231 | self.as_slice() |
232 | } |
233 | } |
234 | |
235 | impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> { |
236 | /// Creates a new, empty `VarZeroVec<T>`. |
237 | /// |
238 | /// # Examples |
239 | /// |
240 | /// ``` |
241 | /// use zerovec::VarZeroVec; |
242 | /// |
243 | /// let vzv: VarZeroVec<str> = VarZeroVec::new(); |
244 | /// assert!(vzv.is_empty()); |
245 | /// ``` |
246 | #[inline ] |
247 | pub const fn new() -> Self { |
248 | Self::Borrowed(VarZeroSlice::new_empty()) |
249 | } |
250 | |
251 | /// Parse a VarZeroVec from a slice of the appropriate format |
252 | /// |
253 | /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]. |
254 | /// |
255 | /// # Example |
256 | /// |
257 | /// ```rust |
258 | /// # use zerovec::ule::ZeroVecError; |
259 | /// # use zerovec::VarZeroVec; |
260 | /// |
261 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
262 | /// let vec = VarZeroVec::<str>::from(&strings); |
263 | /// |
264 | /// assert_eq!(&vec[0], "foo" ); |
265 | /// assert_eq!(&vec[1], "bar" ); |
266 | /// assert_eq!(&vec[2], "baz" ); |
267 | /// assert_eq!(&vec[3], "quux" ); |
268 | /// # Ok::<(), ZeroVecError>(()) |
269 | /// ``` |
270 | pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> { |
271 | let borrowed = VarZeroSlice::<T, F>::parse_byte_slice(slice)?; |
272 | |
273 | Ok(VarZeroVec::Borrowed(borrowed)) |
274 | } |
275 | |
276 | /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification. |
277 | /// |
278 | /// # Safety |
279 | /// |
280 | /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. |
281 | pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { |
282 | Self::Borrowed(core::mem::transmute::<&[u8], &VarZeroSlice<T, F>>(bytes)) |
283 | } |
284 | |
285 | /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. |
286 | /// |
287 | /// |
288 | /// # Example |
289 | /// |
290 | /// ```rust,ignore |
291 | /// # use zerovec::ule::ZeroVecError; |
292 | /// # use zerovec::VarZeroVec; |
293 | /// |
294 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
295 | /// let mut vec = VarZeroVec::<str>::from(&strings); |
296 | /// |
297 | /// assert_eq!(vec.len(), 4); |
298 | /// let mutvec = vec.make_mut(); |
299 | /// mutvec.push("lorem ipsum" .into()); |
300 | /// mutvec[2] = "dolor sit" .into(); |
301 | /// assert_eq!(&vec[0], "foo" ); |
302 | /// assert_eq!(&vec[1], "bar" ); |
303 | /// assert_eq!(&vec[2], "dolor sit" ); |
304 | /// assert_eq!(&vec[3], "quux" ); |
305 | /// assert_eq!(&vec[4], "lorem ipsum" ); |
306 | /// # Ok::<(), ZeroVecError>(()) |
307 | /// ``` |
308 | // |
309 | // This function is crate-public for now since we don't yet want to stabilize |
310 | // the internal implementation details |
311 | pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> { |
312 | match self { |
313 | VarZeroVec::Owned(ref mut vec) => vec, |
314 | VarZeroVec::Borrowed(slice) => { |
315 | let new_self = VarZeroVecOwned::from_slice(slice); |
316 | *self = new_self.into(); |
317 | // recursion is limited since we are guaranteed to hit the Owned branch |
318 | self.make_mut() |
319 | } |
320 | } |
321 | } |
322 | |
323 | /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned. |
324 | /// |
325 | /// # Example |
326 | /// |
327 | /// ``` |
328 | /// # use zerovec::ule::ZeroVecError; |
329 | /// # use zerovec::VarZeroVec; |
330 | /// |
331 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
332 | /// let vec = VarZeroVec::<str>::from(&strings); |
333 | /// |
334 | /// assert_eq!(vec.len(), 4); |
335 | /// // has 'static lifetime |
336 | /// let owned = vec.into_owned(); |
337 | /// # Ok::<(), ZeroVecError>(()) |
338 | /// ``` |
339 | pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> { |
340 | self.make_mut(); |
341 | match self { |
342 | VarZeroVec::Owned(vec) => vec.into(), |
343 | _ => unreachable!(), |
344 | } |
345 | } |
346 | |
347 | /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] |
348 | pub fn as_slice(&self) -> &VarZeroSlice<T, F> { |
349 | match *self { |
350 | VarZeroVec::Owned(ref owned) => owned, |
351 | VarZeroVec::Borrowed(b) => b, |
352 | } |
353 | } |
354 | |
355 | /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed, |
356 | /// this function allocates a byte vector and copies the borrowed bytes into it. |
357 | /// |
358 | /// The bytes can be passed back to [`Self::parse_byte_slice()`]. |
359 | /// |
360 | /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`]. |
361 | /// |
362 | /// # Example |
363 | /// |
364 | /// ```rust |
365 | /// # use zerovec::ule::ZeroVecError; |
366 | /// # use zerovec::VarZeroVec; |
367 | /// |
368 | /// let strings = vec!["foo" , "bar" , "baz" ]; |
369 | /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes(); |
370 | /// |
371 | /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?; |
372 | /// assert_eq!(borrowed, &*strings); |
373 | /// |
374 | /// # Ok::<(), ZeroVecError>(()) |
375 | /// ``` |
376 | pub fn into_bytes(self) -> Vec<u8> { |
377 | match self { |
378 | VarZeroVec::Owned(vec) => vec.into_bytes(), |
379 | VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(), |
380 | } |
381 | } |
382 | |
383 | /// Return whether the [`VarZeroVec`] is operating on owned or borrowed |
384 | /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can |
385 | /// be used to force it into an owned type |
386 | pub fn is_owned(&self) -> bool { |
387 | match self { |
388 | VarZeroVec::Owned(..) => true, |
389 | VarZeroVec::Borrowed(..) => false, |
390 | } |
391 | } |
392 | |
393 | #[cfg (feature = "bench" )] |
394 | #[doc (hidden)] |
395 | pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> { |
396 | self.as_slice().as_components() |
397 | } |
398 | } |
399 | |
400 | impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F> |
401 | where |
402 | T: VarULE + ?Sized, |
403 | A: EncodeAsVarULE<T>, |
404 | F: VarZeroVecFormat, |
405 | { |
406 | #[inline ] |
407 | fn from(elements: &Vec<A>) -> Self { |
408 | Self::from(elements.as_slice()) |
409 | } |
410 | } |
411 | |
412 | impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F> |
413 | where |
414 | T: VarULE + ?Sized, |
415 | A: EncodeAsVarULE<T>, |
416 | F: VarZeroVecFormat, |
417 | { |
418 | #[inline ] |
419 | fn from(elements: &[A]) -> Self { |
420 | if elements.is_empty() { |
421 | VarZeroSlice::new_empty().into() |
422 | } else { |
423 | #[allow (clippy::unwrap_used)] // TODO(#1410) Better story for fallibility |
424 | VarZeroVecOwned::try_from_elements(elements).unwrap().into() |
425 | } |
426 | } |
427 | } |
428 | |
429 | impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F> |
430 | where |
431 | T: VarULE + ?Sized, |
432 | A: EncodeAsVarULE<T>, |
433 | F: VarZeroVecFormat, |
434 | { |
435 | #[inline ] |
436 | fn from(elements: &[A; N]) -> Self { |
437 | Self::from(elements.as_slice()) |
438 | } |
439 | } |
440 | |
441 | impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F> |
442 | where |
443 | T: VarULE, |
444 | T: ?Sized, |
445 | T: PartialEq, |
446 | F: VarZeroVecFormat, |
447 | { |
448 | #[inline ] |
449 | fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { |
450 | // VZV::from_elements used to produce a non-canonical representation of the |
451 | // empty VZV, so we cannot use byte equality for empty vecs. |
452 | if self.is_empty() || other.is_empty() { |
453 | return self.is_empty() && other.is_empty(); |
454 | } |
455 | // VarULE has an API guarantee that byte equality is semantic equality. |
456 | // For non-empty VZVs, there's only a single metadata representation, |
457 | // so this guarantee extends to the whole VZV representation. |
458 | self.as_bytes().eq(other.as_bytes()) |
459 | } |
460 | } |
461 | |
462 | impl<'a, T, F> Eq for VarZeroVec<'a, T, F> |
463 | where |
464 | T: VarULE, |
465 | T: ?Sized, |
466 | T: Eq, |
467 | F: VarZeroVecFormat, |
468 | { |
469 | } |
470 | |
471 | impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F> |
472 | where |
473 | T: VarULE + ?Sized, |
474 | T: PartialEq, |
475 | A: AsRef<T>, |
476 | F: VarZeroVecFormat, |
477 | { |
478 | #[inline ] |
479 | fn eq(&self, other: &&[A]) -> bool { |
480 | self.iter().eq(other.iter().map(|t: &A| t.as_ref())) |
481 | } |
482 | } |
483 | |
484 | impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F> |
485 | where |
486 | T: VarULE + ?Sized, |
487 | T: PartialEq, |
488 | A: AsRef<T>, |
489 | F: VarZeroVecFormat, |
490 | { |
491 | #[inline ] |
492 | fn eq(&self, other: &[A; N]) -> bool { |
493 | self.iter().eq(other.iter().map(|t: &A| t.as_ref())) |
494 | } |
495 | } |
496 | |
497 | impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> { |
498 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
499 | self.iter().partial_cmp(other.iter()) |
500 | } |
501 | } |
502 | |
503 | impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> { |
504 | fn cmp(&self, other: &Self) -> Ordering { |
505 | self.iter().cmp(other.iter()) |
506 | } |
507 | } |
508 | |
509 | #[test ] |
510 | fn assert_single_empty_representation() { |
511 | assert_eq!( |
512 | VarZeroVec::<str>::new().as_bytes(), |
513 | VarZeroVec::<str>::from(&[] as &[&str]).as_bytes() |
514 | ); |
515 | } |
516 | |
517 | #[test ] |
518 | fn weird_empty_representation_equality() { |
519 | assert_eq!( |
520 | VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(), |
521 | VarZeroVec::<str>::parse_byte_slice(&[]).unwrap() |
522 | ); |
523 | } |
524 | |