1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::ule::*; |
6 | |
7 | use alloc::vec::Vec; |
8 | use core::cmp::{Ord, Ordering, PartialOrd}; |
9 | use core::fmt; |
10 | use core::ops::Deref; |
11 | |
12 | use super::*; |
13 | |
14 | /// A zero-copy, byte-aligned vector for variable-width types. |
15 | /// |
16 | /// `VarZeroVec<T>` is designed as a drop-in replacement for `Vec<T>` in situations where it is |
17 | /// desirable to borrow data from an unaligned byte slice, such as zero-copy deserialization, and |
18 | /// where `T`'s data is variable-length (e.g. `String`) |
19 | /// |
20 | /// `T` must implement [`VarULE`], which is already implemented for [`str`] and `[u8]`. For storing more |
21 | /// complicated series of elements, it is implemented on `ZeroSlice<T>` as well as `VarZeroSlice<T>` |
22 | /// for nesting. [`zerovec::make_varule`](crate::make_varule) may be used to generate |
23 | /// a dynamically-sized [`VarULE`] type and conversions to and from a custom type. |
24 | /// |
25 | /// For example, here are some owned types and their zero-copy equivalents: |
26 | /// |
27 | /// - `Vec<String>`: `VarZeroVec<'a, str>` |
28 | /// - `Vec<Vec<u8>>>`: `VarZeroVec<'a, [u8]>` |
29 | /// - `Vec<Vec<u32>>`: `VarZeroVec<'a, ZeroSlice<u32>>` |
30 | /// - `Vec<Vec<String>>`: `VarZeroVec<'a, VarZeroSlice<str>>` |
31 | /// |
32 | /// Most of the methods on `VarZeroVec<'a, T>` come from its [`Deref`] implementation to [`VarZeroSlice<T>`](VarZeroSlice). |
33 | /// |
34 | /// For creating zero-copy vectors of fixed-size types, see [`ZeroVec`](crate::ZeroVec). |
35 | /// |
36 | /// `VarZeroVec<T>` behaves much like [`Cow`](alloc::borrow::Cow), where it can be constructed from |
37 | /// owned data (and then mutated!) but can also borrow from some buffer. |
38 | /// |
39 | /// The `F` type parameter is a [`VarZeroVecFormat`] (see its docs for more details), which can be used to select the |
40 | /// precise format of the backing buffer with various size and performance tradeoffs. It defaults to [`Index16`]. |
41 | /// |
42 | /// # Bytes and Equality |
43 | /// |
44 | /// Two [`VarZeroVec`]s are equal if and only if their bytes are equal, as described in the trait |
45 | /// [`VarULE`]. However, we do not guarantee stability of byte equality or serialization format |
46 | /// across major SemVer releases. |
47 | /// |
48 | /// To compare a [`Vec<T>`] to a [`VarZeroVec<T>`], it is generally recommended to use |
49 | /// [`Iterator::eq`], since it is somewhat expensive at runtime to convert from a [`Vec<T>`] to a |
50 | /// [`VarZeroVec<T>`] or vice-versa. |
51 | /// |
52 | /// Prior to zerovec reaching 1.0, the precise byte representation of [`VarZeroVec`] is still |
53 | /// under consideration, with different options along the space-time spectrum. See |
54 | /// [#1410](https://github.com/unicode-org/icu4x/issues/1410). |
55 | /// |
56 | /// # Example |
57 | /// |
58 | /// ```rust |
59 | /// # use std::str::Utf8Error; |
60 | /// # use zerovec::ule::ZeroVecError; |
61 | /// use zerovec::VarZeroVec; |
62 | /// |
63 | /// // The little-endian bytes correspond to the list of strings. |
64 | /// let strings = vec!["w" , "ω" , "文" , "𑄃" ]; |
65 | /// |
66 | /// #[derive(serde::Serialize, serde::Deserialize)] |
67 | /// struct Data<'a> { |
68 | /// #[serde(borrow)] |
69 | /// strings: VarZeroVec<'a, str>, |
70 | /// } |
71 | /// |
72 | /// let data = Data { |
73 | /// strings: VarZeroVec::from(&strings), |
74 | /// }; |
75 | /// |
76 | /// let bincode_bytes = |
77 | /// bincode::serialize(&data).expect("Serialization should be successful" ); |
78 | /// |
79 | /// // Will deserialize without allocations |
80 | /// let deserialized: Data = bincode::deserialize(&bincode_bytes) |
81 | /// .expect("Deserialization should be successful" ); |
82 | /// |
83 | /// assert_eq!(deserialized.strings.get(2), Some("文" )); |
84 | /// assert_eq!(deserialized.strings, &*strings); |
85 | /// # Ok::<(), ZeroVecError>(()) |
86 | /// ``` |
87 | /// |
88 | /// Here's another example with `ZeroSlice<T>` (similar to `[T]`): |
89 | /// |
90 | /// ```rust |
91 | /// # use std::str::Utf8Error; |
92 | /// # use zerovec::ule::ZeroVecError; |
93 | /// use zerovec::ule::*; |
94 | /// use zerovec::VarZeroVec; |
95 | /// use zerovec::ZeroSlice; |
96 | /// use zerovec::ZeroVec; |
97 | /// |
98 | /// // The structured list correspond to the list of integers. |
99 | /// let numbers: &[&[u32]] = &[ |
100 | /// &[12, 25, 38], |
101 | /// &[39179, 100], |
102 | /// &[42, 55555], |
103 | /// &[12345, 54321, 9], |
104 | /// ]; |
105 | /// |
106 | /// #[derive(serde::Serialize, serde::Deserialize)] |
107 | /// struct Data<'a> { |
108 | /// #[serde(borrow)] |
109 | /// vecs: VarZeroVec<'a, ZeroSlice<u32>>, |
110 | /// } |
111 | /// |
112 | /// let data = Data { |
113 | /// vecs: VarZeroVec::from(numbers), |
114 | /// }; |
115 | /// |
116 | /// let bincode_bytes = |
117 | /// bincode::serialize(&data).expect("Serialization should be successful" ); |
118 | /// |
119 | /// let deserialized: Data = bincode::deserialize(&bincode_bytes) |
120 | /// .expect("Deserialization should be successful" ); |
121 | /// |
122 | /// assert_eq!(deserialized.vecs[0].get(1).unwrap(), 25); |
123 | /// assert_eq!(deserialized.vecs[1], *numbers[1]); |
124 | /// |
125 | /// # Ok::<(), ZeroVecError>(()) |
126 | /// ``` |
127 | /// |
128 | /// [`VarZeroVec`]s can be nested infinitely via a similar mechanism, see the docs of [`VarZeroSlice`] |
129 | /// for more information. |
130 | /// |
131 | /// # How it Works |
132 | /// |
133 | /// `VarZeroVec<T>`, when used with non-human-readable serializers (like `bincode`), will |
134 | /// serialize to a specially formatted list of bytes. The format is: |
135 | /// |
136 | /// - 4 bytes for `length` (interpreted as a little-endian u32) |
137 | /// - `4 * length` bytes of `indices` (interpreted as little-endian u32) |
138 | /// - Remaining bytes for actual `data` |
139 | /// |
140 | /// Each element in the `indices` array points to the starting index of its corresponding |
141 | /// data part in the `data` list. The ending index can be calculated from the starting index |
142 | /// of the next element (or the length of the slice if dealing with the last element). |
143 | /// |
144 | /// See [the design doc](https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md) for more details. |
145 | /// |
146 | /// [`ule`]: crate::ule |
147 | #[non_exhaustive ] |
148 | pub enum VarZeroVec<'a, T: ?Sized, F = Index16> { |
149 | /// An allocated VarZeroVec, allowing for mutations. |
150 | /// |
151 | /// # Examples |
152 | /// |
153 | /// ``` |
154 | /// use zerovec::VarZeroVec; |
155 | /// |
156 | /// let mut vzv = VarZeroVec::<str>::default(); |
157 | /// vzv.make_mut().push("foo" ); |
158 | /// vzv.make_mut().push("bar" ); |
159 | /// assert!(matches!(vzv, VarZeroVec::Owned(_))); |
160 | /// ``` |
161 | Owned(VarZeroVecOwned<T, F>), |
162 | /// A borrowed VarZeroVec, requiring no allocations. |
163 | /// |
164 | /// If a mutating operation is invoked on VarZeroVec, the Borrowed is converted to Owned. |
165 | /// |
166 | /// # Examples |
167 | /// |
168 | /// ``` |
169 | /// use zerovec::VarZeroVec; |
170 | /// |
171 | /// let bytes = &[ |
172 | /// 4, 0, 0, 0, 0, 0, 1, 0, 3, 0, 6, 0, 119, 207, 137, 230, 150, 135, 240, |
173 | /// 145, 132, 131, |
174 | /// ]; |
175 | /// |
176 | /// let vzv: VarZeroVec<str> = VarZeroVec::parse_byte_slice(bytes).unwrap(); |
177 | /// assert!(matches!(vzv, VarZeroVec::Borrowed(_))); |
178 | /// ``` |
179 | Borrowed(&'a VarZeroSlice<T, F>), |
180 | } |
181 | |
182 | impl<'a, T: ?Sized, F> Clone for VarZeroVec<'a, T, F> { |
183 | fn clone(&self) -> Self { |
184 | match *self { |
185 | VarZeroVec::Owned(ref o: &VarZeroVecOwned) => o.clone().into(), |
186 | VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(), |
187 | } |
188 | } |
189 | } |
190 | |
191 | impl<T: VarULE + ?Sized, F: VarZeroVecFormat> fmt::Debug for VarZeroVec<'_, T, F> |
192 | where |
193 | T: fmt::Debug, |
194 | { |
195 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
196 | VarZeroSlice::fmt(self, f) |
197 | } |
198 | } |
199 | |
200 | impl<'a, T: ?Sized, F> From<VarZeroVecOwned<T, F>> for VarZeroVec<'a, T, F> { |
201 | #[inline ] |
202 | fn from(other: VarZeroVecOwned<T, F>) -> Self { |
203 | VarZeroVec::Owned(other) |
204 | } |
205 | } |
206 | |
207 | impl<'a, T: ?Sized, F> From<&'a VarZeroSlice<T, F>> for VarZeroVec<'a, T, F> { |
208 | fn from(other: &'a VarZeroSlice<T, F>) -> Self { |
209 | VarZeroVec::Borrowed(other) |
210 | } |
211 | } |
212 | |
213 | impl<'a, T: ?Sized + VarULE, F: VarZeroVecFormat> From<VarZeroVec<'a, T, F>> |
214 | for VarZeroVecOwned<T, F> |
215 | { |
216 | #[inline ] |
217 | fn from(other: VarZeroVec<'a, T, F>) -> Self { |
218 | match other { |
219 | VarZeroVec::Owned(o: VarZeroVecOwned) => o, |
220 | VarZeroVec::Borrowed(b: &VarZeroSlice) => b.into(), |
221 | } |
222 | } |
223 | } |
224 | |
225 | impl<T: VarULE + ?Sized> Default for VarZeroVec<'_, T> { |
226 | #[inline ] |
227 | fn default() -> Self { |
228 | Self::new() |
229 | } |
230 | } |
231 | |
232 | impl<T: VarULE + ?Sized, F: VarZeroVecFormat> Deref for VarZeroVec<'_, T, F> { |
233 | type Target = VarZeroSlice<T, F>; |
234 | fn deref(&self) -> &VarZeroSlice<T, F> { |
235 | self.as_slice() |
236 | } |
237 | } |
238 | |
239 | impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVec<'a, T, F> { |
240 | /// Creates a new, empty `VarZeroVec<T>`. |
241 | /// |
242 | /// # Examples |
243 | /// |
244 | /// ``` |
245 | /// use zerovec::VarZeroVec; |
246 | /// |
247 | /// let vzv: VarZeroVec<str> = VarZeroVec::new(); |
248 | /// assert!(vzv.is_empty()); |
249 | /// ``` |
250 | #[inline ] |
251 | pub const fn new() -> Self { |
252 | Self::Borrowed(VarZeroSlice::new_empty()) |
253 | } |
254 | |
255 | /// Parse a VarZeroVec from a slice of the appropriate format |
256 | /// |
257 | /// Slices of the right format can be obtained via [`VarZeroSlice::as_bytes()`]. |
258 | /// |
259 | /// # Example |
260 | /// |
261 | /// ```rust |
262 | /// # use std::str::Utf8Error; |
263 | /// # use zerovec::ule::ZeroVecError; |
264 | /// # use zerovec::VarZeroVec; |
265 | /// |
266 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
267 | /// let vec = VarZeroVec::<str>::from(&strings); |
268 | /// |
269 | /// assert_eq!(&vec[0], "foo" ); |
270 | /// assert_eq!(&vec[1], "bar" ); |
271 | /// assert_eq!(&vec[2], "baz" ); |
272 | /// assert_eq!(&vec[3], "quux" ); |
273 | /// # Ok::<(), ZeroVecError>(()) |
274 | /// ``` |
275 | pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ZeroVecError> { |
276 | let borrowed = VarZeroSlice::<T, F>::parse_byte_slice(slice)?; |
277 | |
278 | Ok(VarZeroVec::Borrowed(borrowed)) |
279 | } |
280 | |
281 | /// Uses a `&[u8]` buffer as a `VarZeroVec<T>` without any verification. |
282 | /// |
283 | /// # Safety |
284 | /// |
285 | /// `bytes` need to be an output from [`VarZeroSlice::as_bytes()`]. |
286 | pub const unsafe fn from_bytes_unchecked(bytes: &'a [u8]) -> Self { |
287 | Self::Borrowed(core::mem::transmute(bytes)) |
288 | } |
289 | |
290 | /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. |
291 | /// |
292 | /// |
293 | /// # Example |
294 | /// |
295 | /// ```rust,ignore |
296 | /// # use std::str::Utf8Error; |
297 | /// # use zerovec::ule::ZeroVecError; |
298 | /// # use zerovec::VarZeroVec; |
299 | /// |
300 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
301 | /// let mut vec = VarZeroVec::<str>::from(&strings); |
302 | /// |
303 | /// assert_eq!(vec.len(), 4); |
304 | /// let mutvec = vec.make_mut(); |
305 | /// mutvec.push("lorem ipsum" .into()); |
306 | /// mutvec[2] = "dolor sit" .into(); |
307 | /// assert_eq!(&vec[0], "foo" ); |
308 | /// assert_eq!(&vec[1], "bar" ); |
309 | /// assert_eq!(&vec[2], "dolor sit" ); |
310 | /// assert_eq!(&vec[3], "quux" ); |
311 | /// assert_eq!(&vec[4], "lorem ipsum" ); |
312 | /// # Ok::<(), ZeroVecError>(()) |
313 | /// ``` |
314 | // |
315 | // This function is crate-public for now since we don't yet want to stabilize |
316 | // the internal implementation details |
317 | pub fn make_mut(&mut self) -> &mut VarZeroVecOwned<T, F> { |
318 | match self { |
319 | VarZeroVec::Owned(ref mut vec) => vec, |
320 | VarZeroVec::Borrowed(slice) => { |
321 | let new_self = VarZeroVecOwned::from_slice(slice); |
322 | *self = new_self.into(); |
323 | // recursion is limited since we are guaranteed to hit the Owned branch |
324 | self.make_mut() |
325 | } |
326 | } |
327 | } |
328 | |
329 | /// Converts a borrowed ZeroVec to an owned ZeroVec. No-op if already owned. |
330 | /// |
331 | /// # Example |
332 | /// |
333 | /// ``` |
334 | /// # use std::str::Utf8Error; |
335 | /// # use zerovec::ule::ZeroVecError; |
336 | /// # use zerovec::VarZeroVec; |
337 | /// |
338 | /// let strings = vec!["foo" , "bar" , "baz" , "quux" ]; |
339 | /// let vec = VarZeroVec::<str>::from(&strings); |
340 | /// |
341 | /// assert_eq!(vec.len(), 4); |
342 | /// // has 'static lifetime |
343 | /// let owned = vec.into_owned(); |
344 | /// # Ok::<(), ZeroVecError>(()) |
345 | /// ``` |
346 | pub fn into_owned(mut self) -> VarZeroVec<'static, T, F> { |
347 | self.make_mut(); |
348 | match self { |
349 | VarZeroVec::Owned(vec) => vec.into(), |
350 | _ => unreachable!(), |
351 | } |
352 | } |
353 | |
354 | /// Obtain this `VarZeroVec` as a [`VarZeroSlice`] |
355 | pub fn as_slice(&self) -> &VarZeroSlice<T, F> { |
356 | match *self { |
357 | VarZeroVec::Owned(ref owned) => owned, |
358 | VarZeroVec::Borrowed(b) => b, |
359 | } |
360 | } |
361 | |
362 | /// Takes the byte vector representing the encoded data of this VarZeroVec. If borrowed, |
363 | /// this function allocates a byte vector and copies the borrowed bytes into it. |
364 | /// |
365 | /// The bytes can be passed back to [`Self::parse_byte_slice()`]. |
366 | /// |
367 | /// To get a reference to the bytes without moving, see [`VarZeroSlice::as_bytes()`]. |
368 | /// |
369 | /// # Example |
370 | /// |
371 | /// ```rust |
372 | /// # use std::str::Utf8Error; |
373 | /// # use zerovec::ule::ZeroVecError; |
374 | /// # use zerovec::VarZeroVec; |
375 | /// |
376 | /// let strings = vec!["foo" , "bar" , "baz" ]; |
377 | /// let bytes = VarZeroVec::<str>::from(&strings).into_bytes(); |
378 | /// |
379 | /// let mut borrowed: VarZeroVec<str> = VarZeroVec::parse_byte_slice(&bytes)?; |
380 | /// assert_eq!(borrowed, &*strings); |
381 | /// |
382 | /// # Ok::<(), ZeroVecError>(()) |
383 | /// ``` |
384 | pub fn into_bytes(self) -> Vec<u8> { |
385 | match self { |
386 | VarZeroVec::Owned(vec) => vec.into_bytes(), |
387 | VarZeroVec::Borrowed(vec) => vec.as_bytes().to_vec(), |
388 | } |
389 | } |
390 | |
391 | /// Return whether the [`VarZeroVec`] is operating on owned or borrowed |
392 | /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can |
393 | /// be used to force it into an owned type |
394 | pub fn is_owned(&self) -> bool { |
395 | match self { |
396 | VarZeroVec::Owned(..) => true, |
397 | VarZeroVec::Borrowed(..) => false, |
398 | } |
399 | } |
400 | |
401 | #[cfg (feature = "bench" )] |
402 | #[doc (hidden)] |
403 | pub fn as_components<'b>(&'b self) -> VarZeroVecComponents<'b, T, F> { |
404 | self.as_slice().as_components() |
405 | } |
406 | } |
407 | |
408 | impl<A, T, F> From<&Vec<A>> for VarZeroVec<'static, T, F> |
409 | where |
410 | T: VarULE + ?Sized, |
411 | A: EncodeAsVarULE<T>, |
412 | F: VarZeroVecFormat, |
413 | { |
414 | #[inline ] |
415 | fn from(elements: &Vec<A>) -> Self { |
416 | Self::from(elements.as_slice()) |
417 | } |
418 | } |
419 | |
420 | impl<A, T, F> From<&[A]> for VarZeroVec<'static, T, F> |
421 | where |
422 | T: VarULE + ?Sized, |
423 | A: EncodeAsVarULE<T>, |
424 | F: VarZeroVecFormat, |
425 | { |
426 | #[inline ] |
427 | fn from(elements: &[A]) -> Self { |
428 | if elements.is_empty() { |
429 | VarZeroSlice::new_empty().into() |
430 | } else { |
431 | #[allow (clippy::unwrap_used)] // TODO(#1410) Better story for fallibility |
432 | VarZeroVecOwned::try_from_elements(elements).unwrap().into() |
433 | } |
434 | } |
435 | } |
436 | |
437 | impl<A, T, F, const N: usize> From<&[A; N]> for VarZeroVec<'static, T, F> |
438 | where |
439 | T: VarULE + ?Sized, |
440 | A: EncodeAsVarULE<T>, |
441 | F: VarZeroVecFormat, |
442 | { |
443 | #[inline ] |
444 | fn from(elements: &[A; N]) -> Self { |
445 | Self::from(elements.as_slice()) |
446 | } |
447 | } |
448 | |
449 | impl<'a, 'b, T, F> PartialEq<VarZeroVec<'b, T, F>> for VarZeroVec<'a, T, F> |
450 | where |
451 | T: VarULE, |
452 | T: ?Sized, |
453 | T: PartialEq, |
454 | F: VarZeroVecFormat, |
455 | { |
456 | #[inline ] |
457 | fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { |
458 | // VZV::from_elements used to produce a non-canonical representation of the |
459 | // empty VZV, so we cannot use byte equality for empty vecs. |
460 | if self.is_empty() || other.is_empty() { |
461 | return self.is_empty() && other.is_empty(); |
462 | } |
463 | // VarULE has an API guarantee that byte equality is semantic equality. |
464 | // For non-empty VZVs, there's only a single metadata representation, |
465 | // so this guarantee extends to the whole VZV representation. |
466 | self.as_bytes().eq(other.as_bytes()) |
467 | } |
468 | } |
469 | |
470 | impl<'a, T, F> Eq for VarZeroVec<'a, T, F> |
471 | where |
472 | T: VarULE, |
473 | T: ?Sized, |
474 | T: Eq, |
475 | F: VarZeroVecFormat, |
476 | { |
477 | } |
478 | |
479 | impl<T, A, F> PartialEq<&'_ [A]> for VarZeroVec<'_, T, F> |
480 | where |
481 | T: VarULE + ?Sized, |
482 | T: PartialEq, |
483 | A: AsRef<T>, |
484 | F: VarZeroVecFormat, |
485 | { |
486 | #[inline ] |
487 | fn eq(&self, other: &&[A]) -> bool { |
488 | self.iter().eq(other.iter().map(|t: &A| t.as_ref())) |
489 | } |
490 | } |
491 | |
492 | impl<T, A, F, const N: usize> PartialEq<[A; N]> for VarZeroVec<'_, T, F> |
493 | where |
494 | T: VarULE + ?Sized, |
495 | T: PartialEq, |
496 | A: AsRef<T>, |
497 | F: VarZeroVecFormat, |
498 | { |
499 | #[inline ] |
500 | fn eq(&self, other: &[A; N]) -> bool { |
501 | self.iter().eq(other.iter().map(|t: &A| t.as_ref())) |
502 | } |
503 | } |
504 | |
505 | impl<'a, T: VarULE + ?Sized + PartialOrd, F: VarZeroVecFormat> PartialOrd for VarZeroVec<'a, T, F> { |
506 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
507 | self.iter().partial_cmp(other.iter()) |
508 | } |
509 | } |
510 | |
511 | impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T, F> { |
512 | fn cmp(&self, other: &Self) -> Ordering { |
513 | self.iter().cmp(other.iter()) |
514 | } |
515 | } |
516 | |
517 | #[test ] |
518 | fn assert_single_empty_representation() { |
519 | assert_eq!( |
520 | VarZeroVec::<str>::new().as_bytes(), |
521 | VarZeroVec::<str>::from(&[] as &[&str]).as_bytes() |
522 | ); |
523 | } |
524 | |
525 | #[test ] |
526 | fn weird_empty_representation_equality() { |
527 | assert_eq!( |
528 | VarZeroVec::<str>::parse_byte_slice(&[0, 0, 0, 0]).unwrap(), |
529 | VarZeroVec::<str>::parse_byte_slice(&[]).unwrap() |
530 | ); |
531 | } |
532 | |