// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::*;
use crate::varzerovec::VarZeroVecFormat;
use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
use alloc::borrow::{Cow, ToOwned};
use alloc::boxed::Box;
use alloc::string::String;
use alloc::{vec, vec::Vec};
use core::mem;

/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
/// custom DSTs where the type cannot be obtained as a reference to some other type.
///
/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices
/// passed to the callback must, when concatenated, be a valid instance of the VarULE type.
///
/// See the [custom VarULE documentation](crate::ule::custom) for examples.
///
/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
///
/// A typical implementation will take each field in the order found in the [`VarULE`] type,
/// convert it to ULE, call [`ULE::as_byte_slice()`] on it, and pass the slices to `cb` in order.
/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have its underlying
/// byte representation passed through.
///
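/// For illustration, a minimal sketch of such an implementation for a hypothetical
/// `MyVarULE` type (a `u32` field followed by a variable-length `str` tail), encoded from a
/// hypothetical owned counterpart `MyOwned`, might look like this:
///
/// ```rust,ignore
/// // `MyVarULE` and `MyOwned` are hypothetical types used only for illustration.
/// unsafe impl EncodeAsVarULE<MyVarULE> for MyOwned {
///     fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
///         // Convert each field to its ULE form and pass the byte slices to `cb`
///         // in declaration order; concatenated, they form a valid `MyVarULE`.
///         let field = self.field.to_unaligned();
///         cb(&[
///             ULE::as_byte_slice(core::slice::from_ref(&field)),
///             self.tail.as_bytes(),
///         ])
///     }
/// }
/// ```
///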
/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
/// dynamically-sized part.
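///
/// Continuing the hypothetical `MyVarULE` sketch above, such an override might look like:
///
/// ```rust,ignore
/// fn encode_var_ule_len(&self) -> usize {
///     // Hypothetical fields: a fixed-size `u32` plus a variable-length `str` tail.
///     core::mem::size_of::<u32>() + self.tail.len()
/// }
/// ```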
///
/// # Safety
///
/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
/// - It must call `cb` (only once)
/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
///   (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result)
/// - It must return the return value of `cb` to the caller
///
/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
/// with `unreachable!()`.
///
/// The safety invariants of [`Self::encode_var_ule_len()`] are:
/// - It must return the length of the corresponding VarULE type
///
/// The safety invariants of [`Self::encode_var_ule_write()`] are:
/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
    /// Calls `cb` with a piecewise list of byte slices that when concatenated
    /// produce the memory pattern of the corresponding instance of `T`.
    ///
    /// Do not call this function directly; instead use the other two. Some implementors
    /// may define this function to panic.
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;

    /// Return the length, in bytes, of the corresponding [`VarULE`] type
    fn encode_var_ule_len(&self) -> usize {
        self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
    }

    /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
    /// be the size of [`Self::encode_var_ule_len()`]
    fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
        debug_assert_eq!(self.encode_var_ule_len(), dst.len());
        self.encode_var_ule_as_slices(move |slices| {
            #[allow(clippy::indexing_slicing)] // by debug_assert
            for slice in slices {
                dst[..slice.len()].copy_from_slice(slice);
                dst = &mut dst[slice.len()..];
            }
        });
    }
}

/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>`
///
/// This is primarily useful for generating `Deserialize` impls for VarULE types
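///
/// A minimal usage sketch (assuming this function is reachable as
/// `zerovec::ule::encode_varule_to_box`; `String` encodes as the `str` VarULE type):
///
/// ```rust,ignore
/// // Hypothetical usage: encode an owned `String` into a boxed `str` VarULE.
/// use zerovec::ule::encode_varule_to_box;
///
/// let boxed: Box<str> = encode_varule_to_box(&String::from("hello"));
/// assert_eq!(&*boxed, "hello");
/// ```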
pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
    // zero-fill the vector to avoid uninitialized data UB
    let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
    x.encode_var_ule_write(&mut vec);
    let boxed = mem::ManuallyDrop::new(vec.into_boxed_slice());
    unsafe {
        // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
        // and can be recouped via from_byte_slice_unchecked()
        let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;

        // Safety: we can construct an owned version since we have mem::forgotten the older owner
        Box::from_raw(ptr)
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_byte_slice(self)])
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_byte_slice(self)])
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
where
    T: ToOwned,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_byte_slice(self.as_ref())])
    }
}

unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[T::as_byte_slice(self)])
    }
}

unsafe impl EncodeAsVarULE<str> for String {
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[self.as_bytes()])
    }
}

// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
where
    T: ULE,
{
    fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        cb(&[<[T] as VarULE>::as_byte_slice(self)])
    }
}

unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.len() * core::mem::size_of::<T::ULE>()
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        #[allow(non_snake_case)]
        let S: usize = core::mem::size_of::<T::ULE>();
        debug_assert_eq!(self.len() * S, dst.len());
        for (item, chunk) in self.iter().zip(dst.chunks_mut(S)) {
            let ule = item.to_unaligned();
            chunk.copy_from_slice(ULE::as_byte_slice(core::slice::from_ref(&ule)));
        }
    }
}

unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_slice().encode_var_ule_len()
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        self.as_slice().encode_var_ule_write(dst)
    }
}

unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
where
    T: AsULE + 'static,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_bytes().len()
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        debug_assert_eq!(self.as_bytes().len(), dst.len());
        dst.copy_from_slice(self.as_bytes());
    }
}

unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
where
    T: VarULE + ?Sized,
    E: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unimplemented!()
    }

    #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
    fn encode_var_ule_len(&self) -> usize {
        crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
    }

    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, dst)
    }
}

unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
where
    T: VarULE + ?Sized,
    E: EncodeAsVarULE<T>,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice())
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst)
    }
}

unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
where
    T: VarULE + ?Sized,
    F: VarZeroVecFormat,
{
    fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    #[inline]
    fn encode_var_ule_len(&self) -> usize {
        self.as_bytes().len()
    }

    #[inline]
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        debug_assert_eq!(self.as_bytes().len(), dst.len());
        dst.copy_from_slice(self.as_bytes());
    }
}

#[cfg(test)]
mod test {
    use super::*;

    const STRING_ARRAY: [&str; 2] = ["hello", "world"];

    const STRING_SLICE: &[&str] = &STRING_ARRAY;

    const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];

    const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];

    const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];

    const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];

    const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];

    const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];

    const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];

    const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];

    const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];

    const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];

    #[test]
    fn test_vzv_from() {
        type VZV<'a, T> = VarZeroVec<'a, T>;
        type ZS<T> = ZeroSlice<T>;
        type VZS<T> = VarZeroSlice<T>;

        let u8_zerovec: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
        let u8_2d_zerovec: [ZeroVec<u8>; 2] = [u8_zerovec.clone(), u8_zerovec.clone()];
        let u8_2d_vec: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
        let u8_3d_vec: Vec<Vec<Vec<u8>>> = vec![u8_2d_vec.clone(), u8_2d_vec.clone()];

        let u32_zerovec: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
        let u32_2d_zerovec: [ZeroVec<u32>; 2] = [u32_zerovec.clone(), u32_zerovec.clone()];
        let u32_2d_vec: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
        let u32_3d_vec: Vec<Vec<Vec<u32>>> = vec![u32_2d_vec.clone(), u32_2d_vec.clone()];

        let a: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
        let b: VZV<str> = VarZeroVec::from(STRING_SLICE);
        let c: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
        assert_eq!(a, STRING_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);

        let a: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
        let b: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
        let c: VZV<[u8]> = VarZeroVec::from(&u8_2d_vec);
        assert_eq!(a, U8_2D_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);
        let u8_3d_vzv_brackets = &[a.clone(), a.clone()];

        let a: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
        let b: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
        let c: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_vec);
        let d: VZV<ZS<u8>> = VarZeroVec::from(&u8_2d_zerovec);
        assert_eq!(a, U8_2D_SLICE);
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
        let u8_3d_vzv_zeroslice = &[a.clone(), a.clone()];

        let a: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
        let b: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
        let c: VZV<VZS<[u8]>> = VarZeroVec::from(&u8_3d_vec);
        let d: VZV<VZS<[u8]>> = VarZeroVec::from(u8_3d_vzv_brackets);
        assert_eq!(
            a.iter()
                .map(|x| x.iter().map(|y| y.to_vec()).collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);

        let a: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
        let b: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
        let c: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&u8_3d_vec);
        let d: VZV<VZS<ZS<u8>>> = VarZeroVec::from(u8_3d_vzv_zeroslice);
        assert_eq!(
            a.iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u8>>())
                    .collect::<Vec<Vec<u8>>>())
                .collect::<Vec<Vec<Vec<u8>>>>(),
            u8_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);

        let a: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
        let b: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
        let c: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_vec);
        let d: VZV<ZS<u32>> = VarZeroVec::from(&u32_2d_zerovec);
        assert_eq!(a, u32_2d_zerovec);
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
        let u32_3d_vzv = &[a.clone(), a.clone()];

        let a: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
        let b: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
        let c: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&u32_3d_vec);
        let d: VZV<VZS<ZS<u32>>> = VarZeroVec::from(u32_3d_vzv);
        assert_eq!(
            a.iter()
                .map(|x| x
                    .iter()
                    .map(|y| y.iter().collect::<Vec<u32>>())
                    .collect::<Vec<Vec<u32>>>())
                .collect::<Vec<Vec<Vec<u32>>>>(),
            u32_3d_vec
        );
        assert_eq!(a, b);
        assert_eq!(a, c);
        assert_eq!(a, d);
    }
}