1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::ule::*;
6use crate::varzerovec::VarZeroVecFormat;
7use crate::{VarZeroSlice, VarZeroVec, ZeroSlice, ZeroVec};
8use alloc::borrow::{Cow, ToOwned};
9use alloc::boxed::Box;
10use alloc::string::String;
11use alloc::{vec, vec::Vec};
12use core::mem;
13
14/// Allows types to be encoded as VarULEs. This is highly useful for implementing VarULE on
15/// custom DSTs where the type cannot be obtained as a reference to some other type.
16///
17/// [`Self::encode_var_ule_as_slices()`] should be implemented by providing an encoded slice for each field
18/// of the VarULE type to the callback, in order. For an implementation to be safe, the slices
19/// to the callback must, when concatenated, be a valid instance of the VarULE type.
20///
21/// See the [custom VarULEdocumentation](crate::ule::custom) for examples.
22///
23/// [`Self::encode_var_ule_as_slices()`] is only used to provide default implementations for [`Self::encode_var_ule_write()`]
24/// and [`Self::encode_var_ule_len()`]. If you override the default implementations it is totally valid to
25/// replace [`Self::encode_var_ule_as_slices()`]'s body with `unreachable!()`. This can be done for cases where
26/// it is not possible to implement [`Self::encode_var_ule_as_slices()`] but the other methods still work.
27///
28/// A typical implementation will take each field in the order found in the [`VarULE`] type,
29/// convert it to ULE, call [`ULE::as_byte_slice()`] on them, and pass the slices to `cb` in order.
30/// A trailing [`ZeroVec`](crate::ZeroVec) or [`VarZeroVec`](crate::VarZeroVec) can have their underlying
31/// byte representation passed through.
32///
33/// In case the compiler is not optimizing [`Self::encode_var_ule_len()`], it can be overridden. A typical
34/// implementation will add up the sizes of each field on the [`VarULE`] type and then add in the byte length of the
35/// dynamically-sized part.
36///
37/// # Safety
38///
39/// The safety invariants of [`Self::encode_var_ule_as_slices()`] are:
40/// - It must call `cb` (only once)
41/// - The slices passed to `cb`, if concatenated, should be a valid instance of the `T` [`VarULE`] type
42/// (i.e. if fed to [`VarULE::validate_byte_slice()`] they must produce a successful result)
43/// - It must return the return value of `cb` to the caller
44///
45/// One or more of [`Self::encode_var_ule_len()`] and [`Self::encode_var_ule_write()`] may be provided.
46/// If both are, then `zerovec` code is guaranteed to not call [`Self::encode_var_ule_as_slices()`], and it may be replaced
47/// with `unreachable!()`.
48///
49/// The safety invariants of [`Self::encode_var_ule_len()`] are:
50/// - It must return the length of the corresponding VarULE type
51///
52/// The safety invariants of [`Self::encode_var_ule_write()`] are:
53/// - The slice written to `dst` must be a valid instance of the `T` [`VarULE`] type
54pub unsafe trait EncodeAsVarULE<T: VarULE + ?Sized> {
55 /// Calls `cb` with a piecewise list of byte slices that when concatenated
56 /// produce the memory pattern of the corresponding instance of `T`.
57 ///
58 /// Do not call this function directly; instead use the other two. Some implementors
59 /// may define this function to panic.
60 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R;
61
62 /// Return the length, in bytes, of the corresponding [`VarULE`] type
63 fn encode_var_ule_len(&self) -> usize {
64 self.encode_var_ule_as_slices(|slices| slices.iter().map(|s| s.len()).sum())
65 }
66
67 /// Write the corresponding [`VarULE`] type to the `dst` buffer. `dst` should
68 /// be the size of [`Self::encode_var_ule_len()`]
69 fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
70 debug_assert_eq!(self.encode_var_ule_len(), dst.len());
71 self.encode_var_ule_as_slices(move |slices| {
72 #[allow(clippy::indexing_slicing)] // by debug_assert
73 for slice in slices {
74 dst[..slice.len()].copy_from_slice(slice);
75 dst = &mut dst[slice.len()..];
76 }
77 });
78 }
79}
80
81/// Given an [`EncodeAsVarULE`] type `S`, encode it into a `Box<T>`
82///
83/// This is primarily useful for generating `Deserialize` impls for VarULE types
84pub fn encode_varule_to_box<S: EncodeAsVarULE<T>, T: VarULE + ?Sized>(x: &S) -> Box<T> {
85 // zero-fill the vector to avoid uninitialized data UB
86 let mut vec: Vec<u8> = vec![0; x.encode_var_ule_len()];
87 x.encode_var_ule_write(&mut vec);
88 let boxed: ManuallyDrop> = mem::ManuallyDrop::new(vec.into_boxed_slice());
89 unsafe {
90 // Safety: `ptr` is a box, and `T` is a VarULE which guarantees it has the same memory layout as `[u8]`
91 // and can be recouped via from_byte_slice_unchecked()
92 let ptr: *mut T = T::from_byte_slice_unchecked(&boxed) as *const T as *mut T;
93
94 // Safety: we can construct an owned version since we have mem::forgotten the older owner
95 Box::from_raw(ptr)
96 }
97}
98
99unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for T {
100 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
101 cb(&[T::as_byte_slice(self)])
102 }
103}
104
105unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for &'_ T {
106 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
107 cb(&[T::as_byte_slice(self)])
108 }
109}
110
111unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Cow<'_, T>
112where
113 T: ToOwned,
114{
115 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
116 cb(&[T::as_byte_slice(self.as_ref())])
117 }
118}
119
120unsafe impl<T: VarULE + ?Sized> EncodeAsVarULE<T> for Box<T> {
121 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
122 cb(&[T::as_byte_slice(self)])
123 }
124}
125
126unsafe impl EncodeAsVarULE<str> for String {
127 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
128 cb(&[self.as_bytes()])
129 }
130}
131
132// Note: This impl could technically use `T: AsULE`, but we want users to prefer `ZeroSlice<T>`
133// for cases where T is not a ULE. Therefore, we can use the more efficient `memcpy` impl here.
134unsafe impl<T> EncodeAsVarULE<[T]> for Vec<T>
135where
136 T: ULE,
137{
138 fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
139 cb(&[<[T] as VarULE>::as_byte_slice(self)])
140 }
141}
142
143unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for &'_ [T]
144where
145 T: AsULE + 'static,
146{
147 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
148 // unnecessary if the other two are implemented
149 unreachable!()
150 }
151
152 #[inline]
153 fn encode_var_ule_len(&self) -> usize {
154 self.len() * core::mem::size_of::<T::ULE>()
155 }
156
157 fn encode_var_ule_write(&self, dst: &mut [u8]) {
158 #[allow(non_snake_case)]
159 let S: usize = core::mem::size_of::<T::ULE>();
160 debug_assert_eq!(self.len() * S, dst.len());
161 for (item: &T, ref mut chunk: &mut &mut [u8]) in self.iter().zip(dst.chunks_mut(chunk_size:S)) {
162 let ule: ::ULE = item.to_unaligned();
163 chunk.copy_from_slice(src:ULE::as_byte_slice(core::slice::from_ref(&ule)));
164 }
165 }
166}
167
168unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for Vec<T>
169where
170 T: AsULE + 'static,
171{
172 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
173 // unnecessary if the other two are implemented
174 unreachable!()
175 }
176
177 #[inline]
178 fn encode_var_ule_len(&self) -> usize {
179 self.as_slice().encode_var_ule_len()
180 }
181
182 #[inline]
183 fn encode_var_ule_write(&self, dst: &mut [u8]) {
184 self.as_slice().encode_var_ule_write(dst)
185 }
186}
187
188unsafe impl<T> EncodeAsVarULE<ZeroSlice<T>> for ZeroVec<'_, T>
189where
190 T: AsULE + 'static,
191{
192 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
193 // unnecessary if the other two are implemented
194 unreachable!()
195 }
196
197 #[inline]
198 fn encode_var_ule_len(&self) -> usize {
199 self.as_bytes().len()
200 }
201
202 fn encode_var_ule_write(&self, dst: &mut [u8]) {
203 debug_assert_eq!(self.as_bytes().len(), dst.len());
204 dst.copy_from_slice(self.as_bytes());
205 }
206}
207
208unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for &'_ [E]
209where
210 T: VarULE + ?Sized,
211 E: EncodeAsVarULE<T>,
212 F: VarZeroVecFormat,
213{
214 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
215 // unnecessary if the other two are implemented
216 unimplemented!()
217 }
218
219 #[allow(clippy::unwrap_used)] // TODO(#1410): Rethink length errors in VZV.
220 fn encode_var_ule_len(&self) -> usize {
221 crate::varzerovec::components::compute_serializable_len::<T, E, F>(self).unwrap() as usize
222 }
223
224 fn encode_var_ule_write(&self, dst: &mut [u8]) {
225 crate::varzerovec::components::write_serializable_bytes::<T, E, F>(self, output:dst)
226 }
227}
228
229unsafe impl<T, E, F> EncodeAsVarULE<VarZeroSlice<T, F>> for Vec<E>
230where
231 T: VarULE + ?Sized,
232 E: EncodeAsVarULE<T>,
233 F: VarZeroVecFormat,
234{
235 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
236 // unnecessary if the other two are implemented
237 unreachable!()
238 }
239
240 #[inline]
241 fn encode_var_ule_len(&self) -> usize {
242 <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_len(&self.as_slice())
243 }
244
245 #[inline]
246 fn encode_var_ule_write(&self, dst: &mut [u8]) {
247 <_ as EncodeAsVarULE<VarZeroSlice<T, F>>>::encode_var_ule_write(&self.as_slice(), dst)
248 }
249}
250
251unsafe impl<T, F> EncodeAsVarULE<VarZeroSlice<T, F>> for VarZeroVec<'_, T, F>
252where
253 T: VarULE + ?Sized,
254 F: VarZeroVecFormat,
255{
256 fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R {
257 // unnecessary if the other two are implemented
258 unreachable!()
259 }
260
261 #[inline]
262 fn encode_var_ule_len(&self) -> usize {
263 self.as_bytes().len()
264 }
265
266 #[inline]
267 fn encode_var_ule_write(&self, dst: &mut [u8]) {
268 debug_assert_eq!(self.as_bytes().len(), dst.len());
269 dst.copy_from_slice(self.as_bytes());
270 }
271}
272
#[cfg(test)]
mod test {
    use super::*;

    // String fixtures: the same two strings as an array and as a slice.
    const STRING_ARRAY: [&str; 2] = ["hello", "world"];
    const STRING_SLICE: &[&str] = &STRING_ARRAY;

    // `u8` fixtures at one, two and three levels of nesting.
    const U8_ARRAY: [u8; 8] = [0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07];
    const U8_2D_ARRAY: [&[u8]; 2] = [&U8_ARRAY, &U8_ARRAY];
    const U8_2D_SLICE: &[&[u8]] = &[&U8_ARRAY, &U8_ARRAY];
    const U8_3D_ARRAY: [&[&[u8]]; 2] = [U8_2D_SLICE, U8_2D_SLICE];
    const U8_3D_SLICE: &[&[&[u8]]] = &[U8_2D_SLICE, U8_2D_SLICE];

    // `u32` fixtures at one, two and three levels of nesting.
    const U32_ARRAY: [u32; 4] = [0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F];
    const U32_2D_ARRAY: [&[u32]; 2] = [&U32_ARRAY, &U32_ARRAY];
    const U32_2D_SLICE: &[&[u32]] = &[&U32_ARRAY, &U32_ARRAY];
    const U32_3D_ARRAY: [&[&[u32]]; 2] = [U32_2D_SLICE, U32_2D_SLICE];
    const U32_3D_SLICE: &[&[&[u32]]] = &[U32_2D_SLICE, U32_2D_SLICE];

    /// Builds `VarZeroVec`s of several element types from arrays, slices, `Vec`s and
    /// zerovec containers, and checks that every construction route yields equal results.
    #[test]
    fn test_vzv_from() {
        type VZV<'a, T> = VarZeroVec<'a, T>;
        type ZS<T> = ZeroSlice<T>;
        type VZS<T> = VarZeroSlice<T>;

        // Owned `u8` inputs.
        let zv_u8: ZeroVec<u8> = ZeroVec::from_slice_or_alloc(&U8_ARRAY);
        let zv_u8_pair: [ZeroVec<u8>; 2] = [zv_u8.clone(), zv_u8.clone()];
        let vec_u8_2d: Vec<Vec<u8>> = vec![U8_ARRAY.into(), U8_ARRAY.into()];
        let vec_u8_3d: Vec<Vec<Vec<u8>>> = vec![vec_u8_2d.clone(), vec_u8_2d.clone()];

        // Owned `u32` inputs.
        let zv_u32: ZeroVec<u32> = ZeroVec::from_slice_or_alloc(&U32_ARRAY);
        let zv_u32_pair: [ZeroVec<u32>; 2] = [zv_u32.clone(), zv_u32.clone()];
        let vec_u32_2d: Vec<Vec<u32>> = vec![U32_ARRAY.into(), U32_ARRAY.into()];
        let vec_u32_3d: Vec<Vec<Vec<u32>>> = vec![vec_u32_2d.clone(), vec_u32_2d.clone()];

        // VZV<str>
        let from_array: VZV<str> = VarZeroVec::from(&STRING_ARRAY);
        let from_slice: VZV<str> = VarZeroVec::from(STRING_SLICE);
        let from_vec: VZV<str> = VarZeroVec::from(&Vec::from(STRING_SLICE));
        assert_eq!(from_array, STRING_SLICE);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);

        // VZV<[u8]>
        let from_array: VZV<[u8]> = VarZeroVec::from(&U8_2D_ARRAY);
        let from_slice: VZV<[u8]> = VarZeroVec::from(U8_2D_SLICE);
        let from_vec: VZV<[u8]> = VarZeroVec::from(&vec_u8_2d);
        assert_eq!(from_array, U8_2D_SLICE);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        let vzv_pair_u8_brackets = &[from_array.clone(), from_array.clone()];

        // VZV<ZeroSlice<u8>>
        let from_array: VZV<ZS<u8>> = VarZeroVec::from(&U8_2D_ARRAY);
        let from_slice: VZV<ZS<u8>> = VarZeroVec::from(U8_2D_SLICE);
        let from_vec: VZV<ZS<u8>> = VarZeroVec::from(&vec_u8_2d);
        let from_zerovecs: VZV<ZS<u8>> = VarZeroVec::from(&zv_u8_pair);
        assert_eq!(from_array, U8_2D_SLICE);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        assert_eq!(from_array, from_zerovecs);
        let vzv_pair_u8_zeroslice = &[from_array.clone(), from_array.clone()];

        // VZV<VarZeroSlice<[u8]>>
        let from_array: VZV<VZS<[u8]>> = VarZeroVec::from(&U8_3D_ARRAY);
        let from_slice: VZV<VZS<[u8]>> = VarZeroVec::from(U8_3D_SLICE);
        let from_vec: VZV<VZS<[u8]>> = VarZeroVec::from(&vec_u8_3d);
        let from_vzvs: VZV<VZS<[u8]>> = VarZeroVec::from(vzv_pair_u8_brackets);
        let decoded = from_array
            .iter()
            .map(|outer| {
                outer
                    .iter()
                    .map(|inner| inner.to_vec())
                    .collect::<Vec<Vec<u8>>>()
            })
            .collect::<Vec<Vec<Vec<u8>>>>();
        assert_eq!(decoded, vec_u8_3d);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        assert_eq!(from_array, from_vzvs);

        // VZV<VarZeroSlice<ZeroSlice<u8>>>
        let from_array: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&U8_3D_ARRAY);
        let from_slice: VZV<VZS<ZS<u8>>> = VarZeroVec::from(U8_3D_SLICE);
        let from_vec: VZV<VZS<ZS<u8>>> = VarZeroVec::from(&vec_u8_3d);
        let from_vzvs: VZV<VZS<ZS<u8>>> = VarZeroVec::from(vzv_pair_u8_zeroslice);
        let decoded = from_array
            .iter()
            .map(|outer| {
                outer
                    .iter()
                    .map(|inner| inner.iter().collect::<Vec<u8>>())
                    .collect::<Vec<Vec<u8>>>()
            })
            .collect::<Vec<Vec<Vec<u8>>>>();
        assert_eq!(decoded, vec_u8_3d);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        assert_eq!(from_array, from_vzvs);

        // VZV<ZeroSlice<u32>>
        let from_array: VZV<ZS<u32>> = VarZeroVec::from(&U32_2D_ARRAY);
        let from_slice: VZV<ZS<u32>> = VarZeroVec::from(U32_2D_SLICE);
        let from_vec: VZV<ZS<u32>> = VarZeroVec::from(&vec_u32_2d);
        let from_zerovecs: VZV<ZS<u32>> = VarZeroVec::from(&zv_u32_pair);
        assert_eq!(from_array, zv_u32_pair);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        assert_eq!(from_array, from_zerovecs);
        let vzv_pair_u32 = &[from_array.clone(), from_array.clone()];

        // VZV<VarZeroSlice<ZeroSlice<u32>>>
        let from_array: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&U32_3D_ARRAY);
        let from_slice: VZV<VZS<ZS<u32>>> = VarZeroVec::from(U32_3D_SLICE);
        let from_vec: VZV<VZS<ZS<u32>>> = VarZeroVec::from(&vec_u32_3d);
        let from_vzvs: VZV<VZS<ZS<u32>>> = VarZeroVec::from(vzv_pair_u32);
        let decoded = from_array
            .iter()
            .map(|outer| {
                outer
                    .iter()
                    .map(|inner| inner.iter().collect::<Vec<u32>>())
                    .collect::<Vec<Vec<u32>>>()
            })
            .collect::<Vec<Vec<Vec<u32>>>>();
        assert_eq!(decoded, vec_u32_3d);
        assert_eq!(from_array, from_slice);
        assert_eq!(from_array, from_vec);
        assert_eq!(from_array, from_vzvs);
    }
}
401