| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | #![allow (clippy::upper_case_acronyms)] |
| 6 | //! ULE implementation for Plain Old Data types, including all sized integers. |
| 7 | |
| 8 | use super::*; |
| 9 | use crate::impl_ule_from_array; |
| 10 | use crate::ZeroSlice; |
| 11 | use core::num::{NonZeroI8, NonZeroU8}; |
| 12 | |
/// A u8 array of little-endian data with infallible conversions to and from &[u8].
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)]
#[allow(clippy::exhaustive_structs)] // newtype
pub struct RawBytesULE<const N: usize>(pub [u8; N]);

impl<const N: usize> RawBytesULE<N> {
    /// Returns the wrapped bytes as a plain byte slice of length `N`.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Reinterprets a mutable byte slice as a mutable slice of `RawBytesULE<N>`.
    ///
    /// No length validation is performed: the result holds `bytes.len() / N` elements, and
    /// any trailing bytes that do not fill a whole element are left out of the view.
    /// When `N == 0` the returned slice is empty rather than panicking on a division by zero.
    #[inline]
    pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
        // `checked_div` returns `None` only when `N == 0`.
        let len = bytes.len().checked_div(N).unwrap_or(0);
        let data = bytes.as_mut_ptr();
        // SAFETY: `Self` is `#[repr(transparent)]` over `[u8; N]`, so it has alignment 1 and
        // every bit pattern is valid. `len * N <= bytes.len()`, so the new slice stays inside
        // the exclusively borrowed input, whose lifetime it inherits.
        unsafe { core::slice::from_raw_parts_mut(data.cast::<Self>(), len) }
    }
}
| 33 | |
| 34 | // Safety (based on the safety checklist on the ULE trait): |
| 35 | // 1. RawBytesULE does not include any uninitialized or padding bytes. |
| 36 | // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) |
| 37 | // 2. RawBytesULE is aligned to 1 byte. |
| 38 | // (achieved by `#[repr(transparent)]` on a type that satisfies this invariant) |
| 39 | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). |
| 40 | // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes. |
| 41 | // 5. The other ULE methods use the default impl. |
| 42 | // 6. RawBytesULE byte equality is semantic equality |
| 43 | unsafe impl<const N: usize> ULE for RawBytesULE<N> { |
| 44 | #[inline ] |
| 45 | fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { |
| 46 | if bytes.len() % N == 0 { |
| 47 | // Safe because Self is transparent over [u8; N] |
| 48 | Ok(()) |
| 49 | } else { |
| 50 | Err(ZeroVecError::length::<Self>(bytes.len())) |
| 51 | } |
| 52 | } |
| 53 | } |
| 54 | |
| 55 | impl<const N: usize> From<[u8; N]> for RawBytesULE<N> { |
| 56 | #[inline ] |
| 57 | fn from(le_bytes: [u8; N]) -> Self { |
| 58 | Self(le_bytes) |
| 59 | } |
| 60 | } |
| 61 | |
/// Generates the integer-facing inherent methods of `RawBytesULE<$width>` for the unsigned
/// integer type `$uint`, whose little-endian encoding is `$width` bytes long.
macro_rules! impl_byte_slice_size {
    ($uint:ty, $width:literal) => {
        impl RawBytesULE<$width> {
            #[doc = concat!("Gets this `RawBytesULE` as a `", stringify!($uint), "`. This is equivalent to calling [`AsULE::from_unaligned()`] on the appropriately sized type.")]
            #[inline]
            pub fn as_unsigned_int(&self) -> $uint {
                let unaligned: Self = *self;
                <$uint as $crate::ule::AsULE>::from_unaligned(unaligned)
            }

            #[doc = concat!("Converts a `", stringify!($uint), "` to a `RawBytesULE`. This is equivalent to calling [`AsULE::to_unaligned()`] on the appropriately sized type.")]
            #[inline]
            pub const fn from_aligned(value: $uint) -> Self {
                let le_bytes: [u8; $width] = value.to_le_bytes();
                RawBytesULE(le_bytes)
            }

            // `impl_ule_from_array!` is defined elsewhere in the crate; it receives the
            // aligned type, the unaligned type and an all-zero unaligned value.
            impl_ule_from_array!(
                $uint,
                RawBytesULE<$width>,
                RawBytesULE([0; $width])
            );
        }
    };
}
| 85 | |
/// Generates a `const` constructor on `ZeroSlice<$base>` that accepts any byte slice whose
/// length is a whole multiple of `$size`.
///
/// NOTE(review): only the length is checked before `from_bytes_unchecked` is called. For a
/// `$base` with invalid bit patterns (this file instantiates the macro for `bool`), byte
/// values are not validated here — confirm that this is intended.
macro_rules! impl_const_constructors {
    ($base:ty, $size:literal) => {
        impl ZeroSlice<$base> {
            /// This function can be used for constructing ZeroVecs in a const context, avoiding
            /// parsing checks.
            ///
            /// This cannot be generic over T because of current limitations in `const`, but if
            /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`]
            /// instead.
            ///
            /// See [`ZeroSlice::cast()`] for an example.
            pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
                let len = bytes.len();
                // With `$size == 1` the remainder is trivially zero, which clippy would flag.
                #[allow(clippy::modulo_one)]
                let leftover = len % $size;
                if leftover != 0 {
                    return Err(ZeroVecError::InvalidLength {
                        ty: concat!("<const construct: ", $size, ">"),
                        len,
                    });
                }
                // SAFETY: the slice holds a whole number of `$size`-byte elements. This
                // presumes every bit pattern of that width is a valid element — TODO confirm
                // against the contract of `from_bytes_unchecked`.
                Ok(unsafe { Self::from_bytes_unchecked(bytes) })
            }
        }
    };
}
| 112 | |
/// Wires the integer type `$int` to `RawBytesULE<$width>`: a `From` conversion, the
/// `AsULE`/`EqULE` impls, and a `const` constructor named `$ctor`.
macro_rules! impl_byte_slice_type {
    ($ctor:ident, $int:ty, $width:literal) => {
        impl From<$int> for RawBytesULE<$width> {
            #[inline]
            fn from(value: $int) -> Self {
                RawBytesULE(value.to_le_bytes())
            }
        }

        impl AsULE for $int {
            type ULE = RawBytesULE<$width>;

            #[inline]
            fn to_unaligned(self) -> Self::ULE {
                let le_bytes = self.to_le_bytes();
                RawBytesULE(le_bytes)
            }

            #[inline]
            fn from_unaligned(unaligned: Self::ULE) -> Self {
                let RawBytesULE(le_bytes) = unaligned;
                Self::from_le_bytes(le_bytes)
            }
        }

        // EqULE is true because $int and RawBytesULE<$width>
        // have the same byte sequence on little-endian
        unsafe impl EqULE for $int {}

        impl RawBytesULE<$width> {
            /// Encodes `v` as little-endian bytes; usable in `const` contexts.
            pub const fn $ctor(v: $int) -> Self {
                Self(v.to_le_bytes())
            }
        }
    };
}
| 143 | |
/// Applies `impl_byte_slice_type!` to an unsigned integer type, naming the generated
/// `const` constructor `from_unsigned`.
macro_rules! impl_byte_slice_unsigned_type {
    ($uint:ty, $width:literal) => {
        impl_byte_slice_type!(from_unsigned, $uint, $width);
    };
}
| 149 | |
/// Applies `impl_byte_slice_type!` to a signed integer type, naming the generated
/// `const` constructor `from_signed`.
macro_rules! impl_byte_slice_signed_type {
    ($sint:ty, $width:literal) => {
        impl_byte_slice_type!(from_signed, $sint, $width);
    };
}
| 155 | |
| 156 | impl_byte_slice_size!(u16, 2); |
| 157 | impl_byte_slice_size!(u32, 4); |
| 158 | impl_byte_slice_size!(u64, 8); |
| 159 | impl_byte_slice_size!(u128, 16); |
| 160 | |
| 161 | impl_byte_slice_unsigned_type!(u16, 2); |
| 162 | impl_byte_slice_unsigned_type!(u32, 4); |
| 163 | impl_byte_slice_unsigned_type!(u64, 8); |
| 164 | impl_byte_slice_unsigned_type!(u128, 16); |
| 165 | |
| 166 | impl_byte_slice_signed_type!(i16, 2); |
| 167 | impl_byte_slice_signed_type!(i32, 4); |
| 168 | impl_byte_slice_signed_type!(i64, 8); |
| 169 | impl_byte_slice_signed_type!(i128, 16); |
| 170 | |
| 171 | impl_const_constructors!(u8, 1); |
| 172 | impl_const_constructors!(u16, 2); |
| 173 | impl_const_constructors!(u32, 4); |
| 174 | impl_const_constructors!(u64, 8); |
| 175 | impl_const_constructors!(u128, 16); |
| 176 | |
| 177 | // Note: The f32 and f64 const constructors currently have limited use because |
| 178 | // `f32::to_le_bytes` is not yet const. |
| 179 | |
| 180 | impl_const_constructors!(bool, 1); |
| 181 | |
| 182 | // Safety (based on the safety checklist on the ULE trait): |
| 183 | // 1. u8 does not include any uninitialized or padding bytes. |
| 184 | // 2. u8 is aligned to 1 byte. |
| 185 | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). |
| 186 | // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). |
| 187 | // 5. The other ULE methods use the default impl. |
| 188 | // 6. u8 byte equality is semantic equality |
| 189 | unsafe impl ULE for u8 { |
| 190 | #[inline ] |
| 191 | fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { |
| 192 | Ok(()) |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | impl AsULE for u8 { |
| 197 | type ULE = Self; |
| 198 | #[inline ] |
| 199 | fn to_unaligned(self) -> Self::ULE { |
| 200 | self |
| 201 | } |
| 202 | #[inline ] |
| 203 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 204 | unaligned |
| 205 | } |
| 206 | } |
| 207 | |
| 208 | // EqULE is true because u8 is its own ULE. |
| 209 | unsafe impl EqULE for u8 {} |
| 210 | |
| 211 | // Safety (based on the safety checklist on the ULE trait): |
| 212 | // 1. NonZeroU8 does not include any uninitialized or padding bytes. |
| 213 | // 2. NonZeroU8 is aligned to 1 byte. |
| 214 | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00). |
| 215 | // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). |
| 216 | // 5. The other ULE methods use the default impl. |
| 217 | // 6. NonZeroU8 byte equality is semantic equality |
| 218 | unsafe impl ULE for NonZeroU8 { |
| 219 | #[inline ] |
| 220 | fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { |
| 221 | bytes.iter().try_for_each(|b: &u8| { |
| 222 | if *b == 0x00 { |
| 223 | Err(ZeroVecError::parse::<Self>()) |
| 224 | } else { |
| 225 | Ok(()) |
| 226 | } |
| 227 | }) |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | impl AsULE for NonZeroU8 { |
| 232 | type ULE = Self; |
| 233 | #[inline ] |
| 234 | fn to_unaligned(self) -> Self::ULE { |
| 235 | self |
| 236 | } |
| 237 | #[inline ] |
| 238 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 239 | unaligned |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | unsafe impl EqULE for NonZeroU8 {} |
| 244 | |
| 245 | impl NicheBytes<1> for NonZeroU8 { |
| 246 | const NICHE_BIT_PATTERN: [u8; 1] = [0x00]; |
| 247 | } |
| 248 | |
| 249 | // Safety (based on the safety checklist on the ULE trait): |
| 250 | // 1. i8 does not include any uninitialized or padding bytes. |
| 251 | // 2. i8 is aligned to 1 byte. |
| 252 | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (never). |
| 253 | // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). |
| 254 | // 5. The other ULE methods use the default impl. |
| 255 | // 6. i8 byte equality is semantic equality |
| 256 | unsafe impl ULE for i8 { |
| 257 | #[inline ] |
| 258 | fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> { |
| 259 | Ok(()) |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | impl AsULE for i8 { |
| 264 | type ULE = Self; |
| 265 | #[inline ] |
| 266 | fn to_unaligned(self) -> Self::ULE { |
| 267 | self |
| 268 | } |
| 269 | #[inline ] |
| 270 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 271 | unaligned |
| 272 | } |
| 273 | } |
| 274 | |
| 275 | // EqULE is true because i8 is its own ULE. |
| 276 | unsafe impl EqULE for i8 {} |
| 277 | |
| 278 | impl AsULE for NonZeroI8 { |
| 279 | type ULE = NonZeroU8; |
| 280 | #[inline ] |
| 281 | fn to_unaligned(self) -> Self::ULE { |
| 282 | // Safety: NonZeroU8 and NonZeroI8 have same size |
| 283 | unsafe { core::mem::transmute(self) } |
| 284 | } |
| 285 | |
| 286 | #[inline ] |
| 287 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 288 | // Safety: NonZeroU8 and NonZeroI8 have same size |
| 289 | unsafe { core::mem::transmute(src:unaligned) } |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | // These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation |
| 294 | // on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits |
| 295 | // |
| 296 | // The only potential problem is that some older platforms treat signaling NaNs differently. This is |
| 297 | // still quite portable, signalingness is not typically super important. |
| 298 | |
| 299 | impl AsULE for f32 { |
| 300 | type ULE = RawBytesULE<4>; |
| 301 | #[inline ] |
| 302 | fn to_unaligned(self) -> Self::ULE { |
| 303 | self.to_bits().to_unaligned() |
| 304 | } |
| 305 | #[inline ] |
| 306 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 307 | Self::from_bits(u32::from_unaligned(unaligned)) |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | impl AsULE for f64 { |
| 312 | type ULE = RawBytesULE<8>; |
| 313 | #[inline ] |
| 314 | fn to_unaligned(self) -> Self::ULE { |
| 315 | self.to_bits().to_unaligned() |
| 316 | } |
| 317 | #[inline ] |
| 318 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 319 | Self::from_bits(u64::from_unaligned(unaligned)) |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | // The from_bits documentation mentions that they have identical byte representations to integers |
| 324 | // and EqULE only cares about LE systems |
| 325 | unsafe impl EqULE for f32 {} |
| 326 | unsafe impl EqULE for f64 {} |
| 327 | |
| 328 | // The bool impl is not as efficient as it could be |
| 329 | // We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking |
| 330 | // for better bitpacking |
| 331 | |
| 332 | // Safety (based on the safety checklist on the ULE trait): |
| 333 | // 1. bool does not include any uninitialized or padding bytes (the remaining 7 bytes in bool are by definition zero) |
| 334 | // 2. bool is aligned to 1 byte. |
| 335 | // 3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1). |
| 336 | // 4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never). |
| 337 | // 5. The other ULE methods use the default impl. |
| 338 | // 6. bool byte equality is semantic equality |
| 339 | unsafe impl ULE for bool { |
| 340 | #[inline ] |
| 341 | fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> { |
| 342 | for byte: &u8 in bytes { |
| 343 | // https://doc.rust-lang.org/reference/types/boolean.html |
| 344 | // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1 |
| 345 | if *byte > 1 { |
| 346 | return Err(ZeroVecError::parse::<Self>()); |
| 347 | } |
| 348 | } |
| 349 | Ok(()) |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | impl AsULE for bool { |
| 354 | type ULE = Self; |
| 355 | #[inline ] |
| 356 | fn to_unaligned(self) -> Self::ULE { |
| 357 | self |
| 358 | } |
| 359 | #[inline ] |
| 360 | fn from_unaligned(unaligned: Self::ULE) -> Self { |
| 361 | unaligned |
| 362 | } |
| 363 | } |
| 364 | |
| 365 | // EqULE is true because bool is its own ULE. |
| 366 | unsafe impl EqULE for bool {} |
| 367 | |