| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
// Generates a subtag newtype `$name` wrapping `tinystr::TinyAsciiStr<$len_end>`,
// together with its constructors, conversions, trait impls, a compile-time
// construction macro and unit tests.
//
// Parameters:
// - `$doc`: attributes (typically doc comments) forwarded onto the generated struct.
// - `$name`: name of the generated struct.
// - `$path`: module path of the struct inside the crate (used for docs and `use` paths).
// - `$macro_name` / `$legacy_macro_name`: the construction macro is defined as
//   `$legacy_macro_name` and re-exported as `$macro_name`.
// - `$len_start..=$len_end`: accepted length range; `$len_end` is also the storage size.
// - `$tinystr_ident`: identifier the parsed `TinyAsciiStr` is bound to inside
//   `$validate`, `$normalize` and `$is_normalized`.
// - `$validate`: boolean expression deciding whether the parsed string is accepted.
// - `$normalize`: expression producing the `TinyAsciiStr` that gets stored.
// - `$is_normalized`: boolean expression checked by `try_from_raw`.
// - `$error`: `ParserError` variant returned on failure.
// - good / bad example lists: literals used in generated doctests and unit tests.
macro_rules! impl_tinystr_subtag {
    (
        $(#[$doc:meta])*
        $name:ident,
        $($path:ident)::+,
        $macro_name:ident,
        $legacy_macro_name:ident,
        $len_start:literal..=$len_end:literal,
        $tinystr_ident:ident,
        $validate:expr,
        $normalize:expr,
        $is_normalized:expr,
        $error:ident,
        [$good_example:literal $(,$more_good_examples:literal)*],
        [$bad_example:literal $(, $more_bad_examples:literal)*],
    ) => {
        #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Copy)]
        #[cfg_attr(feature = "serde", derive(serde::Serialize))]
        #[repr(transparent)]
        $(#[$doc])*
        pub struct $name(tinystr::TinyAsciiStr<$len_end>);

        impl $name {
            /// A constructor which takes a UTF-8 slice, parses it and
            #[doc = concat!("produces a well-formed [`", stringify!($name), "`].")]
            ///
            /// # Examples
            ///
            /// ```
            #[doc = concat!("use icu_locid::", stringify!($($path::)+), stringify!($name), ";")]
            ///
            #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($good_example), ").is_ok());")]
            #[doc = concat!("assert!(", stringify!($name), "::try_from_bytes(b", stringify!($bad_example), ").is_err());")]
            /// ```
            pub const fn try_from_bytes(v: &[u8]) -> Result<Self, crate::parser::errors::ParserError> {
                Self::try_from_bytes_manual_slice(v, 0, v.len())
            }

            /// Equivalent to [`try_from_bytes(bytes[start..end])`](Self::try_from_bytes),
            /// but callable in a `const` context (which range indexing is not).
            ///
            /// An inverted range (`start > end`) is reported as a parse error.
            pub const fn try_from_bytes_manual_slice(
                v: &[u8],
                start: usize,
                end: usize,
            ) -> Result<Self, crate::parser::errors::ParserError> {
                // A plain `end - start` overflows when `start > end`; treat that
                // like any other malformed input instead.
                let slen = match end.checked_sub(start) {
                    Some(slen) => slen,
                    None => return Err(crate::parser::errors::ParserError::$error),
                };

                #[allow(clippy::double_comparisons)] // if len_start == len_end
                if slen < $len_start || slen > $len_end {
                    return Err(crate::parser::errors::ParserError::$error);
                }

                // `$validate` and `$normalize` are caller-supplied expressions that
                // refer to the parsed string through `$tinystr_ident`.
                match tinystr::TinyAsciiStr::from_bytes_manual_slice(v, start, end) {
                    Ok($tinystr_ident) if $validate => Ok(Self($normalize)),
                    _ => Err(crate::parser::errors::ParserError::$error),
                }
            }

            #[doc = concat!("Safely creates a [`", stringify!($name), "`] from its raw format")]
            /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
            /// this constructor only takes normalized values.
            pub const fn try_from_raw(
                v: [u8; $len_end],
            ) -> Result<Self, crate::parser::errors::ParserError> {
                if let Ok($tinystr_ident) = tinystr::TinyAsciiStr::<$len_end>::try_from_raw(v) {
                    // The raw value is stored as-is, so it must already satisfy the
                    // minimum length and the `$is_normalized` predicate.
                    if $tinystr_ident.len() >= $len_start && $is_normalized {
                        Ok(Self($tinystr_ident))
                    } else {
                        Err(crate::parser::errors::ParserError::$error)
                    }
                } else {
                    Err(crate::parser::errors::ParserError::$error)
                }
            }

            #[doc = concat!("Unsafely creates a [`", stringify!($name), "`] from its raw format")]
            /// as returned by [`Self::into_raw`]. Unlike [`Self::try_from_bytes`],
            /// this constructor only takes normalized values.
            ///
            /// # Safety
            ///
            /// This function is safe iff [`Self::try_from_raw`] returns an `Ok`. This is the case
            /// for inputs that are correctly normalized.
            pub const unsafe fn from_raw_unchecked(v: [u8; $len_end]) -> Self {
                // SAFETY: upholding the contract documented above is the caller's
                // responsibility; no validation is performed here.
                Self(tinystr::TinyAsciiStr::from_bytes_unchecked(v))
            }

            /// Deconstructs into a raw format to be consumed by
            /// [`from_raw_unchecked`](Self::from_raw_unchecked()) or
            /// [`try_from_raw`](Self::try_from_raw()).
            pub const fn into_raw(self) -> [u8; $len_end] {
                *self.0.all_bytes()
            }

            /// A helper function for displaying as a `&str`.
            #[inline]
            pub const fn as_str(&self) -> &str {
                self.0.as_str()
            }

            // Returns a copy of the wrapped `TinyAsciiStr`.
            #[doc(hidden)]
            pub const fn into_tinystr(&self) -> tinystr::TinyAsciiStr<$len_end> {
                self.0
            }

            /// Compare with BCP-47 bytes.
            ///
            /// The return value is equivalent to what would happen if you first converted
            /// `self` to a BCP-47 string and then performed a byte comparison.
            ///
            /// This function is case-sensitive and results in a *total order*, so it is appropriate for
            /// binary search. The only argument producing [`Ordering::Equal`](core::cmp::Ordering::Equal)
            /// is `self.as_str().as_bytes()`.
            #[inline]
            pub fn strict_cmp(self, other: &[u8]) -> core::cmp::Ordering {
                self.as_str().as_bytes().cmp(other)
            }

            /// Compare with a potentially unnormalized BCP-47 string.
            ///
            /// The return value is equivalent to what would happen if you first parsed the
            /// BCP-47 string and then performed a structural comparison.
            #[inline]
            pub fn normalizing_eq(self, other: &str) -> bool {
                self.as_str().eq_ignore_ascii_case(other)
            }
        }

        impl core::str::FromStr for $name {
            type Err = crate::parser::errors::ParserError;

            // Parses via `try_from_bytes`, so the same validation and normalization apply.
            fn from_str(source: &str) -> Result<Self, Self::Err> {
                Self::try_from_bytes(source.as_bytes())
            }
        }

        impl<'l> From<&'l $name> for &'l str {
            fn from(input: &'l $name) -> Self {
                input.as_str()
            }
        }

        impl From<$name> for tinystr::TinyAsciiStr<$len_end> {
            fn from(input: $name) -> Self {
                input.into_tinystr()
            }
        }

        impl writeable::Writeable for $name {
            #[inline]
            fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
                sink.write_str(self.as_str())
            }
            #[inline]
            fn writeable_length_hint(&self) -> writeable::LengthHint {
                writeable::LengthHint::exact(self.0.len())
            }
            // The subtag already holds its text, so it can be borrowed without allocating.
            #[inline]
            fn write_to_string(&self) -> alloc::borrow::Cow<str> {
                alloc::borrow::Cow::Borrowed(self.0.as_str())
            }
        }

        writeable::impl_display_with_writeable!($name);

        #[doc = concat!("A macro allowing for compile-time construction of valid [`", stringify!($name), "`] subtags.")]
        ///
        /// # Examples
        ///
        /// Parsing errors don't have to be handled at runtime:
        /// ```
        /// assert_eq!(
        #[doc = concat!(" icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($good_example), "),")]
        #[doc = concat!(" ", stringify!($good_example), ".parse::<icu_locid::", $(stringify!($path), "::",)+ stringify!($name), ">().unwrap()")]
        /// );
        /// ```
        ///
        /// Invalid input is a compile failure:
        /// ```compile_fail,E0080
        #[doc = concat!("icu_locid::", $(stringify!($path), "::",)+ stringify!($macro_name), "!(", stringify!($bad_example), ");")]
        /// ```
        ///
        #[doc = concat!("[`", stringify!($name), "`]: crate::", $(stringify!($path), "::",)+ stringify!($name))]
        #[macro_export]
        #[doc(hidden)]
        macro_rules! $legacy_macro_name {
            ($string:literal) => {{
                use $crate::$($path ::)+ $name;
                // Evaluated in a `const` item so that invalid input fails the build.
                const R: $name =
                    match $name::try_from_bytes($string.as_bytes()) {
                        Ok(r) => r,
                        #[allow(clippy::panic)] // const context
                        _ => panic!(concat!("Invalid ", $(stringify!($path), "::",)+ stringify!($name), ": ", $string)),
                    };
                R
            }};
        }
        #[doc(inline)]
        pub use $legacy_macro_name as $macro_name;

        #[cfg(feature = "databake")]
        impl databake::Bake for $name {
            // Bakes the subtag as an invocation of its construction macro.
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_locid");
                let string = self.as_str();
                databake::quote! { icu_locid::$($path::)+ $macro_name!(#string) }
            }
        }

        #[test]
        fn test_construction() {
            // Every good example must parse, round-trip through its raw form and
            // print back exactly as written.
            let maybe = $name::try_from_bytes($good_example.as_bytes());
            assert!(maybe.is_ok());
            assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
            assert_eq!(maybe.unwrap().as_str(), $good_example);
            $(
                let maybe = $name::try_from_bytes($more_good_examples.as_bytes());
                assert!(maybe.is_ok());
                assert_eq!(maybe, $name::try_from_raw(maybe.unwrap().into_raw()));
                assert_eq!(maybe.unwrap().as_str(), $more_good_examples);
            )*
            // Every bad example must be rejected.
            assert!($name::try_from_bytes($bad_example.as_bytes()).is_err());
            $(
                assert!($name::try_from_bytes($more_bad_examples.as_bytes()).is_err());
            )*
        }

        #[test]
        fn test_writeable() {
            writeable::assert_writeable_eq!(&$good_example.parse::<$name>().unwrap(), $good_example);
            $(
                writeable::assert_writeable_eq!($more_good_examples.parse::<$name>().unwrap(), $more_good_examples);
            )*
        }

        #[cfg(feature = "serde")]
        impl<'de> serde::Deserialize<'de> for $name {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: serde::de::Deserializer<'de>,
            {
                struct Visitor;

                impl<'de> serde::de::Visitor<'de> for Visitor {
                    type Value = $name;

                    fn expecting(
                        &self,
                        formatter: &mut core::fmt::Formatter<'_>,
                    ) -> core::fmt::Result {
                        write!(formatter, "a valid BCP-47 {}", stringify!($name))
                    }

                    fn visit_str<E: serde::de::Error>(self, s: &str) -> Result<Self::Value, E> {
                        s.parse().map_err(serde::de::Error::custom)
                    }
                }

                // Human-readable formats carry the subtag as a string that is parsed;
                // other formats carry the raw value, which is validated by `try_from_raw`.
                if deserializer.is_human_readable() {
                    deserializer.deserialize_string(Visitor)
                } else {
                    Self::try_from_raw(serde::de::Deserialize::deserialize(deserializer)?)
                        .map_err(serde::de::Error::custom)
                }
            }
        }

        // Safety checklist for ULE:
        //
        // 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE).
        // 2. Must have an alignment of 1 byte (true since transparent over a ULE).
        // 3. ULE::validate_byte_slice() checks that the given byte slice represents a valid slice.
        // 4. ULE::validate_byte_slice() checks that the given byte slice has a valid length.
        // 5. All other methods must be left with their default impl.
        // 6. Byte equality is semantic equality.
        #[cfg(feature = "zerovec")]
        unsafe impl zerovec::ule::ULE for $name {
            fn validate_byte_slice(bytes: &[u8]) -> Result<(), zerovec::ZeroVecError> {
                let it = bytes.chunks_exact(core::mem::size_of::<Self>());
                // The slice must split evenly into `Self`-sized chunks.
                if !it.remainder().is_empty() {
                    return Err(zerovec::ZeroVecError::length::<Self>(bytes.len()));
                }
                for v in it {
                    // The following can be removed once `array_chunks` is stabilized.
                    let mut a = [0; core::mem::size_of::<Self>()];
                    a.copy_from_slice(v);
                    if Self::try_from_raw(a).is_err() {
                        return Err(zerovec::ZeroVecError::parse::<Self>());
                    }
                }
                Ok(())
            }
        }

        #[cfg(feature = "zerovec")]
        impl zerovec::ule::AsULE for $name {
            type ULE = Self;
            fn to_unaligned(self) -> Self::ULE {
                self
            }
            fn from_unaligned(unaligned: Self::ULE) -> Self {
                unaligned
            }
        }

        #[cfg(feature = "zerovec")]
        impl<'a> zerovec::maps::ZeroMapKV<'a> for $name {
            type Container = zerovec::ZeroVec<'a, $name>;
            type Slice = zerovec::ZeroSlice<$name>;
            type GetType = $name;
            type OwnedType = $name;
        }
    };
}
| 320 | |
// Implements `writeable::Writeable` for `$type` on top of its
// `for_each_subtag_str` method, joining the yielded strings with `-`, and then
// invokes `writeable::impl_display_with_writeable!` for the same type.
//
// The optional trailing arguments `$self, $borrow_cond => $borrow` additionally
// generate `write_to_string`: `$self` is bound to `self`, and when `$borrow_cond`
// holds the `$borrow` expression is returned instead of building a new `String`.
//
// No test is generated (hence the `_no_test` suffix).
macro_rules! impl_writeable_for_each_subtag_str_no_test {
    ($type:tt $(, $self:ident, $borrow_cond:expr => $borrow:expr)?) => {
        impl writeable::Writeable for $type {
            fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
                // A separator is written before every subtag except the first.
                let mut initial = true;
                self.for_each_subtag_str(&mut |subtag| {
                    if initial {
                        initial = false;
                    } else {
                        sink.write_char('-')?;
                    }
                    sink.write_str(subtag)
                })
            }

            #[inline]
            fn writeable_length_hint(&self) -> writeable::LengthHint {
                // Mirrors `write_to`: one byte per separator plus each subtag's length.
                let mut result = writeable::LengthHint::exact(0);
                let mut initial = true;
                self.for_each_subtag_str::<core::convert::Infallible, _>(&mut |subtag| {
                    if initial {
                        initial = false;
                    } else {
                        result += 1;
                    }
                    result += subtag.len();
                    Ok(())
                })
                .expect("infallible");
                result
            }

            $(
                // The exemption sits on the function because the `unwrap` lives in
                // `$borrow`, inside the `if` below; an attribute on the `let`
                // statement would only cover that statement.
                #[allow(clippy::unwrap_used)] // impl_writeable_for_subtag_list's $borrow uses unwrap
                fn write_to_string(&self) -> alloc::borrow::Cow<str> {
                    let $self = self;
                    if $borrow_cond {
                        $borrow
                    } else {
                        let mut output = alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
                        let _ = self.write_to(&mut output);
                        alloc::borrow::Cow::Owned(output)
                    }
                }
            )?
        }

        writeable::impl_display_with_writeable!($type);
    };
}
| 371 | |
// Implements `Writeable` for a subtag-list type `$type` through
// `impl_writeable_for_each_subtag_str_no_test!`, borrowing the only element's
// string when the list holds exactly one subtag, and generates a
// `test_writeable` unit test.
//
// `$sample1` and `$sample2` are subtag literals used by the generated test;
// they must parse into the list's element type.
macro_rules! impl_writeable_for_subtag_list {
    ($type:tt, $sample1:literal, $sample2:literal) => {
        impl_writeable_for_each_subtag_str_no_test!($type, selff, selff.0.len() == 1 => alloc::borrow::Cow::Borrowed(selff.0.get(0).unwrap().as_str()));

        #[test]
        fn test_writeable() {
            // Empty list.
            writeable::assert_writeable_eq!(&$type::default(), "");
            // Single element.
            writeable::assert_writeable_eq!(
                &$type::from_short_slice_unchecked(alloc::vec![$sample1.parse().unwrap()].into()),
                $sample1,
            );
            // Two elements are joined with `-`.
            writeable::assert_writeable_eq!(
                &$type::from_short_slice_unchecked(alloc::vec![
                    $sample1.parse().unwrap(),
                    $sample2.parse().unwrap()
                ].into()),
                core::concat!($sample1, "-", $sample2),
            );
        }
    };
}
| 393 | |
// Implements `Writeable` for a key-value collection type `$type` through
// `impl_writeable_for_each_subtag_str_no_test!` (without the borrowing
// `write_to_string` override) and generates a `test_writeable` unit test.
//
// The test builds collections with `$type::from_tuple_vec`:
// - `$key1` / `$value1`: a pair expected to print as `$key1-$value1`;
// - `$key2`: a key stored with the value `"true"`, expected to print as `$expected2`.
macro_rules! impl_writeable_for_key_value {
    ($type:tt, $key1:literal, $value1:literal, $key2:literal, $expected2:literal) => {
        impl_writeable_for_each_subtag_str_no_test!($type);

        #[test]
        fn test_writeable() {
            // No entries: nothing is written.
            let empty = $type::default();
            writeable::assert_writeable_eq!(&empty, "");

            // One entry: key and value joined by `-`.
            let single = $type::from_tuple_vec(vec![
                ($key1.parse().unwrap(), $value1.parse().unwrap()),
            ]);
            writeable::assert_writeable_eq!(&single, core::concat!($key1, "-", $value1));

            // Two entries: the second key carries the value "true".
            let double = $type::from_tuple_vec(vec![
                ($key1.parse().unwrap(), $value1.parse().unwrap()),
                ($key2.parse().unwrap(), "true".parse().unwrap()),
            ]);
            writeable::assert_writeable_eq!(
                &double,
                core::concat!($key1, "-", $value1, "-", $expected2)
            );
        }
    };
}
| 415 | |