| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | use crate::parser::{ParserError, SubtagIterator}; |
| 6 | use crate::shortvec::ShortBoxSlice; |
| 7 | use core::ops::RangeInclusive; |
| 8 | use core::str::FromStr; |
| 9 | use tinystr::TinyAsciiStr; |
| 10 | |
/// A value used in a list of [`Keywords`](super::Keywords).
///
/// A value is a sequence of subtags which, when written, are separated
/// by `-`. Each subtag consists of ASCII alphanumeric characters, is stored
/// lowercased, and is no shorter than 3 and no longer than 8 characters.
///
/// The value `true` is special: it is represented by an empty sequence of
/// subtags and is therefore written as the empty string.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{value, Value};
/// use writeable::assert_writeable_eq;
///
/// assert_writeable_eq!(value!("gregory"), "gregory");
/// assert_writeable_eq!(
///     "islamic-civil".parse::<Value>().unwrap(),
///     "islamic-civil"
/// );
///
/// // The value "true" has the special, empty string representation
/// assert_eq!(value!("true").to_string(), "");
/// ```
#[derive (Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
| 36 | |
// Inclusive bounds on the byte length of a single value subtag. The upper
// bound also fixes the capacity of the `TinyAsciiStr` stored in `Value`.
const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
// The subtag `true`, which is never stored: parsing maps it to "no subtag".
const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true" );
| 39 | |
| 40 | impl Value { |
| 41 | /// A constructor which takes a utf8 slice, parses it and |
| 42 | /// produces a well-formed [`Value`]. |
| 43 | /// |
| 44 | /// # Examples |
| 45 | /// |
| 46 | /// ``` |
| 47 | /// use icu::locid::extensions::unicode::Value; |
| 48 | /// |
| 49 | /// Value::try_from_bytes(b"buddhist" ).expect("Parsing failed." ); |
| 50 | /// ``` |
| 51 | pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> { |
| 52 | let mut v = ShortBoxSlice::new(); |
| 53 | |
| 54 | if !input.is_empty() { |
| 55 | for subtag in SubtagIterator::new(input) { |
| 56 | let val = Self::subtag_from_bytes(subtag)?; |
| 57 | if let Some(val) = val { |
| 58 | v.push(val); |
| 59 | } |
| 60 | } |
| 61 | } |
| 62 | Ok(Self(v)) |
| 63 | } |
| 64 | |
| 65 | /// Const constructor for when the value contains only a single subtag. |
| 66 | /// |
| 67 | /// # Examples |
| 68 | /// |
| 69 | /// ``` |
| 70 | /// use icu::locid::extensions::unicode::Value; |
| 71 | /// |
| 72 | /// Value::try_from_single_subtag(b"buddhist" ).expect("valid subtag" ); |
| 73 | /// Value::try_from_single_subtag(b"#####" ).expect_err("invalid subtag" ); |
| 74 | /// Value::try_from_single_subtag(b"foo-bar" ).expect_err("not a single subtag" ); |
| 75 | /// ``` |
| 76 | pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> { |
| 77 | match Self::subtag_from_bytes(subtag) { |
| 78 | Err(_) => Err(ParserError::InvalidExtension), |
| 79 | Ok(option) => Ok(Self::from_tinystr(option)), |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | #[doc (hidden)] |
| 84 | pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] { |
| 85 | &self.0 |
| 86 | } |
| 87 | |
| 88 | #[doc (hidden)] |
| 89 | pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> { |
| 90 | self.0.single() |
| 91 | } |
| 92 | |
| 93 | #[doc (hidden)] |
| 94 | pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self { |
| 95 | match subtag { |
| 96 | None => Self(ShortBoxSlice::new()), |
| 97 | Some(val) => { |
| 98 | debug_assert!(val.is_ascii_alphanumeric()); |
| 99 | debug_assert!(!matches!(val, TRUE_VALUE)); |
| 100 | Self(ShortBoxSlice::new_single(val)) |
| 101 | } |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice<TinyAsciiStr<8>>) -> Self { |
| 106 | Self(input) |
| 107 | } |
| 108 | |
| 109 | #[doc (hidden)] |
| 110 | pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
| 111 | Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len()) |
| 112 | } |
| 113 | |
| 114 | pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
| 115 | Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len()) |
| 116 | } |
| 117 | |
| 118 | pub(crate) const fn parse_subtag_from_bytes_manual_slice( |
| 119 | bytes: &[u8], |
| 120 | start: usize, |
| 121 | end: usize, |
| 122 | ) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
| 123 | let slice_len = end - start; |
| 124 | if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() { |
| 125 | return Err(ParserError::InvalidExtension); |
| 126 | } |
| 127 | |
| 128 | match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) { |
| 129 | Ok(TRUE_VALUE) => Ok(None), |
| 130 | Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())), |
| 131 | Ok(_) => Err(ParserError::InvalidExtension), |
| 132 | Err(_) => Err(ParserError::InvalidSubtag), |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
| 137 | where |
| 138 | F: FnMut(&str) -> Result<(), E>, |
| 139 | { |
| 140 | self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f) |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | impl FromStr for Value { |
| 145 | type Err = ParserError; |
| 146 | |
| 147 | fn from_str(source: &str) -> Result<Self, Self::Err> { |
| 148 | Self::try_from_bytes(input:source.as_bytes()) |
| 149 | } |
| 150 | } |
| 151 | |
// NOTE(review): this macro is defined elsewhere in the crate; presumably it
// generates the `Writeable`/`Display` implementations for `Value` (the doc
// examples on `Value` rely on them), with "islamic" and "civil" serving as
// sample subtags — confirm against the macro definition.
impl_writeable_for_subtag_list!(Value, "islamic" , "civil" );
| 153 | |
/// A macro allowing for compile-time construction of a valid Unicode [`Value`] subtag.
///
/// Only single-subtag values are supported. An invalid literal causes a
/// panic while the constant is being evaluated.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{key, value};
/// use icu::locid::Locale;
///
/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("ca")),
///     Some(&value!("buddhist"))
/// );
/// ```
///
/// [`Value`]: crate::extensions::unicode::Value
#[macro_export]
#[doc(hidden)]
macro_rules! extensions_unicode_value {
    ($value:literal) => {{
        // Intended form:
        // const R: $crate::extensions::unicode::Value =
        //     match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
        //         Ok(r) => r,
        //         #[allow(clippy::panic)] // const context
        //         _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
        //     };
        // Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
        // validate the subtag first, then wrap it with `from_tinystr`.
        const PARSED_VALUE: $crate::extensions::unicode::Value =
            $crate::extensions::unicode::Value::from_tinystr(
                match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
                    Ok(subtag) => subtag,
                    Err(_) => panic!(concat!("Invalid Unicode extension value: ", $value)),
                },
            );
        PARSED_VALUE
    }};
}
| 195 | #[doc (inline)] |
| 196 | pub use extensions_unicode_value as value; |
| 197 | |