1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::helpers::ShortSlice; |
6 | use crate::parser::{ParserError, SubtagIterator}; |
7 | use core::ops::RangeInclusive; |
8 | use core::str::FromStr; |
9 | use tinystr::TinyAsciiStr; |
10 | |
11 | /// A value used in a list of [`Keywords`](super::Keywords). |
12 | /// |
13 | /// The value has to be a sequence of one or more alphanumerical strings |
14 | /// separated by `-`. |
15 | /// Each part of the sequence has to be no shorter than three characters and no |
16 | /// longer than 8. |
17 | /// |
18 | /// |
19 | /// # Examples |
20 | /// |
21 | /// ``` |
22 | /// use icu::locid::extensions::unicode::{value, Value}; |
23 | /// use writeable::assert_writeable_eq; |
24 | /// |
25 | /// assert_writeable_eq!(value!("gregory" ), "gregory" ); |
26 | /// assert_writeable_eq!( |
27 | /// "islamic-civil" .parse::<Value>().unwrap(), |
28 | /// "islamic-civil" |
29 | /// ); |
30 | /// |
31 | /// // The value "true" has the special, empty string representation |
32 | /// assert_eq!(value!("true" ).to_string(), "" ); |
33 | /// ``` |
34 | #[derive (Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)] |
35 | pub struct Value(ShortSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>); |
36 | |
37 | const VALUE_LENGTH: RangeInclusive<usize> = 3..=8; |
38 | const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true" ); |
39 | |
40 | impl Value { |
41 | /// A constructor which takes a utf8 slice, parses it and |
42 | /// produces a well-formed [`Value`]. |
43 | /// |
44 | /// # Examples |
45 | /// |
46 | /// ``` |
47 | /// use icu::locid::extensions::unicode::Value; |
48 | /// |
49 | /// Value::try_from_bytes(b"buddhist" ).expect("Parsing failed." ); |
50 | /// ``` |
51 | pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> { |
52 | let mut v = ShortSlice::new(); |
53 | |
54 | if !input.is_empty() { |
55 | for subtag in SubtagIterator::new(input) { |
56 | let val = Self::subtag_from_bytes(subtag)?; |
57 | if let Some(val) = val { |
58 | v.push(val); |
59 | } |
60 | } |
61 | } |
62 | Ok(Self(v)) |
63 | } |
64 | |
65 | /// Const constructor for when the value contains only a single subtag. |
66 | /// |
67 | /// # Examples |
68 | /// |
69 | /// ``` |
70 | /// use icu::locid::extensions::unicode::Value; |
71 | /// |
72 | /// Value::try_from_single_subtag(b"buddhist" ).expect("valid subtag" ); |
73 | /// Value::try_from_single_subtag(b"#####" ).expect_err("invalid subtag" ); |
74 | /// Value::try_from_single_subtag(b"foo-bar" ).expect_err("not a single subtag" ); |
75 | /// ``` |
76 | pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> { |
77 | match Self::subtag_from_bytes(subtag) { |
78 | Err(_) => Err(ParserError::InvalidExtension), |
79 | Ok(option) => Ok(Self::from_tinystr(option)), |
80 | } |
81 | } |
82 | |
83 | #[doc (hidden)] |
84 | pub fn as_tinystr_slice(&self) -> &[TinyAsciiStr<8>] { |
85 | &self.0 |
86 | } |
87 | |
88 | #[doc (hidden)] |
89 | pub const fn as_single_subtag(&self) -> Option<&TinyAsciiStr<8>> { |
90 | self.0.single() |
91 | } |
92 | |
93 | #[doc (hidden)] |
94 | pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self { |
95 | match subtag { |
96 | None => Self(ShortSlice::new()), |
97 | Some(val) => { |
98 | debug_assert!(val.is_ascii_alphanumeric()); |
99 | debug_assert!(!matches!(val, TRUE_VALUE)); |
100 | Self(ShortSlice::new_single(val)) |
101 | } |
102 | } |
103 | } |
104 | |
105 | pub(crate) fn from_short_slice_unchecked(input: ShortSlice<TinyAsciiStr<8>>) -> Self { |
106 | Self(input) |
107 | } |
108 | |
109 | #[doc (hidden)] |
110 | pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
111 | Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len()) |
112 | } |
113 | |
114 | pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
115 | Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len()) |
116 | } |
117 | |
118 | pub(crate) const fn parse_subtag_from_bytes_manual_slice( |
119 | bytes: &[u8], |
120 | start: usize, |
121 | end: usize, |
122 | ) -> Result<Option<TinyAsciiStr<8>>, ParserError> { |
123 | let slice_len = end - start; |
124 | if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() { |
125 | return Err(ParserError::InvalidExtension); |
126 | } |
127 | |
128 | match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) { |
129 | Ok(TRUE_VALUE) => Ok(None), |
130 | Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())), |
131 | Ok(_) => Err(ParserError::InvalidExtension), |
132 | Err(_) => Err(ParserError::InvalidSubtag), |
133 | } |
134 | } |
135 | |
136 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
137 | where |
138 | F: FnMut(&str) -> Result<(), E>, |
139 | { |
140 | self.0.iter().map(TinyAsciiStr::as_str).try_for_each(f) |
141 | } |
142 | } |
143 | |
144 | impl FromStr for Value { |
145 | type Err = ParserError; |
146 | |
147 | fn from_str(source: &str) -> Result<Self, Self::Err> { |
148 | Self::try_from_bytes(input:source.as_bytes()) |
149 | } |
150 | } |
151 | |
// NOTE(review): this macro is defined elsewhere in the crate; judging by its name it generates
// the `Writeable` implementation for `Value`, and the two literals look like sample subtags
// used by the generated code — confirm against the macro definition.
impl_writeable_for_subtag_list!(Value, "islamic" , "civil" );
153 | |
/// Builds a single-subtag Unicode extension [`Value`] from a string literal at compile time.
///
/// Values made of several subtags cannot be produced with this macro. The value is computed in
/// a `const` initializer, and a literal that is not a valid subtag hits the `panic!` there.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{key, value};
/// use icu::locid::Locale;
///
/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("ca")),
///     Some(&value!("buddhist"))
/// );
/// ```
///
/// [`Value`]: crate::extensions::unicode::Value
#[macro_export]
#[doc(hidden)]
macro_rules! extensions_unicode_value {
    ($subtag:literal) => {{
        // Ideally this would call `Value::try_from_single_subtag` and unwrap its `Result` in
        // const context. That has to wait for https://github.com/rust-lang/rust/issues/73255,
        // so the subtag is parsed first and the `Value` is built from the parsed subtag.
        const VALUE: $crate::extensions::unicode::Value = {
            let parsed =
                match $crate::extensions::unicode::Value::subtag_from_bytes($subtag.as_bytes()) {
                    Ok(parsed) => parsed,
                    Err(_) => panic!(concat!("Invalid Unicode extension value: ", $subtag)),
                };
            $crate::extensions::unicode::Value::from_tinystr(parsed)
        };
        VALUE
    }};
}
#[doc(inline)]
pub use extensions_unicode_value as value;
197 | |