1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! Unicode Extensions provide information about user preferences in a given locale. |
6 | //! |
7 | //! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and |
8 | //! [`Attributes`]. |
9 | //! |
10 | //! |
11 | //! # Examples |
12 | //! |
13 | //! ``` |
14 | //! use icu::locid::extensions::unicode::{attribute, key, value, Unicode}; |
15 | //! use icu::locid::Locale; |
16 | //! |
17 | //! let loc: Locale = "en-US-u-foobar-hc-h12" .parse().expect("Parsing failed." ); |
18 | //! |
19 | //! assert_eq!( |
20 | //! loc.extensions.unicode.keywords.get(&key!("hc" )), |
21 | //! Some(&value!("h12" )) |
22 | //! ); |
23 | //! assert!(loc |
24 | //! .extensions |
25 | //! .unicode |
26 | //! .attributes |
27 | //! .contains(&attribute!("foobar" ))); |
28 | //! ``` |
29 | mod attribute; |
30 | mod attributes; |
31 | mod key; |
32 | mod keywords; |
33 | mod value; |
34 | |
35 | use core::cmp::Ordering; |
36 | |
37 | #[doc (inline)] |
38 | pub use attribute::{attribute, Attribute}; |
39 | pub use attributes::Attributes; |
40 | #[doc (inline)] |
41 | pub use key::{key, Key}; |
42 | pub use keywords::Keywords; |
43 | #[doc (inline)] |
44 | pub use value::{value, Value}; |
45 | |
46 | use crate::parser::ParserError; |
47 | use crate::parser::SubtagIterator; |
48 | use crate::shortvec::ShortBoxSlice; |
49 | use litemap::LiteMap; |
50 | |
51 | /// Unicode Extensions provide information about user preferences in a given locale. |
52 | /// |
53 | /// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale |
54 | /// Identifier`] specification. |
55 | /// |
56 | /// Unicode extensions provide subtags that specify language and/or locale-based behavior |
57 | /// or refinements to language tags, according to work done by the Unicode Consortium. |
58 | /// (See [`RFC 6067`] for details). |
59 | /// |
60 | /// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension |
61 | /// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt |
62 | /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier |
63 | /// |
64 | /// # Examples |
65 | /// |
66 | /// ``` |
67 | /// use icu::locid::extensions::unicode::{key, value}; |
68 | /// use icu::locid::Locale; |
69 | /// |
70 | /// let loc: Locale = |
71 | /// "de-u-hc-h12-ca-buddhist" .parse().expect("Parsing failed." ); |
72 | /// |
73 | /// assert_eq!( |
74 | /// loc.extensions.unicode.keywords.get(&key!("ca" )), |
75 | /// Some(&value!("buddhist" )) |
76 | /// ); |
77 | /// ``` |
78 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
79 | #[allow (clippy::exhaustive_structs)] // spec-backed stable datastructure |
80 | pub struct Unicode { |
81 | /// The key-value pairs present in this locale extension, with each extension key subtag |
82 | /// associated to its provided value subtag. |
83 | pub keywords: Keywords, |
84 | /// A canonically ordered sequence of single standalone subtags for this locale extension. |
85 | pub attributes: Attributes, |
86 | } |
87 | |
88 | impl Unicode { |
89 | /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`. |
90 | /// |
91 | /// # Examples |
92 | /// |
93 | /// ``` |
94 | /// use icu::locid::extensions::unicode::Unicode; |
95 | /// |
96 | /// assert_eq!(Unicode::new(), Unicode::default()); |
97 | /// ``` |
98 | #[inline ] |
99 | pub const fn new() -> Self { |
100 | Self { |
101 | keywords: Keywords::new(), |
102 | attributes: Attributes::new(), |
103 | } |
104 | } |
105 | |
106 | /// Returns [`true`] if there list of keywords and attributes is empty. |
107 | /// |
108 | /// # Examples |
109 | /// |
110 | /// ``` |
111 | /// use icu::locid::Locale; |
112 | /// |
113 | /// let loc: Locale = "en-US-u-foo" .parse().expect("Parsing failed." ); |
114 | /// |
115 | /// assert!(!loc.extensions.unicode.is_empty()); |
116 | /// ``` |
117 | pub fn is_empty(&self) -> bool { |
118 | self.keywords.is_empty() && self.attributes.is_empty() |
119 | } |
120 | |
121 | /// Clears all Unicode extension keywords and attributes, effectively removing |
122 | /// the Unicode extension. |
123 | /// |
124 | /// # Example |
125 | /// |
126 | /// ``` |
127 | /// use icu::locid::Locale; |
128 | /// |
129 | /// let mut loc: Locale = |
130 | /// "und-t-mul-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
131 | /// loc.extensions.unicode.clear(); |
132 | /// assert_eq!(loc, "und-t-mul" .parse().unwrap()); |
133 | /// ``` |
134 | pub fn clear(&mut self) { |
135 | self.keywords.clear(); |
136 | self.attributes.clear(); |
137 | } |
138 | |
139 | pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) { |
140 | (&self.attributes, &self.keywords) |
141 | } |
142 | |
143 | /// Returns an ordering suitable for use in [`BTreeSet`]. |
144 | /// |
145 | /// The ordering may or may not be equivalent to string ordering, and it |
146 | /// may or may not be stable across ICU4X releases. |
147 | /// |
148 | /// [`BTreeSet`]: alloc::collections::BTreeSet |
149 | pub fn total_cmp(&self, other: &Self) -> Ordering { |
150 | self.as_tuple().cmp(&other.as_tuple()) |
151 | } |
152 | |
153 | pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { |
154 | let mut attributes = ShortBoxSlice::new(); |
155 | |
156 | while let Some(subtag) = iter.peek() { |
157 | if let Ok(attr) = Attribute::try_from_bytes(subtag) { |
158 | if let Err(idx) = attributes.binary_search(&attr) { |
159 | attributes.insert(idx, attr); |
160 | } |
161 | } else { |
162 | break; |
163 | } |
164 | iter.next(); |
165 | } |
166 | |
167 | let mut keywords = LiteMap::new(); |
168 | |
169 | let mut current_keyword = None; |
170 | let mut current_value = ShortBoxSlice::new(); |
171 | |
172 | while let Some(subtag) = iter.peek() { |
173 | let slen = subtag.len(); |
174 | if slen == 2 { |
175 | if let Some(kw) = current_keyword.take() { |
176 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
177 | current_value = ShortBoxSlice::new(); |
178 | } |
179 | current_keyword = Some(Key::try_from_bytes(subtag)?); |
180 | } else if current_keyword.is_some() { |
181 | match Value::parse_subtag(subtag) { |
182 | Ok(Some(t)) => current_value.push(t), |
183 | Ok(None) => {} |
184 | Err(_) => break, |
185 | } |
186 | } else { |
187 | break; |
188 | } |
189 | iter.next(); |
190 | } |
191 | |
192 | if let Some(kw) = current_keyword.take() { |
193 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
194 | } |
195 | |
196 | // Ensure we've defined at least one attribute or keyword |
197 | if attributes.is_empty() && keywords.is_empty() { |
198 | return Err(ParserError::InvalidExtension); |
199 | } |
200 | |
201 | Ok(Self { |
202 | keywords: keywords.into(), |
203 | attributes: Attributes::from_short_slice_unchecked(attributes), |
204 | }) |
205 | } |
206 | |
207 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
208 | where |
209 | F: FnMut(&str) -> Result<(), E>, |
210 | { |
211 | if self.is_empty() { |
212 | return Ok(()); |
213 | } |
214 | f("u" )?; |
215 | self.attributes.for_each_subtag_str(f)?; |
216 | self.keywords.for_each_subtag_str(f)?; |
217 | Ok(()) |
218 | } |
219 | } |
220 | |
221 | writeable::impl_display_with_writeable!(Unicode); |
222 | |
223 | impl writeable::Writeable for Unicode { |
224 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
225 | if self.is_empty() { |
226 | return Ok(()); |
227 | } |
228 | sink.write_str("u" )?; |
229 | if !self.attributes.is_empty() { |
230 | sink.write_char('-' )?; |
231 | writeable::Writeable::write_to(&self.attributes, sink)?; |
232 | } |
233 | if !self.keywords.is_empty() { |
234 | sink.write_char('-' )?; |
235 | writeable::Writeable::write_to(&self.keywords, sink)?; |
236 | } |
237 | Ok(()) |
238 | } |
239 | |
240 | fn writeable_length_hint(&self) -> writeable::LengthHint { |
241 | if self.is_empty() { |
242 | return writeable::LengthHint::exact(0); |
243 | } |
244 | let mut result = writeable::LengthHint::exact(1); |
245 | if !self.attributes.is_empty() { |
246 | result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; |
247 | } |
248 | if !self.keywords.is_empty() { |
249 | result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; |
250 | } |
251 | result |
252 | } |
253 | } |
254 | |