1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! Unicode Extensions provide information about user preferences in a given locale. |
6 | //! |
7 | //! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and |
8 | //! [`Attributes`]. |
9 | //! |
10 | //! |
11 | //! # Examples |
12 | //! |
13 | //! ``` |
14 | //! use icu::locid::extensions::unicode::{attribute, key, value, Unicode}; |
15 | //! use icu::locid::Locale; |
16 | //! |
17 | //! let loc: Locale = "en-US-u-foobar-hc-h12" .parse().expect("Parsing failed." ); |
18 | //! |
19 | //! assert_eq!( |
20 | //! loc.extensions.unicode.keywords.get(&key!("hc" )), |
21 | //! Some(&value!("h12" )) |
22 | //! ); |
23 | //! assert!(loc |
24 | //! .extensions |
25 | //! .unicode |
26 | //! .attributes |
27 | //! .contains(&attribute!("foobar" ))); |
28 | //! ``` |
29 | mod attribute; |
30 | mod attributes; |
31 | mod key; |
32 | mod keywords; |
33 | mod value; |
34 | |
35 | #[doc (inline)] |
36 | pub use attribute::{attribute, Attribute}; |
37 | pub use attributes::Attributes; |
38 | #[doc (inline)] |
39 | pub use key::{key, Key}; |
40 | pub use keywords::Keywords; |
41 | #[doc (inline)] |
42 | pub use value::{value, Value}; |
43 | |
44 | use crate::helpers::ShortSlice; |
45 | use crate::parser::ParserError; |
46 | use crate::parser::SubtagIterator; |
47 | use litemap::LiteMap; |
48 | |
49 | /// Unicode Extensions provide information about user preferences in a given locale. |
50 | /// |
51 | /// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale |
52 | /// Identifier`] specification. |
53 | /// |
54 | /// Unicode extensions provide subtags that specify language and/or locale-based behavior |
55 | /// or refinements to language tags, according to work done by the Unicode Consortium. |
56 | /// (See [`RFC 6067`] for details). |
57 | /// |
58 | /// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension |
59 | /// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt |
60 | /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier |
61 | /// |
62 | /// # Examples |
63 | /// |
64 | /// ``` |
65 | /// use icu::locid::extensions::unicode::{key, value}; |
66 | /// use icu::locid::Locale; |
67 | /// |
68 | /// let loc: Locale = |
69 | /// "de-u-hc-h12-ca-buddhist" .parse().expect("Parsing failed." ); |
70 | /// |
71 | /// assert_eq!( |
72 | /// loc.extensions.unicode.keywords.get(&key!("ca" )), |
73 | /// Some(&value!("buddhist" )) |
74 | /// ); |
75 | /// ``` |
76 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
77 | #[allow (clippy::exhaustive_structs)] // spec-backed stable datastructure |
78 | pub struct Unicode { |
79 | /// The key-value pairs present in this locale extension, with each extension key subtag |
80 | /// associated to its provided value subtag. |
81 | pub keywords: Keywords, |
82 | /// A canonically ordered sequence of single standalone subtags for this locale extension. |
83 | pub attributes: Attributes, |
84 | } |
85 | |
86 | impl Unicode { |
87 | /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`. |
88 | /// |
89 | /// # Examples |
90 | /// |
91 | /// ``` |
92 | /// use icu::locid::extensions::unicode::Unicode; |
93 | /// |
94 | /// assert_eq!(Unicode::new(), Unicode::default()); |
95 | /// ``` |
96 | #[inline ] |
97 | pub const fn new() -> Self { |
98 | Self { |
99 | keywords: Keywords::new(), |
100 | attributes: Attributes::new(), |
101 | } |
102 | } |
103 | |
104 | /// Returns [`true`] if there list of keywords and attributes is empty. |
105 | /// |
106 | /// # Examples |
107 | /// |
108 | /// ``` |
109 | /// use icu::locid::Locale; |
110 | /// |
111 | /// let loc: Locale = "en-US-u-foo" .parse().expect("Parsing failed." ); |
112 | /// |
113 | /// assert!(!loc.extensions.unicode.is_empty()); |
114 | /// ``` |
115 | pub fn is_empty(&self) -> bool { |
116 | self.keywords.is_empty() && self.attributes.is_empty() |
117 | } |
118 | |
119 | /// Clears all Unicode extension keywords and attributes, effectively removing |
120 | /// the Unicode extension. |
121 | /// |
122 | /// # Example |
123 | /// |
124 | /// ``` |
125 | /// use icu::locid::Locale; |
126 | /// |
127 | /// let mut loc: Locale = |
128 | /// "und-t-mul-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
129 | /// loc.extensions.unicode.clear(); |
130 | /// assert_eq!(loc, "und-t-mul" .parse().unwrap()); |
131 | /// ``` |
132 | pub fn clear(&mut self) { |
133 | self.keywords.clear(); |
134 | self.attributes.clear(); |
135 | } |
136 | |
137 | pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { |
138 | let mut attributes = ShortSlice::new(); |
139 | |
140 | while let Some(subtag) = iter.peek() { |
141 | if let Ok(attr) = Attribute::try_from_bytes(subtag) { |
142 | if let Err(idx) = attributes.binary_search(&attr) { |
143 | attributes.insert(idx, attr); |
144 | } |
145 | } else { |
146 | break; |
147 | } |
148 | iter.next(); |
149 | } |
150 | |
151 | let mut keywords = LiteMap::new(); |
152 | |
153 | let mut current_keyword = None; |
154 | let mut current_value = ShortSlice::new(); |
155 | |
156 | while let Some(subtag) = iter.peek() { |
157 | let slen = subtag.len(); |
158 | if slen == 2 { |
159 | if let Some(kw) = current_keyword.take() { |
160 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
161 | current_value = ShortSlice::new(); |
162 | } |
163 | current_keyword = Some(Key::try_from_bytes(subtag)?); |
164 | } else if current_keyword.is_some() { |
165 | match Value::parse_subtag(subtag) { |
166 | Ok(Some(t)) => current_value.push(t), |
167 | Ok(None) => {} |
168 | Err(_) => break, |
169 | } |
170 | } else { |
171 | break; |
172 | } |
173 | iter.next(); |
174 | } |
175 | |
176 | if let Some(kw) = current_keyword.take() { |
177 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
178 | } |
179 | |
180 | // Ensure we've defined at least one attribute or keyword |
181 | if attributes.is_empty() && keywords.is_empty() { |
182 | return Err(ParserError::InvalidExtension); |
183 | } |
184 | |
185 | Ok(Self { |
186 | keywords: keywords.into(), |
187 | attributes: Attributes::from_short_slice_unchecked(attributes), |
188 | }) |
189 | } |
190 | |
191 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
192 | where |
193 | F: FnMut(&str) -> Result<(), E>, |
194 | { |
195 | if self.is_empty() { |
196 | return Ok(()); |
197 | } |
198 | f("u" )?; |
199 | self.attributes.for_each_subtag_str(f)?; |
200 | self.keywords.for_each_subtag_str(f)?; |
201 | Ok(()) |
202 | } |
203 | } |
204 | |
// NOTE(review): presumably generates the `Display` impl for `Unicode` by
// delegating to its `Writeable` impl — confirm against the `writeable` crate's
// documentation for this macro.
writeable::impl_display_with_writeable!(Unicode);
206 | |
207 | impl writeable::Writeable for Unicode { |
208 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
209 | if self.is_empty() { |
210 | return Ok(()); |
211 | } |
212 | sink.write_str("u" )?; |
213 | if !self.attributes.is_empty() { |
214 | sink.write_char('-' )?; |
215 | writeable::Writeable::write_to(&self.attributes, sink)?; |
216 | } |
217 | if !self.keywords.is_empty() { |
218 | sink.write_char('-' )?; |
219 | writeable::Writeable::write_to(&self.keywords, sink)?; |
220 | } |
221 | Ok(()) |
222 | } |
223 | |
224 | fn writeable_length_hint(&self) -> writeable::LengthHint { |
225 | if self.is_empty() { |
226 | return writeable::LengthHint::exact(0); |
227 | } |
228 | let mut result = writeable::LengthHint::exact(1); |
229 | if !self.attributes.is_empty() { |
230 | result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; |
231 | } |
232 | if !self.keywords.is_empty() { |
233 | result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; |
234 | } |
235 | result |
236 | } |
237 | } |
238 | |