1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use core::borrow::Borrow; |
6 | use core::cmp::Ordering; |
7 | use core::iter::FromIterator; |
8 | use litemap::LiteMap; |
9 | use writeable::Writeable; |
10 | |
11 | use super::Key; |
12 | use super::Value; |
13 | #[allow (deprecated)] |
14 | use crate::ordering::SubtagOrderingResult; |
15 | use crate::shortvec::ShortBoxSlice; |
16 | |
17 | /// A list of [`Key`]-[`Value`] pairs representing functional information |
18 | /// about locale's internationalization preferences. |
19 | /// |
20 | /// Here are examples of fields used in Unicode: |
21 | /// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`) |
22 | /// - `ca` - Calendar (`buddhist`, `gregory`, ...) |
23 | /// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...) |
24 | /// |
25 | /// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML. |
26 | /// |
27 | /// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_ |
28 | /// |
29 | /// # Examples |
30 | /// |
31 | /// Manually build up a [`Keywords`] object: |
32 | /// |
33 | /// ``` |
34 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
35 | /// |
36 | /// let keywords = [(key!("hc" ), value!("h23" ))] |
37 | /// .into_iter() |
38 | /// .collect::<Keywords>(); |
39 | /// |
40 | /// assert_eq!(&keywords.to_string(), "hc-h23" ); |
41 | /// ``` |
42 | /// |
43 | /// Access a [`Keywords`] object from a [`Locale`]: |
44 | /// |
45 | /// ``` |
46 | /// use icu::locid::{ |
47 | /// extensions::unicode::{key, value}, |
48 | /// Locale, |
49 | /// }; |
50 | /// |
51 | /// let loc: Locale = "und-u-hc-h23-kc-true" .parse().expect("Valid BCP-47" ); |
52 | /// |
53 | /// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca" )), None); |
54 | /// assert_eq!( |
55 | /// loc.extensions.unicode.keywords.get(&key!("hc" )), |
56 | /// Some(&value!("h23" )) |
57 | /// ); |
58 | /// assert_eq!( |
59 | /// loc.extensions.unicode.keywords.get(&key!("kc" )), |
60 | /// Some(&value!("true" )) |
61 | /// ); |
62 | /// |
63 | /// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc" ); |
64 | /// ``` |
65 | /// |
66 | /// [`Locale`]: crate::Locale |
67 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
68 | pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>); |
69 | |
70 | impl Keywords { |
71 | /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. |
72 | /// |
73 | /// # Examples |
74 | /// |
75 | /// ``` |
76 | /// use icu::locid::extensions::unicode::Keywords; |
77 | /// |
78 | /// assert_eq!(Keywords::new(), Keywords::default()); |
79 | /// ``` |
80 | #[inline ] |
81 | pub const fn new() -> Self { |
82 | Self(LiteMap::new()) |
83 | } |
84 | |
85 | /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context. |
86 | #[inline ] |
87 | pub const fn new_single(key: Key, value: Value) -> Self { |
88 | Self(LiteMap::from_sorted_store_unchecked( |
89 | ShortBoxSlice::new_single((key, value)), |
90 | )) |
91 | } |
92 | |
93 | /// Returns `true` if there are no keywords. |
94 | /// |
95 | /// # Examples |
96 | /// |
97 | /// ``` |
98 | /// use icu::locid::locale; |
99 | /// use icu::locid::Locale; |
100 | /// |
101 | /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid" ).unwrap(); |
102 | /// let loc2 = locale!("und-u-ca-buddhist" ); |
103 | /// |
104 | /// assert!(loc1.extensions.unicode.keywords.is_empty()); |
105 | /// assert!(!loc2.extensions.unicode.keywords.is_empty()); |
106 | /// ``` |
107 | pub fn is_empty(&self) -> bool { |
108 | self.0.is_empty() |
109 | } |
110 | |
111 | /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. |
112 | /// |
113 | /// |
114 | /// # Examples |
115 | /// |
116 | /// ``` |
117 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
118 | /// |
119 | /// let keywords = [(key!("ca" ), value!("gregory" ))] |
120 | /// .into_iter() |
121 | /// .collect::<Keywords>(); |
122 | /// |
123 | /// assert!(&keywords.contains_key(&key!("ca" ))); |
124 | /// ``` |
125 | pub fn contains_key<Q>(&self, key: &Q) -> bool |
126 | where |
127 | Key: Borrow<Q>, |
128 | Q: Ord, |
129 | { |
130 | self.0.contains_key(key) |
131 | } |
132 | |
133 | /// Returns a reference to the [`Value`] corresponding to the [`Key`]. |
134 | /// |
135 | /// |
136 | /// # Examples |
137 | /// |
138 | /// ``` |
139 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
140 | /// |
141 | /// let keywords = [(key!("ca" ), value!("buddhist" ))] |
142 | /// .into_iter() |
143 | /// .collect::<Keywords>(); |
144 | /// |
145 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("buddhist" ))); |
146 | /// ``` |
147 | pub fn get<Q>(&self, key: &Q) -> Option<&Value> |
148 | where |
149 | Key: Borrow<Q>, |
150 | Q: Ord, |
151 | { |
152 | self.0.get(key) |
153 | } |
154 | |
155 | /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`]. |
156 | /// |
157 | /// Returns `None` if the key doesn't exist or if the key has no value. |
158 | /// |
159 | /// # Examples |
160 | /// |
161 | /// ``` |
162 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
163 | /// |
164 | /// let mut keywords = [(key!("ca" ), value!("buddhist" ))] |
165 | /// .into_iter() |
166 | /// .collect::<Keywords>(); |
167 | /// |
168 | /// if let Some(value) = keywords.get_mut(&key!("ca" )) { |
169 | /// *value = value!("gregory" ); |
170 | /// } |
171 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("gregory" ))); |
172 | /// ``` |
173 | pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value> |
174 | where |
175 | Key: Borrow<Q>, |
176 | Q: Ord, |
177 | { |
178 | self.0.get_mut(key) |
179 | } |
180 | |
181 | /// Sets the specified keyword, returning the old value if it already existed. |
182 | /// |
183 | /// # Examples |
184 | /// |
185 | /// ``` |
186 | /// use icu::locid::extensions::unicode::{key, value}; |
187 | /// use icu::locid::Locale; |
188 | /// |
189 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
190 | /// .parse() |
191 | /// .expect("valid BCP-47 identifier" ); |
192 | /// let old_value = loc |
193 | /// .extensions |
194 | /// .unicode |
195 | /// .keywords |
196 | /// .set(key!("ca" ), value!("japanese" )); |
197 | /// |
198 | /// assert_eq!(old_value, Some(value!("buddhist" ))); |
199 | /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12" .parse().unwrap()); |
200 | /// ``` |
201 | pub fn set(&mut self, key: Key, value: Value) -> Option<Value> { |
202 | self.0.insert(key, value) |
203 | } |
204 | |
205 | /// Removes the specified keyword, returning the old value if it existed. |
206 | /// |
207 | /// # Examples |
208 | /// |
209 | /// ``` |
210 | /// use icu::locid::extensions::unicode::key; |
211 | /// use icu::locid::Locale; |
212 | /// |
213 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
214 | /// .parse() |
215 | /// .expect("valid BCP-47 identifier" ); |
216 | /// loc.extensions.unicode.keywords.remove(key!("ca" )); |
217 | /// assert_eq!(loc, "und-u-hello-hc-h12" .parse().unwrap()); |
218 | /// ``` |
219 | pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> { |
220 | self.0.remove(key.borrow()) |
221 | } |
222 | |
223 | /// Clears all Unicode extension keywords, leaving Unicode attributes. |
224 | /// |
225 | /// Returns the old Unicode extension keywords. |
226 | /// |
227 | /// # Example |
228 | /// |
229 | /// ``` |
230 | /// use icu::locid::Locale; |
231 | /// |
232 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
233 | /// loc.extensions.unicode.keywords.clear(); |
234 | /// assert_eq!(loc, "und-u-hello" .parse().unwrap()); |
235 | /// ``` |
236 | pub fn clear(&mut self) -> Self { |
237 | core::mem::take(self) |
238 | } |
239 | |
240 | /// Retains a subset of keywords as specified by the predicate function. |
241 | /// |
242 | /// # Examples |
243 | /// |
244 | /// ``` |
245 | /// use icu::locid::extensions::unicode::key; |
246 | /// use icu::locid::Locale; |
247 | /// |
248 | /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric" .parse().unwrap(); |
249 | /// |
250 | /// loc.extensions |
251 | /// .unicode |
252 | /// .keywords |
253 | /// .retain_by_key(|&k| k == key!("hc" )); |
254 | /// assert_eq!(loc, "und-u-hc-h12" .parse().unwrap()); |
255 | /// |
256 | /// loc.extensions |
257 | /// .unicode |
258 | /// .keywords |
259 | /// .retain_by_key(|&k| k == key!("ms" )); |
260 | /// assert_eq!(loc, Locale::UND); |
261 | /// ``` |
262 | pub fn retain_by_key<F>(&mut self, mut predicate: F) |
263 | where |
264 | F: FnMut(&Key) -> bool, |
265 | { |
266 | self.0.retain(|k, _| predicate(k)) |
267 | } |
268 | |
269 | /// Compare this [`Keywords`] with BCP-47 bytes. |
270 | /// |
271 | /// The return value is equivalent to what would happen if you first converted this |
272 | /// [`Keywords`] to a BCP-47 string and then performed a byte comparison. |
273 | /// |
274 | /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
275 | /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
276 | /// |
277 | /// # Examples |
278 | /// |
279 | /// ``` |
280 | /// use icu::locid::Locale; |
281 | /// use std::cmp::Ordering; |
282 | /// |
283 | /// let bcp47_strings: &[&str] = |
284 | /// &["ca-hebrew" , "ca-japanese" , "ca-japanese-nu-latn" , "nu-latn" ]; |
285 | /// |
286 | /// for ab in bcp47_strings.windows(2) { |
287 | /// let a = ab[0]; |
288 | /// let b = ab[1]; |
289 | /// assert!(a.cmp(b) == Ordering::Less); |
290 | /// let a_kwds = format!("und-u-{}" , a) |
291 | /// .parse::<Locale>() |
292 | /// .unwrap() |
293 | /// .extensions |
294 | /// .unicode |
295 | /// .keywords; |
296 | /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal); |
297 | /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less); |
298 | /// } |
299 | /// ``` |
300 | pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
301 | self.writeable_cmp_bytes(other) |
302 | } |
303 | |
304 | /// Compare this [`Keywords`] with an iterator of BCP-47 subtags. |
305 | /// |
306 | /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as |
307 | /// a more modular version that allows multiple subtag iterators to be chained together. |
308 | /// |
309 | /// For an additional example, see [`SubtagOrderingResult`]. |
310 | /// |
311 | /// # Examples |
312 | /// |
313 | /// ``` |
314 | /// use icu::locid::locale; |
315 | /// use std::cmp::Ordering; |
316 | /// |
317 | /// let subtags: &[&[u8]] = &[b"ca" , b"buddhist" ]; |
318 | /// |
319 | /// let kwds = locale!("und-u-ca-buddhist" ).extensions.unicode.keywords; |
320 | /// assert_eq!( |
321 | /// Ordering::Equal, |
322 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
323 | /// ); |
324 | /// |
325 | /// let kwds = locale!("und" ).extensions.unicode.keywords; |
326 | /// assert_eq!( |
327 | /// Ordering::Less, |
328 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
329 | /// ); |
330 | /// |
331 | /// let kwds = locale!("und-u-nu-latn" ).extensions.unicode.keywords; |
332 | /// assert_eq!( |
333 | /// Ordering::Greater, |
334 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
335 | /// ); |
336 | /// ``` |
337 | #[deprecated (since = "1.5.0" , note = "if you need this, please file an issue" )] |
338 | #[allow (deprecated)] |
339 | pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> |
340 | where |
341 | I: Iterator<Item = &'l [u8]>, |
342 | { |
343 | let r = self.for_each_subtag_str(&mut |subtag| { |
344 | if let Some(other) = subtags.next() { |
345 | match subtag.as_bytes().cmp(other) { |
346 | Ordering::Equal => Ok(()), |
347 | not_equal => Err(not_equal), |
348 | } |
349 | } else { |
350 | Err(Ordering::Greater) |
351 | } |
352 | }); |
353 | match r { |
354 | Ok(_) => SubtagOrderingResult::Subtags(subtags), |
355 | Err(o) => SubtagOrderingResult::Ordering(o), |
356 | } |
357 | } |
358 | |
359 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
360 | where |
361 | F: FnMut(&str) -> Result<(), E>, |
362 | { |
363 | for (k, v) in self.0.iter() { |
364 | f(k.as_str())?; |
365 | v.for_each_subtag_str(f)?; |
366 | } |
367 | Ok(()) |
368 | } |
369 | |
370 | /// This needs to be its own method to help with type inference in helpers.rs |
371 | #[cfg (test)] |
372 | pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { |
373 | v.into_iter().collect() |
374 | } |
375 | } |
376 | |
377 | impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords { |
378 | fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self { |
379 | Self(map) |
380 | } |
381 | } |
382 | |
383 | impl FromIterator<(Key, Value)> for Keywords { |
384 | fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self { |
385 | LiteMap::from_iter(iter).into() |
386 | } |
387 | } |
388 | |
// NOTE(review): the macro is defined elsewhere in the crate. Presumably it generates the
// `Writeable` / string-formatting impls for `Keywords` that `strict_cmp` and the
// `to_string()` doc examples rely on, and the string literals look like sample
// key/value inputs for generated examples or tests — confirm against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
390 | |