1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use core::borrow::Borrow; |
6 | use core::cmp::Ordering; |
7 | use core::iter::FromIterator; |
8 | use litemap::LiteMap; |
9 | |
10 | use super::Key; |
11 | use super::Value; |
12 | use crate::helpers::ShortSlice; |
13 | use crate::ordering::SubtagOrderingResult; |
14 | |
15 | /// A list of [`Key`]-[`Value`] pairs representing functional information |
16 | /// about locale's internationalization preferences. |
17 | /// |
18 | /// Here are examples of fields used in Unicode: |
19 | /// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`) |
20 | /// - `ca` - Calendar (`buddhist`, `gregory`, ...) |
21 | /// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...) |
22 | /// |
23 | /// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML. |
24 | /// |
25 | /// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_ |
26 | /// |
27 | /// # Examples |
28 | /// |
29 | /// Manually build up a [`Keywords`] object: |
30 | /// |
31 | /// ``` |
32 | /// use icu::locid::{ |
33 | /// extensions::unicode::{key, value, Keywords}, |
34 | /// locale, |
35 | /// }; |
36 | /// |
37 | /// let keywords = [(key!("hc" ), value!("h23" ))] |
38 | /// .into_iter() |
39 | /// .collect::<Keywords>(); |
40 | /// |
41 | /// assert_eq!(&keywords.to_string(), "hc-h23" ); |
42 | /// ``` |
43 | /// |
44 | /// Access a [`Keywords`] object from a [`Locale`]: |
45 | /// |
46 | /// ``` |
47 | /// use icu::locid::{ |
48 | /// extensions::unicode::{key, value}, |
49 | /// Locale, |
50 | /// }; |
51 | /// |
52 | /// let loc: Locale = "und-u-hc-h23-kc-true" .parse().expect("Valid BCP-47" ); |
53 | /// |
54 | /// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca" )), None); |
55 | /// assert_eq!( |
56 | /// loc.extensions.unicode.keywords.get(&key!("hc" )), |
57 | /// Some(&value!("h23" )) |
58 | /// ); |
59 | /// assert_eq!( |
60 | /// loc.extensions.unicode.keywords.get(&key!("kc" )), |
61 | /// Some(&value!("true" )) |
62 | /// ); |
63 | /// |
64 | /// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc" ); |
65 | /// ``` |
66 | /// |
67 | /// [`Locale`]: crate::Locale |
68 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
69 | pub struct Keywords(LiteMap<Key, Value, ShortSlice<(Key, Value)>>); |
70 | |
71 | impl Keywords { |
72 | /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. |
73 | /// |
74 | /// # Examples |
75 | /// |
76 | /// ``` |
77 | /// use icu::locid::extensions::unicode::Keywords; |
78 | /// |
79 | /// assert_eq!(Keywords::new(), Keywords::default()); |
80 | /// ``` |
81 | #[inline ] |
82 | pub const fn new() -> Self { |
83 | Self(LiteMap::new()) |
84 | } |
85 | |
86 | /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context. |
87 | #[inline ] |
88 | pub const fn new_single(key: Key, value: Value) -> Self { |
89 | Self(LiteMap::from_sorted_store_unchecked( |
90 | ShortSlice::new_single((key, value)), |
91 | )) |
92 | } |
93 | |
94 | /// Returns `true` if there are no keywords. |
95 | /// |
96 | /// # Examples |
97 | /// |
98 | /// ``` |
99 | /// use icu::locid::extensions::unicode::Keywords; |
100 | /// use icu::locid::locale; |
101 | /// use icu::locid::Locale; |
102 | /// |
103 | /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid" ).unwrap(); |
104 | /// let loc2 = locale!("und-u-ca-buddhist" ); |
105 | /// |
106 | /// assert!(loc1.extensions.unicode.keywords.is_empty()); |
107 | /// assert!(!loc2.extensions.unicode.keywords.is_empty()); |
108 | /// ``` |
109 | pub fn is_empty(&self) -> bool { |
110 | self.0.is_empty() |
111 | } |
112 | |
113 | /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. |
114 | /// |
115 | /// |
116 | /// # Examples |
117 | /// |
118 | /// ``` |
119 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
120 | /// |
121 | /// let keywords = [(key!("ca" ), value!("gregory" ))] |
122 | /// .into_iter() |
123 | /// .collect::<Keywords>(); |
124 | /// |
125 | /// assert!(&keywords.contains_key(&key!("ca" ))); |
126 | /// ``` |
127 | pub fn contains_key<Q>(&self, key: &Q) -> bool |
128 | where |
129 | Key: Borrow<Q>, |
130 | Q: Ord, |
131 | { |
132 | self.0.contains_key(key) |
133 | } |
134 | |
135 | /// Returns a reference to the [`Value`] corresponding to the [`Key`]. |
136 | /// |
137 | /// |
138 | /// # Examples |
139 | /// |
140 | /// ``` |
141 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
142 | /// |
143 | /// let keywords = [(key!("ca" ), value!("buddhist" ))] |
144 | /// .into_iter() |
145 | /// .collect::<Keywords>(); |
146 | /// |
147 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("buddhist" ))); |
148 | /// ``` |
149 | pub fn get<Q>(&self, key: &Q) -> Option<&Value> |
150 | where |
151 | Key: Borrow<Q>, |
152 | Q: Ord, |
153 | { |
154 | self.0.get(key) |
155 | } |
156 | |
157 | /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`]. |
158 | /// |
159 | /// Returns `None` if the key doesn't exist or if the key has no value. |
160 | /// |
161 | /// # Examples |
162 | /// |
163 | /// ``` |
164 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
165 | /// |
166 | /// let mut keywords = [(key!("ca" ), value!("buddhist" ))] |
167 | /// .into_iter() |
168 | /// .collect::<Keywords>(); |
169 | /// |
170 | /// if let Some(value) = keywords.get_mut(&key!("ca" )) { |
171 | /// *value = value!("gregory" ); |
172 | /// } |
173 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("gregory" ))); |
174 | /// ``` |
175 | pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value> |
176 | where |
177 | Key: Borrow<Q>, |
178 | Q: Ord, |
179 | { |
180 | self.0.get_mut(key) |
181 | } |
182 | |
183 | /// Sets the specified keyword, returning the old value if it already existed. |
184 | /// |
185 | /// # Examples |
186 | /// |
187 | /// ``` |
188 | /// use icu::locid::extensions::unicode::Key; |
189 | /// use icu::locid::extensions::unicode::Value; |
190 | /// use icu::locid::extensions::unicode::{key, value}; |
191 | /// use icu::locid::Locale; |
192 | /// |
193 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
194 | /// .parse() |
195 | /// .expect("valid BCP-47 identifier" ); |
196 | /// let old_value = loc |
197 | /// .extensions |
198 | /// .unicode |
199 | /// .keywords |
200 | /// .set(key!("ca" ), value!("japanese" )); |
201 | /// |
202 | /// assert_eq!(old_value, Some(value!("buddhist" ))); |
203 | /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12" .parse().unwrap()); |
204 | /// ``` |
205 | pub fn set(&mut self, key: Key, value: Value) -> Option<Value> { |
206 | self.0.insert(key, value) |
207 | } |
208 | |
209 | /// Removes the specified keyword, returning the old value if it existed. |
210 | /// |
211 | /// # Examples |
212 | /// |
213 | /// ``` |
214 | /// use icu::locid::extensions::unicode::{key, Key}; |
215 | /// use icu::locid::Locale; |
216 | /// |
217 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
218 | /// .parse() |
219 | /// .expect("valid BCP-47 identifier" ); |
220 | /// loc.extensions.unicode.keywords.remove(key!("ca" )); |
221 | /// assert_eq!(loc, "und-u-hello-hc-h12" .parse().unwrap()); |
222 | /// ``` |
223 | pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> { |
224 | self.0.remove(key.borrow()) |
225 | } |
226 | |
227 | /// Clears all Unicode extension keywords, leaving Unicode attributes. |
228 | /// |
229 | /// Returns the old Unicode extension keywords. |
230 | /// |
231 | /// # Example |
232 | /// |
233 | /// ``` |
234 | /// use icu::locid::Locale; |
235 | /// |
236 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
237 | /// loc.extensions.unicode.keywords.clear(); |
238 | /// assert_eq!(loc, "und-u-hello" .parse().unwrap()); |
239 | /// ``` |
240 | pub fn clear(&mut self) -> Self { |
241 | core::mem::take(self) |
242 | } |
243 | |
244 | /// Retains a subset of keywords as specified by the predicate function. |
245 | /// |
246 | /// # Examples |
247 | /// |
248 | /// ``` |
249 | /// use icu::locid::extensions::unicode::key; |
250 | /// use icu::locid::Locale; |
251 | /// |
252 | /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric" .parse().unwrap(); |
253 | /// |
254 | /// loc.extensions |
255 | /// .unicode |
256 | /// .keywords |
257 | /// .retain_by_key(|&k| k == key!("hc" )); |
258 | /// assert_eq!(loc, "und-u-hc-h12" .parse().unwrap()); |
259 | /// |
260 | /// loc.extensions |
261 | /// .unicode |
262 | /// .keywords |
263 | /// .retain_by_key(|&k| k == key!("ms" )); |
264 | /// assert_eq!(loc, Locale::UND); |
265 | /// ``` |
266 | pub fn retain_by_key<F>(&mut self, mut predicate: F) |
267 | where |
268 | F: FnMut(&Key) -> bool, |
269 | { |
270 | self.0.retain(|k, _| predicate(k)) |
271 | } |
272 | |
273 | /// Compare this [`Keywords`] with BCP-47 bytes. |
274 | /// |
275 | /// The return value is equivalent to what would happen if you first converted this |
276 | /// [`Keywords`] to a BCP-47 string and then performed a byte comparison. |
277 | /// |
278 | /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
279 | /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
280 | /// |
281 | /// # Examples |
282 | /// |
283 | /// ``` |
284 | /// use icu::locid::extensions::unicode::Keywords; |
285 | /// use icu::locid::Locale; |
286 | /// use std::cmp::Ordering; |
287 | /// |
288 | /// let bcp47_strings: &[&str] = |
289 | /// &["ca-hebrew" , "ca-japanese" , "ca-japanese-nu-latn" , "nu-latn" ]; |
290 | /// |
291 | /// for ab in bcp47_strings.windows(2) { |
292 | /// let a = ab[0]; |
293 | /// let b = ab[1]; |
294 | /// assert!(a.cmp(b) == Ordering::Less); |
295 | /// let a_kwds = format!("und-u-{}" , a) |
296 | /// .parse::<Locale>() |
297 | /// .unwrap() |
298 | /// .extensions |
299 | /// .unicode |
300 | /// .keywords; |
301 | /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal); |
302 | /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less); |
303 | /// } |
304 | /// ``` |
305 | pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
306 | self.strict_cmp_iter(other.split(|b| *b == b'-' )).end() |
307 | } |
308 | |
309 | /// Compare this [`Keywords`] with an iterator of BCP-47 subtags. |
310 | /// |
311 | /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as |
312 | /// a more modular version that allows multiple subtag iterators to be chained together. |
313 | /// |
314 | /// For an additional example, see [`SubtagOrderingResult`]. |
315 | /// |
316 | /// # Examples |
317 | /// |
318 | /// ``` |
319 | /// use icu::locid::extensions::unicode::Keywords; |
320 | /// use icu::locid::locale; |
321 | /// use std::cmp::Ordering; |
322 | /// |
323 | /// let subtags: &[&[u8]] = &[b"ca" , b"buddhist" ]; |
324 | /// |
325 | /// let kwds = locale!("und-u-ca-buddhist" ).extensions.unicode.keywords; |
326 | /// assert_eq!( |
327 | /// Ordering::Equal, |
328 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
329 | /// ); |
330 | /// |
331 | /// let kwds = locale!("und" ).extensions.unicode.keywords; |
332 | /// assert_eq!( |
333 | /// Ordering::Less, |
334 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
335 | /// ); |
336 | /// |
337 | /// let kwds = locale!("und-u-nu-latn" ).extensions.unicode.keywords; |
338 | /// assert_eq!( |
339 | /// Ordering::Greater, |
340 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
341 | /// ); |
342 | /// ``` |
343 | pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> |
344 | where |
345 | I: Iterator<Item = &'l [u8]>, |
346 | { |
347 | let r = self.for_each_subtag_str(&mut |subtag| { |
348 | if let Some(other) = subtags.next() { |
349 | match subtag.as_bytes().cmp(other) { |
350 | Ordering::Equal => Ok(()), |
351 | not_equal => Err(not_equal), |
352 | } |
353 | } else { |
354 | Err(Ordering::Greater) |
355 | } |
356 | }); |
357 | match r { |
358 | Ok(_) => SubtagOrderingResult::Subtags(subtags), |
359 | Err(o) => SubtagOrderingResult::Ordering(o), |
360 | } |
361 | } |
362 | |
363 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
364 | where |
365 | F: FnMut(&str) -> Result<(), E>, |
366 | { |
367 | for (k, v) in self.0.iter() { |
368 | f(k.as_str())?; |
369 | v.for_each_subtag_str(f)?; |
370 | } |
371 | Ok(()) |
372 | } |
373 | |
/// Test-only constructor from a vector of pairs.
///
/// Kept as a dedicated method because it helps type inference in helpers.rs.
#[cfg(test)]
pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
    Self::from_iter(v)
}
379 | } |
380 | |
381 | impl From<LiteMap<Key, Value, ShortSlice<(Key, Value)>>> for Keywords { |
382 | fn from(map: LiteMap<Key, Value, ShortSlice<(Key, Value)>>) -> Self { |
383 | Self(map) |
384 | } |
385 | } |
386 | |
387 | impl FromIterator<(Key, Value)> for Keywords { |
388 | fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self { |
389 | LiteMap::from_iter(iter).into() |
390 | } |
391 | } |
392 | |
// NOTE(review): `impl_writeable_for_key_value!` is defined outside this file. Presumably it
// generates the string-serialization impl for `Keywords` (the doc examples in this file
// call `to_string()`), and the string arguments look like sample key/value inputs for
// macro-generated tests. Confirm against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca" , "islamic-civil" , "mm" , "mm" );
394 | |