// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
| 4 | |
| 5 | use core::borrow::Borrow; |
| 6 | use core::cmp::Ordering; |
| 7 | use core::iter::FromIterator; |
| 8 | use litemap::LiteMap; |
| 9 | use writeable::Writeable; |
| 10 | |
| 11 | use super::Key; |
| 12 | use super::Value; |
| 13 | #[allow (deprecated)] |
| 14 | use crate::ordering::SubtagOrderingResult; |
| 15 | use crate::shortvec::ShortBoxSlice; |
| 16 | |
| 17 | /// A list of [`Key`]-[`Value`] pairs representing functional information |
| 18 | /// about locale's internationalization preferences. |
| 19 | /// |
| 20 | /// Here are examples of fields used in Unicode: |
| 21 | /// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`) |
| 22 | /// - `ca` - Calendar (`buddhist`, `gregory`, ...) |
| 23 | /// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...) |
| 24 | /// |
| 25 | /// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML. |
| 26 | /// |
| 27 | /// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_ |
| 28 | /// |
| 29 | /// # Examples |
| 30 | /// |
| 31 | /// Manually build up a [`Keywords`] object: |
| 32 | /// |
| 33 | /// ``` |
| 34 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
| 35 | /// |
| 36 | /// let keywords = [(key!("hc" ), value!("h23" ))] |
| 37 | /// .into_iter() |
| 38 | /// .collect::<Keywords>(); |
| 39 | /// |
| 40 | /// assert_eq!(&keywords.to_string(), "hc-h23" ); |
| 41 | /// ``` |
| 42 | /// |
| 43 | /// Access a [`Keywords`] object from a [`Locale`]: |
| 44 | /// |
| 45 | /// ``` |
| 46 | /// use icu::locid::{ |
| 47 | /// extensions::unicode::{key, value}, |
| 48 | /// Locale, |
| 49 | /// }; |
| 50 | /// |
| 51 | /// let loc: Locale = "und-u-hc-h23-kc-true" .parse().expect("Valid BCP-47" ); |
| 52 | /// |
| 53 | /// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca" )), None); |
| 54 | /// assert_eq!( |
| 55 | /// loc.extensions.unicode.keywords.get(&key!("hc" )), |
| 56 | /// Some(&value!("h23" )) |
| 57 | /// ); |
| 58 | /// assert_eq!( |
| 59 | /// loc.extensions.unicode.keywords.get(&key!("kc" )), |
| 60 | /// Some(&value!("true" )) |
| 61 | /// ); |
| 62 | /// |
| 63 | /// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc" ); |
| 64 | /// ``` |
| 65 | /// |
| 66 | /// [`Locale`]: crate::Locale |
| 67 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
| 68 | pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>); |
| 69 | |
| 70 | impl Keywords { |
| 71 | /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`. |
| 72 | /// |
| 73 | /// # Examples |
| 74 | /// |
| 75 | /// ``` |
| 76 | /// use icu::locid::extensions::unicode::Keywords; |
| 77 | /// |
| 78 | /// assert_eq!(Keywords::new(), Keywords::default()); |
| 79 | /// ``` |
| 80 | #[inline ] |
| 81 | pub const fn new() -> Self { |
| 82 | Self(LiteMap::new()) |
| 83 | } |
| 84 | |
| 85 | /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context. |
| 86 | #[inline ] |
| 87 | pub const fn new_single(key: Key, value: Value) -> Self { |
| 88 | Self(LiteMap::from_sorted_store_unchecked( |
| 89 | ShortBoxSlice::new_single((key, value)), |
| 90 | )) |
| 91 | } |
| 92 | |
| 93 | /// Returns `true` if there are no keywords. |
| 94 | /// |
| 95 | /// # Examples |
| 96 | /// |
| 97 | /// ``` |
| 98 | /// use icu::locid::locale; |
| 99 | /// use icu::locid::Locale; |
| 100 | /// |
| 101 | /// let loc1 = Locale::try_from_bytes(b"und-t-h0-hybrid" ).unwrap(); |
| 102 | /// let loc2 = locale!("und-u-ca-buddhist" ); |
| 103 | /// |
| 104 | /// assert!(loc1.extensions.unicode.keywords.is_empty()); |
| 105 | /// assert!(!loc2.extensions.unicode.keywords.is_empty()); |
| 106 | /// ``` |
| 107 | pub fn is_empty(&self) -> bool { |
| 108 | self.0.is_empty() |
| 109 | } |
| 110 | |
| 111 | /// Returns `true` if the list contains a [`Value`] for the specified [`Key`]. |
| 112 | /// |
| 113 | /// |
| 114 | /// # Examples |
| 115 | /// |
| 116 | /// ``` |
| 117 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
| 118 | /// |
| 119 | /// let keywords = [(key!("ca" ), value!("gregory" ))] |
| 120 | /// .into_iter() |
| 121 | /// .collect::<Keywords>(); |
| 122 | /// |
| 123 | /// assert!(&keywords.contains_key(&key!("ca" ))); |
| 124 | /// ``` |
| 125 | pub fn contains_key<Q>(&self, key: &Q) -> bool |
| 126 | where |
| 127 | Key: Borrow<Q>, |
| 128 | Q: Ord, |
| 129 | { |
| 130 | self.0.contains_key(key) |
| 131 | } |
| 132 | |
| 133 | /// Returns a reference to the [`Value`] corresponding to the [`Key`]. |
| 134 | /// |
| 135 | /// |
| 136 | /// # Examples |
| 137 | /// |
| 138 | /// ``` |
| 139 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
| 140 | /// |
| 141 | /// let keywords = [(key!("ca" ), value!("buddhist" ))] |
| 142 | /// .into_iter() |
| 143 | /// .collect::<Keywords>(); |
| 144 | /// |
| 145 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("buddhist" ))); |
| 146 | /// ``` |
| 147 | pub fn get<Q>(&self, key: &Q) -> Option<&Value> |
| 148 | where |
| 149 | Key: Borrow<Q>, |
| 150 | Q: Ord, |
| 151 | { |
| 152 | self.0.get(key) |
| 153 | } |
| 154 | |
| 155 | /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`]. |
| 156 | /// |
| 157 | /// Returns `None` if the key doesn't exist or if the key has no value. |
| 158 | /// |
| 159 | /// # Examples |
| 160 | /// |
| 161 | /// ``` |
| 162 | /// use icu::locid::extensions::unicode::{key, value, Keywords}; |
| 163 | /// |
| 164 | /// let mut keywords = [(key!("ca" ), value!("buddhist" ))] |
| 165 | /// .into_iter() |
| 166 | /// .collect::<Keywords>(); |
| 167 | /// |
| 168 | /// if let Some(value) = keywords.get_mut(&key!("ca" )) { |
| 169 | /// *value = value!("gregory" ); |
| 170 | /// } |
| 171 | /// assert_eq!(keywords.get(&key!("ca" )), Some(&value!("gregory" ))); |
| 172 | /// ``` |
| 173 | pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value> |
| 174 | where |
| 175 | Key: Borrow<Q>, |
| 176 | Q: Ord, |
| 177 | { |
| 178 | self.0.get_mut(key) |
| 179 | } |
| 180 | |
| 181 | /// Sets the specified keyword, returning the old value if it already existed. |
| 182 | /// |
| 183 | /// # Examples |
| 184 | /// |
| 185 | /// ``` |
| 186 | /// use icu::locid::extensions::unicode::{key, value}; |
| 187 | /// use icu::locid::Locale; |
| 188 | /// |
| 189 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
| 190 | /// .parse() |
| 191 | /// .expect("valid BCP-47 identifier" ); |
| 192 | /// let old_value = loc |
| 193 | /// .extensions |
| 194 | /// .unicode |
| 195 | /// .keywords |
| 196 | /// .set(key!("ca" ), value!("japanese" )); |
| 197 | /// |
| 198 | /// assert_eq!(old_value, Some(value!("buddhist" ))); |
| 199 | /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12" .parse().unwrap()); |
| 200 | /// ``` |
| 201 | pub fn set(&mut self, key: Key, value: Value) -> Option<Value> { |
| 202 | self.0.insert(key, value) |
| 203 | } |
| 204 | |
| 205 | /// Removes the specified keyword, returning the old value if it existed. |
| 206 | /// |
| 207 | /// # Examples |
| 208 | /// |
| 209 | /// ``` |
| 210 | /// use icu::locid::extensions::unicode::key; |
| 211 | /// use icu::locid::Locale; |
| 212 | /// |
| 213 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" |
| 214 | /// .parse() |
| 215 | /// .expect("valid BCP-47 identifier" ); |
| 216 | /// loc.extensions.unicode.keywords.remove(key!("ca" )); |
| 217 | /// assert_eq!(loc, "und-u-hello-hc-h12" .parse().unwrap()); |
| 218 | /// ``` |
| 219 | pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> { |
| 220 | self.0.remove(key.borrow()) |
| 221 | } |
| 222 | |
| 223 | /// Clears all Unicode extension keywords, leaving Unicode attributes. |
| 224 | /// |
| 225 | /// Returns the old Unicode extension keywords. |
| 226 | /// |
| 227 | /// # Example |
| 228 | /// |
| 229 | /// ``` |
| 230 | /// use icu::locid::Locale; |
| 231 | /// |
| 232 | /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
| 233 | /// loc.extensions.unicode.keywords.clear(); |
| 234 | /// assert_eq!(loc, "und-u-hello" .parse().unwrap()); |
| 235 | /// ``` |
| 236 | pub fn clear(&mut self) -> Self { |
| 237 | core::mem::take(self) |
| 238 | } |
| 239 | |
| 240 | /// Retains a subset of keywords as specified by the predicate function. |
| 241 | /// |
| 242 | /// # Examples |
| 243 | /// |
| 244 | /// ``` |
| 245 | /// use icu::locid::extensions::unicode::key; |
| 246 | /// use icu::locid::Locale; |
| 247 | /// |
| 248 | /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric" .parse().unwrap(); |
| 249 | /// |
| 250 | /// loc.extensions |
| 251 | /// .unicode |
| 252 | /// .keywords |
| 253 | /// .retain_by_key(|&k| k == key!("hc" )); |
| 254 | /// assert_eq!(loc, "und-u-hc-h12" .parse().unwrap()); |
| 255 | /// |
| 256 | /// loc.extensions |
| 257 | /// .unicode |
| 258 | /// .keywords |
| 259 | /// .retain_by_key(|&k| k == key!("ms" )); |
| 260 | /// assert_eq!(loc, Locale::UND); |
| 261 | /// ``` |
| 262 | pub fn retain_by_key<F>(&mut self, mut predicate: F) |
| 263 | where |
| 264 | F: FnMut(&Key) -> bool, |
| 265 | { |
| 266 | self.0.retain(|k, _| predicate(k)) |
| 267 | } |
| 268 | |
| 269 | /// Compare this [`Keywords`] with BCP-47 bytes. |
| 270 | /// |
| 271 | /// The return value is equivalent to what would happen if you first converted this |
| 272 | /// [`Keywords`] to a BCP-47 string and then performed a byte comparison. |
| 273 | /// |
| 274 | /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
| 275 | /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
| 276 | /// |
| 277 | /// # Examples |
| 278 | /// |
| 279 | /// ``` |
| 280 | /// use icu::locid::Locale; |
| 281 | /// use std::cmp::Ordering; |
| 282 | /// |
| 283 | /// let bcp47_strings: &[&str] = |
| 284 | /// &["ca-hebrew" , "ca-japanese" , "ca-japanese-nu-latn" , "nu-latn" ]; |
| 285 | /// |
| 286 | /// for ab in bcp47_strings.windows(2) { |
| 287 | /// let a = ab[0]; |
| 288 | /// let b = ab[1]; |
| 289 | /// assert!(a.cmp(b) == Ordering::Less); |
| 290 | /// let a_kwds = format!("und-u-{}" , a) |
| 291 | /// .parse::<Locale>() |
| 292 | /// .unwrap() |
| 293 | /// .extensions |
| 294 | /// .unicode |
| 295 | /// .keywords; |
| 296 | /// assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal); |
| 297 | /// assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less); |
| 298 | /// } |
| 299 | /// ``` |
| 300 | pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
| 301 | self.writeable_cmp_bytes(other) |
| 302 | } |
| 303 | |
| 304 | /// Compare this [`Keywords`] with an iterator of BCP-47 subtags. |
| 305 | /// |
| 306 | /// This function has the same equality semantics as [`Keywords::strict_cmp`]. It is intended as |
| 307 | /// a more modular version that allows multiple subtag iterators to be chained together. |
| 308 | /// |
| 309 | /// For an additional example, see [`SubtagOrderingResult`]. |
| 310 | /// |
| 311 | /// # Examples |
| 312 | /// |
| 313 | /// ``` |
| 314 | /// use icu::locid::locale; |
| 315 | /// use std::cmp::Ordering; |
| 316 | /// |
| 317 | /// let subtags: &[&[u8]] = &[b"ca" , b"buddhist" ]; |
| 318 | /// |
| 319 | /// let kwds = locale!("und-u-ca-buddhist" ).extensions.unicode.keywords; |
| 320 | /// assert_eq!( |
| 321 | /// Ordering::Equal, |
| 322 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
| 323 | /// ); |
| 324 | /// |
| 325 | /// let kwds = locale!("und" ).extensions.unicode.keywords; |
| 326 | /// assert_eq!( |
| 327 | /// Ordering::Less, |
| 328 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
| 329 | /// ); |
| 330 | /// |
| 331 | /// let kwds = locale!("und-u-nu-latn" ).extensions.unicode.keywords; |
| 332 | /// assert_eq!( |
| 333 | /// Ordering::Greater, |
| 334 | /// kwds.strict_cmp_iter(subtags.iter().copied()).end() |
| 335 | /// ); |
| 336 | /// ``` |
| 337 | #[deprecated (since = "1.5.0" , note = "if you need this, please file an issue" )] |
| 338 | #[allow (deprecated)] |
| 339 | pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I> |
| 340 | where |
| 341 | I: Iterator<Item = &'l [u8]>, |
| 342 | { |
| 343 | let r = self.for_each_subtag_str(&mut |subtag| { |
| 344 | if let Some(other) = subtags.next() { |
| 345 | match subtag.as_bytes().cmp(other) { |
| 346 | Ordering::Equal => Ok(()), |
| 347 | not_equal => Err(not_equal), |
| 348 | } |
| 349 | } else { |
| 350 | Err(Ordering::Greater) |
| 351 | } |
| 352 | }); |
| 353 | match r { |
| 354 | Ok(_) => SubtagOrderingResult::Subtags(subtags), |
| 355 | Err(o) => SubtagOrderingResult::Ordering(o), |
| 356 | } |
| 357 | } |
| 358 | |
| 359 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
| 360 | where |
| 361 | F: FnMut(&str) -> Result<(), E>, |
| 362 | { |
| 363 | for (k, v) in self.0.iter() { |
| 364 | f(k.as_str())?; |
| 365 | v.for_each_subtag_str(f)?; |
| 366 | } |
| 367 | Ok(()) |
| 368 | } |
| 369 | |
| 370 | /// This needs to be its own method to help with type inference in helpers.rs |
| 371 | #[cfg (test)] |
| 372 | pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self { |
| 373 | v.into_iter().collect() |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords { |
| 378 | fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self { |
| 379 | Self(map) |
| 380 | } |
| 381 | } |
| 382 | |
| 383 | impl FromIterator<(Key, Value)> for Keywords { |
| 384 | fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self { |
| 385 | LiteMap::from_iter(iter).into() |
| 386 | } |
| 387 | } |
| 388 | |
// Invokes a crate-internal macro that is defined outside this file.
// NOTE(review): presumably this generates the `Writeable`/`Display` impls that
// `to_string()` and `strict_cmp` rely on, and the string arguments look like sample
// subtags for generated tests — confirm against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
| 390 | |