1 | // This file is part of ICU4X. For terms of use, please see the file |
---|---|
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::{DataError, DataErrorKind}; |
6 | use core::cmp::Ordering; |
7 | use core::default::Default; |
8 | use core::fmt; |
9 | use core::fmt::Debug; |
10 | use core::hash::Hash; |
11 | use core::str::FromStr; |
12 | use icu_locid::extensions::unicode as unicode_ext; |
13 | use icu_locid::subtags::{Language, Region, Script, Variants}; |
14 | use icu_locid::{LanguageIdentifier, Locale}; |
15 | use writeable::{LengthHint, Writeable}; |
16 | |
17 | #[cfg(feature = "experimental")] |
18 | use alloc::string::String; |
19 | #[cfg(feature = "experimental")] |
20 | use core::ops::Deref; |
21 | #[cfg(feature = "experimental")] |
22 | use icu_locid::extensions::private::Subtag; |
23 | #[cfg(feature = "experimental")] |
24 | use tinystr::TinyAsciiStr; |
25 | |
26 | #[cfg(doc)] |
27 | use icu_locid::subtags::Variant; |
28 | |
/// The request type passed into all data provider implementations.
///
/// A request pairs a borrowed [`DataLocale`] with [`DataRequestMetadata`]. The type is
/// `Copy`, and its derived `Default` relies on `&DataLocale` implementing `Default`.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct DataRequest<'a> {
    /// The locale for which to load data.
    ///
    /// If locale fallback is enabled, the resulting data may be from a different locale
    /// than the one requested here.
    pub locale: &'a DataLocale,
    /// Metadata that may affect the behavior of the data provider.
    pub metadata: DataRequestMetadata,
}
41 | |
42 | impl fmt::Display for DataRequest<'_> { |
43 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
44 | fmt::Display::fmt(&self.locale, f) |
45 | } |
46 | } |
47 | |
/// Metadata for data requests. This currently carries only the `silent` flag, but it may be
/// extended with options for tuning locale fallback, buffer layout, and so forth.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataRequestMetadata {
    /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
    pub silent: bool,
}
56 | |
57 | /// A locale type optimized for use in fallbacking and the ICU4X data pipeline. |
58 | /// |
59 | /// [`DataLocale`] contains less functionality than [`Locale`] but more than |
60 | /// [`LanguageIdentifier`] for better size and performance while still meeting |
61 | /// the needs of the ICU4X data pipeline. |
62 | /// |
63 | /// # Examples |
64 | /// |
65 | /// Convert a [`Locale`] to a [`DataLocale`] and back: |
66 | /// |
67 | /// ``` |
68 | /// use icu_locid::locale; |
69 | /// use icu_provider::DataLocale; |
70 | /// |
71 | /// let locale = locale!("en-u-ca-buddhist"); |
72 | /// let data_locale = DataLocale::from(locale); |
73 | /// let locale = data_locale.into_locale(); |
74 | /// |
75 | /// assert_eq!(locale, locale!("en-u-ca-buddhist")); |
76 | /// ``` |
77 | /// |
78 | /// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more |
79 | /// efficient than cloning the [`Locale`], but less efficient than converting an owned |
80 | /// [`Locale`]: |
81 | /// |
82 | /// ``` |
83 | /// use icu_locid::locale; |
84 | /// use icu_provider::DataLocale; |
85 | /// |
86 | /// let locale1 = locale!("en-u-ca-buddhist"); |
87 | /// let data_locale = DataLocale::from(&locale1); |
88 | /// let locale2 = data_locale.into_locale(); |
89 | /// |
90 | /// assert_eq!(locale1, locale2); |
91 | /// ``` |
92 | /// |
93 | /// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: |
94 | /// |
95 | /// ``` |
96 | /// use icu_locid::langid; |
97 | /// use icu_provider::DataLocale; |
98 | /// |
99 | /// let langid = langid!("es-CA-valencia"); |
100 | /// let data_locale = DataLocale::from(langid); |
101 | /// let langid = data_locale.get_langid(); |
102 | /// |
103 | /// assert_eq!(langid, langid!("es-CA-valencia")); |
104 | /// ``` |
105 | /// |
106 | /// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data |
107 | /// lookup and fallback. This may change in the future. |
108 | /// |
109 | /// ``` |
110 | /// use icu_locid::{locale, Locale}; |
111 | /// use icu_provider::DataLocale; |
112 | /// |
113 | /// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" |
114 | /// .parse::<Locale>() |
115 | /// .unwrap(); |
116 | /// let data_locale = DataLocale::from(locale); |
117 | /// |
118 | /// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist")); |
119 | /// ``` |
#[derive(PartialEq, Clone, Default, Eq, Hash)]
pub struct DataLocale {
    // The language identifier portion (language, script, region, variants).
    langid: LanguageIdentifier,
    // Unicode extension keywords; serialized after `-u-` when non-empty.
    keywords: unicode_ext::Keywords,
    // Optional auxiliary keys; serialized after `-x-` when present.
    #[cfg(feature = "experimental")]
    aux: Option<AuxiliaryKeys>,
}
127 | |
128 | impl<'a> Default for &'a DataLocale { |
129 | fn default() -> Self { |
130 | static DEFAULT: DataLocale = DataLocale { |
131 | langid: LanguageIdentifier::UND, |
132 | keywords: unicode_ext::Keywords::new(), |
133 | #[cfg(feature = "experimental")] |
134 | aux: None, |
135 | }; |
136 | &DEFAULT |
137 | } |
138 | } |
139 | |
140 | impl fmt::Debug for DataLocale { |
141 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
142 | write!(f, "DataLocale{{{self}}} ") |
143 | } |
144 | } |
145 | |
146 | impl Writeable for DataLocale { |
147 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
148 | self.langid.write_to(sink)?; |
149 | if !self.keywords.is_empty() { |
150 | sink.write_str("-u-")?; |
151 | self.keywords.write_to(sink)?; |
152 | } |
153 | #[cfg(feature = "experimental")] |
154 | if let Some(aux) = self.aux.as_ref() { |
155 | sink.write_str("-x-")?; |
156 | aux.write_to(sink)?; |
157 | } |
158 | Ok(()) |
159 | } |
160 | |
161 | fn writeable_length_hint(&self) -> LengthHint { |
162 | let mut length_hint = self.langid.writeable_length_hint(); |
163 | if !self.keywords.is_empty() { |
164 | length_hint += self.keywords.writeable_length_hint() + 3; |
165 | } |
166 | #[cfg(feature = "experimental")] |
167 | if let Some(aux) = self.aux.as_ref() { |
168 | length_hint += aux.writeable_length_hint() + 3; |
169 | } |
170 | length_hint |
171 | } |
172 | |
173 | fn write_to_string(&self) -> alloc::borrow::Cow<str> { |
174 | #[cfg_attr(not(feature = "experimental"), allow(unused_mut))] |
175 | let mut is_only_langid = self.keywords.is_empty(); |
176 | #[cfg(feature = "experimental")] |
177 | { |
178 | is_only_langid = is_only_langid && self.aux.is_none(); |
179 | } |
180 | if is_only_langid { |
181 | return self.langid.write_to_string(); |
182 | } |
183 | let mut string = |
184 | alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); |
185 | let _ = self.write_to(&mut string); |
186 | alloc::borrow::Cow::Owned(string) |
187 | } |
188 | } |
189 | |
// Provides `fmt::Display` for `DataLocale` by way of its `Writeable` implementation;
// the `Debug` impl relies on this when it formats `{self}`.
writeable::impl_display_with_writeable!(DataLocale);
191 | |
192 | impl From<LanguageIdentifier> for DataLocale { |
193 | fn from(langid: LanguageIdentifier) -> Self { |
194 | Self { |
195 | langid, |
196 | keywords: unicode_ext::Keywords::new(), |
197 | #[cfg(feature = "experimental")] |
198 | aux: None, |
199 | } |
200 | } |
201 | } |
202 | |
203 | impl From<Locale> for DataLocale { |
204 | fn from(locale: Locale) -> Self { |
205 | Self { |
206 | langid: locale.id, |
207 | keywords: locale.extensions.unicode.keywords, |
208 | #[cfg(feature = "experimental")] |
209 | aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), |
210 | } |
211 | } |
212 | } |
213 | |
214 | impl From<&LanguageIdentifier> for DataLocale { |
215 | fn from(langid: &LanguageIdentifier) -> Self { |
216 | Self { |
217 | langid: langid.clone(), |
218 | keywords: unicode_ext::Keywords::new(), |
219 | #[cfg(feature = "experimental")] |
220 | aux: None, |
221 | } |
222 | } |
223 | } |
224 | |
225 | impl From<&Locale> for DataLocale { |
226 | fn from(locale: &Locale) -> Self { |
227 | Self { |
228 | langid: locale.id.clone(), |
229 | keywords: locale.extensions.unicode.keywords.clone(), |
230 | #[cfg(feature = "experimental")] |
231 | aux: AuxiliaryKeys::try_from_iter(locale.extensions.private.iter().copied()).ok(), |
232 | } |
233 | } |
234 | } |
235 | |
236 | impl FromStr for DataLocale { |
237 | type Err = DataError; |
238 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
239 | let locale: Locale = Locale::from_str(s).map_err(|e: ParserError| { |
240 | DataErrorKindDataError::KeyLocaleSyntax |
241 | .into_error() |
242 | .with_display_context(s) |
243 | .with_display_context(&e) |
244 | })?; |
245 | Ok(DataLocale::from(locale)) |
246 | } |
247 | } |
248 | |
249 | impl DataLocale { |
250 | /// Compare this [`DataLocale`] with BCP-47 bytes. |
251 | /// |
252 | /// The return value is equivalent to what would happen if you first converted this |
253 | /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. |
254 | /// |
255 | /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
256 | /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
257 | /// |
258 | /// # Examples |
259 | /// |
260 | /// ``` |
261 | /// use icu_provider::DataLocale; |
262 | /// use std::cmp::Ordering; |
263 | /// |
264 | /// let bcp47_strings: &[&str] = &[ |
265 | /// "ca", |
266 | /// "ca-ES", |
267 | /// "ca-ES-u-ca-buddhist", |
268 | /// "ca-ES-valencia", |
269 | /// "ca-ES-x-gbp", |
270 | /// "ca-ES-x-gbp-short", |
271 | /// "ca-ES-x-usd", |
272 | /// "ca-ES-xyzabc", |
273 | /// "ca-x-eur", |
274 | /// "cat", |
275 | /// "pl-Latn-PL", |
276 | /// "und", |
277 | /// "und-fonipa", |
278 | /// "und-u-ca-hebrew", |
279 | /// "und-u-ca-japanese", |
280 | /// "und-x-mxn", |
281 | /// "zh", |
282 | /// ]; |
283 | /// |
284 | /// for ab in bcp47_strings.windows(2) { |
285 | /// let a = ab[0]; |
286 | /// let b = ab[1]; |
287 | /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b); |
288 | /// let a_loc: DataLocale = a.parse().unwrap(); |
289 | /// assert_eq!( |
290 | /// a_loc.strict_cmp(a.as_bytes()), |
291 | /// Ordering::Equal, |
292 | /// "strict_cmp: {} == {}", |
293 | /// a_loc, |
294 | /// a |
295 | /// ); |
296 | /// assert_eq!( |
297 | /// a_loc.strict_cmp(b.as_bytes()), |
298 | /// Ordering::Less, |
299 | /// "strict_cmp: {} < {}", |
300 | /// a_loc, |
301 | /// b |
302 | /// ); |
303 | /// let b_loc: DataLocale = b.parse().unwrap(); |
304 | /// assert_eq!( |
305 | /// b_loc.strict_cmp(b.as_bytes()), |
306 | /// Ordering::Equal, |
307 | /// "strict_cmp: {} == {}", |
308 | /// b_loc, |
309 | /// b |
310 | /// ); |
311 | /// assert_eq!( |
312 | /// b_loc.strict_cmp(a.as_bytes()), |
313 | /// Ordering::Greater, |
314 | /// "strict_cmp: {} > {}", |
315 | /// b_loc, |
316 | /// a |
317 | /// ); |
318 | /// } |
319 | /// ``` |
320 | /// |
321 | /// Comparison against invalid strings: |
322 | /// |
323 | /// ``` |
324 | /// use icu_provider::DataLocale; |
325 | /// |
326 | /// let invalid_strings: &[&str] = &[ |
327 | /// // Less than "ca-ES" |
328 | /// "CA", |
329 | /// "ar-x-gbp-FOO", |
330 | /// // Greater than "ca-ES-x-gbp" |
331 | /// "ca_ES", |
332 | /// "ca-ES-x-gbp-FOO", |
333 | /// ]; |
334 | /// |
335 | /// let data_locale = "ca-ES-x-gbp".parse::<DataLocale>().unwrap(); |
336 | /// |
337 | /// for s in invalid_strings.iter() { |
338 | /// let expected_ordering = "ca-ES-x-gbp".cmp(s); |
339 | /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); |
340 | /// assert_eq!(expected_ordering, actual_ordering, "{}", s); |
341 | /// } |
342 | /// ``` |
343 | pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
344 | self.writeable_cmp_bytes(other) |
345 | } |
346 | } |
347 | |
348 | impl DataLocale { |
349 | /// Returns whether this [`DataLocale`] has all empty fields (no components). |
350 | /// |
351 | /// See also: |
352 | /// |
353 | /// - [`DataLocale::is_und()`] |
354 | /// - [`DataLocale::is_langid_und()`] |
355 | /// |
356 | /// # Examples |
357 | /// |
358 | /// ``` |
359 | /// use icu_provider::DataLocale; |
360 | /// |
361 | /// assert!("und".parse::<DataLocale>().unwrap().is_empty()); |
362 | /// assert!(!"und-u-ca-buddhist" |
363 | /// .parse::<DataLocale>() |
364 | /// .unwrap() |
365 | /// .is_empty()); |
366 | /// assert!(!"und-x-aux".parse::<DataLocale>().unwrap().is_empty()); |
367 | /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty()); |
368 | /// ``` |
369 | pub fn is_empty(&self) -> bool { |
370 | self == <&DataLocale>::default() |
371 | } |
372 | |
373 | /// Returns an ordering suitable for use in [`BTreeSet`]. |
374 | /// |
375 | /// The ordering may or may not be equivalent to string ordering, and it |
376 | /// may or may not be stable across ICU4X releases. |
377 | /// |
378 | /// [`BTreeSet`]: alloc::collections::BTreeSet |
379 | pub fn total_cmp(&self, other: &Self) -> Ordering { |
380 | self.langid |
381 | .total_cmp(&other.langid) |
382 | .then_with(|| self.keywords.cmp(&other.keywords)) |
383 | .then_with(|| { |
384 | #[cfg(feature = "experimental")] |
385 | return self.aux.cmp(&other.aux); |
386 | #[cfg(not(feature = "experimental"))] |
387 | return Ordering::Equal; |
388 | }) |
389 | } |
390 | |
391 | /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. |
392 | /// |
393 | /// This ignores auxiliary keys. |
394 | /// |
395 | /// See also: |
396 | /// |
397 | /// - [`DataLocale::is_empty()`] |
398 | /// - [`DataLocale::is_langid_und()`] |
399 | /// |
400 | /// # Examples |
401 | /// |
402 | /// ``` |
403 | /// use icu_provider::DataLocale; |
404 | /// |
405 | /// assert!("und".parse::<DataLocale>().unwrap().is_und()); |
406 | /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und()); |
407 | /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_und()); |
408 | /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und()); |
409 | /// ``` |
410 | pub fn is_und(&self) -> bool { |
411 | self.langid == LanguageIdentifier::UND && self.keywords.is_empty() |
412 | } |
413 | |
414 | /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. |
415 | /// |
416 | /// This ignores extension keywords and auxiliary keys. |
417 | /// |
418 | /// See also: |
419 | /// |
420 | /// - [`DataLocale::is_empty()`] |
421 | /// - [`DataLocale::is_und()`] |
422 | /// |
423 | /// # Examples |
424 | /// |
425 | /// ``` |
426 | /// use icu_provider::DataLocale; |
427 | /// |
428 | /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und()); |
429 | /// assert!("und-u-ca-buddhist" |
430 | /// .parse::<DataLocale>() |
431 | /// .unwrap() |
432 | /// .is_langid_und()); |
433 | /// assert!("und-x-aux".parse::<DataLocale>().unwrap().is_langid_und()); |
434 | /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und()); |
435 | /// ``` |
436 | pub fn is_langid_und(&self) -> bool { |
437 | self.langid == LanguageIdentifier::UND |
438 | } |
439 | |
440 | /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. |
441 | /// |
442 | /// This may allocate memory if there are variant subtags. If you need only the language, |
443 | /// script, and/or region subtag, use the specific getters for those subtags: |
444 | /// |
445 | /// - [`DataLocale::language()`] |
446 | /// - [`DataLocale::script()`] |
447 | /// - [`DataLocale::region()`] |
448 | /// |
449 | /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] |
450 | /// and then access the `id` field. |
451 | /// |
452 | /// # Examples |
453 | /// |
454 | /// ``` |
455 | /// use icu_locid::langid; |
456 | /// use icu_provider::prelude::*; |
457 | /// |
458 | /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1"); |
459 | /// |
460 | /// let req_no_langid = DataRequest { |
461 | /// locale: &Default::default(), |
462 | /// metadata: Default::default(), |
463 | /// }; |
464 | /// |
465 | /// let req_with_langid = DataRequest { |
466 | /// locale: &langid!("ar-EG").into(), |
467 | /// metadata: Default::default(), |
468 | /// }; |
469 | /// |
470 | /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und")); |
471 | /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG")); |
472 | /// ``` |
473 | pub fn get_langid(&self) -> LanguageIdentifier { |
474 | self.langid.clone() |
475 | } |
476 | |
477 | /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`]. |
478 | #[inline] |
479 | pub fn set_langid(&mut self, lid: LanguageIdentifier) { |
480 | self.langid = lid; |
481 | } |
482 | |
483 | /// Converts this [`DataLocale`] into a [`Locale`]. |
484 | /// |
485 | /// See also [`DataLocale::get_langid()`]. |
486 | /// |
487 | /// # Examples |
488 | /// |
489 | /// ``` |
490 | /// use icu_locid::{ |
491 | /// langid, locale, |
492 | /// subtags::{language, region}, |
493 | /// }; |
494 | /// use icu_provider::prelude::*; |
495 | /// |
496 | /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into(); |
497 | /// |
498 | /// assert_eq!(locale.get_langid(), langid!("it-IT")); |
499 | /// assert_eq!(locale.language(), language!("it")); |
500 | /// assert_eq!(locale.script(), None); |
501 | /// assert_eq!(locale.region(), Some(region!("IT"))); |
502 | /// |
503 | /// let locale = locale.into_locale(); |
504 | /// assert_eq!(locale, locale!("it-IT-u-ca-coptic")); |
505 | /// ``` |
506 | /// |
507 | /// Auxiliary keys are retained: |
508 | /// |
509 | /// ``` |
510 | /// use icu_provider::prelude::*; |
511 | /// use writeable::assert_writeable_eq; |
512 | /// |
513 | /// let data_locale: DataLocale = "und-u-nu-arab-x-gbp".parse().unwrap(); |
514 | /// assert_writeable_eq!(data_locale, "und-u-nu-arab-x-gbp"); |
515 | /// |
516 | /// let recovered_locale = data_locale.into_locale(); |
517 | /// assert_writeable_eq!(recovered_locale, "und-u-nu-arab-x-gbp"); |
518 | /// ``` |
519 | pub fn into_locale(self) -> Locale { |
520 | let mut loc = Locale { |
521 | id: self.langid, |
522 | ..Default::default() |
523 | }; |
524 | loc.extensions.unicode.keywords = self.keywords; |
525 | #[cfg(feature = "experimental")] |
526 | if let Some(aux) = self.aux { |
527 | loc.extensions.private = |
528 | icu_locid::extensions::private::Private::from_vec_unchecked(aux.iter().collect()); |
529 | } |
530 | loc |
531 | } |
532 | |
533 | /// Returns the [`Language`] for this [`DataLocale`]. |
534 | #[inline] |
535 | pub fn language(&self) -> Language { |
536 | self.langid.language |
537 | } |
538 | |
/// Sets the [`Language`] for this [`DataLocale`].
#[inline]
pub fn set_language(&mut self, language: Language) {
    self.langid.language = language;
}
544 | |
545 | /// Returns the [`Script`] for this [`DataLocale`]. |
546 | #[inline] |
547 | pub fn script(&self) -> Option<Script> { |
548 | self.langid.script |
549 | } |
550 | |
551 | /// Sets the [`Script`] for this [`DataLocale`]. |
552 | #[inline] |
553 | pub fn set_script(&mut self, script: Option<Script>) { |
554 | self.langid.script = script; |
555 | } |
556 | |
557 | /// Returns the [`Region`] for this [`DataLocale`]. |
558 | #[inline] |
559 | pub fn region(&self) -> Option<Region> { |
560 | self.langid.region |
561 | } |
562 | |
563 | /// Sets the [`Region`] for this [`DataLocale`]. |
564 | #[inline] |
565 | pub fn set_region(&mut self, region: Option<Region>) { |
566 | self.langid.region = region; |
567 | } |
568 | |
569 | /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`]. |
570 | #[inline] |
571 | pub fn has_variants(&self) -> bool { |
572 | !self.langid.variants.is_empty() |
573 | } |
574 | |
575 | /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously. |
576 | #[inline] |
577 | pub fn set_variants(&mut self, variants: Variants) { |
578 | self.langid.variants = variants; |
579 | } |
580 | |
581 | /// Removes all [`Variant`] subtags in this [`DataLocale`]. |
582 | #[inline] |
583 | pub fn clear_variants(&mut self) -> Variants { |
584 | self.langid.variants.clear() |
585 | } |
586 | |
587 | /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`]. |
588 | #[inline] |
589 | pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { |
590 | self.keywords.get(key).cloned() |
591 | } |
592 | |
593 | /// Returns whether there are any Unicode extension keywords in this [`DataLocale`]. |
594 | #[inline] |
595 | pub fn has_unicode_ext(&self) -> bool { |
596 | !self.keywords.is_empty() |
597 | } |
598 | |
599 | /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`]. |
600 | #[inline] |
601 | pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool { |
602 | self.keywords.contains_key(key) |
603 | } |
604 | |
605 | /// Returns whether this [`DataLocale`] contains a Unicode extension keyword |
606 | /// with the specified key and value. |
607 | /// |
608 | /// # Examples |
609 | /// |
610 | /// ``` |
611 | /// use icu_locid::extensions::unicode::{key, value}; |
612 | /// use icu_provider::prelude::*; |
613 | /// |
614 | /// let locale: DataLocale = "it-IT-u-ca-coptic".parse().expect( "Valid BCP-47"); |
615 | /// |
616 | /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None); |
617 | /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!( "coptic"))); |
618 | /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!( "coptic"),)); |
619 | /// ``` |
620 | #[inline] |
621 | pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool { |
622 | self.keywords.get(key) == Some(value) |
623 | } |
624 | |
625 | /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`]. |
626 | #[inline] |
627 | pub fn set_unicode_ext( |
628 | &mut self, |
629 | key: unicode_ext::Key, |
630 | value: unicode_ext::Value, |
631 | ) -> Option<unicode_ext::Value> { |
632 | self.keywords.set(key, value) |
633 | } |
634 | |
635 | /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning |
636 | /// the value if it was present. |
637 | #[inline] |
638 | pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> { |
639 | self.keywords.remove(key) |
640 | } |
641 | |
642 | /// Retains a subset of keywords as specified by the predicate function. |
643 | #[inline] |
644 | pub fn retain_unicode_ext<F>(&mut self, predicate: F) |
645 | where |
646 | F: FnMut(&unicode_ext::Key) -> bool, |
647 | { |
648 | self.keywords.retain_by_key(predicate) |
649 | } |
650 | |
/// Borrows the auxiliary keys of this [`DataLocale`], or returns `None` if it has none.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
    Option::as_ref(&self.aux)
}
658 | |
/// Returns `true` if auxiliary keys are set on this [`DataLocale`].
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn has_aux(&self) -> bool {
    self.get_aux().is_some()
}
666 | |
/// Stores `value` as the auxiliary keys of this [`DataLocale`].
///
/// Returns the auxiliary keys that were set before, if any.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
    core::mem::replace(&mut self.aux, Some(value))
}
676 | |
677 | /// Remove an auxiliary key, if present. Returns the removed auxiliary key. |
678 | /// |
679 | /// # Examples |
680 | /// |
681 | /// ``` |
682 | /// use icu_locid::langid; |
683 | /// use icu_provider::prelude::*; |
684 | /// use writeable::assert_writeable_eq; |
685 | /// |
686 | /// let mut data_locale: DataLocale = langid!("ar-EG").into(); |
687 | /// let aux = "gbp" |
688 | /// .parse::<AuxiliaryKeys>() |
689 | /// .expect("contains valid characters"); |
690 | /// data_locale.set_aux(aux); |
691 | /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp"); |
692 | /// |
693 | /// let maybe_aux = data_locale.remove_aux(); |
694 | /// assert_writeable_eq!(data_locale, "ar-EG"); |
695 | /// assert_writeable_eq!(maybe_aux.unwrap(), "gbp"); |
696 | /// ``` |
#[cfg(feature = "experimental")]
pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
    // Leaves `None` behind and hands the previous keys (if any) to the caller.
    core::mem::take(&mut self.aux)
}
701 | } |
702 | |
/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
705 | /// |
706 | /// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary |
707 | /// keys are stored as private use subtags following `-x-`. |
708 | /// |
709 | /// An auxiliary key currently allows 1-8 lowercase alphanumerics. |
710 | /// |
711 | /// <div class="stab unstable"> |
712 | /// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways, |
713 | /// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature |
714 | /// of the `icu_provider` crate. Use with caution. |
715 | /// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a> |
716 | /// </div> |
717 | /// |
718 | /// # Examples |
719 | /// |
720 | /// ``` |
721 | /// use icu_locid::langid; |
722 | /// use icu_provider::prelude::*; |
723 | /// use writeable::assert_writeable_eq; |
724 | /// |
725 | /// let mut data_locale: DataLocale = langid!("ar-EG").into(); |
726 | /// assert_writeable_eq!(data_locale, "ar-EG"); |
727 | /// assert!(!data_locale.has_aux()); |
728 | /// assert_eq!(data_locale.get_aux(), None); |
729 | /// |
730 | /// let aux = "gbp" |
731 | /// .parse::<AuxiliaryKeys>() |
732 | /// .expect("contains valid characters"); |
733 | /// |
734 | /// data_locale.set_aux(aux); |
735 | /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp"); |
736 | /// assert!(data_locale.has_aux()); |
737 | /// assert_eq!(data_locale.get_aux(), Some(&"gbp".parse().unwrap())); |
738 | /// ``` |
739 | /// |
740 | /// Multiple auxiliary keys are allowed: |
741 | /// |
742 | /// ``` |
743 | /// use icu_provider::prelude::*; |
744 | /// use writeable::assert_writeable_eq; |
745 | /// |
746 | /// let data_locale = "ar-EG-x-gbp-long".parse::<DataLocale>().unwrap(); |
747 | /// assert_writeable_eq!(data_locale, "ar-EG-x-gbp-long"); |
748 | /// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2); |
749 | /// ``` |
750 | /// |
751 | /// Not all strings are valid auxiliary keys. |
752 | /// The string must be well-formed and case-normalized: |
753 | /// |
754 | /// ``` |
755 | /// use icu_provider::prelude::*; |
756 | /// |
757 | /// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok()); |
758 | /// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok()); |
759 | /// |
760 | /// assert!("".parse::<AuxiliaryKeys>().is_err()); |
761 | /// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err()); |
762 | /// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err()); |
763 | /// assert!("ABC123".parse::<AuxiliaryKeys>().is_err()); |
764 | /// ``` |
765 | /// |
766 | /// [`Keywords`]: unicode_ext::Keywords |
#[derive(Debug, PartialEq, Clone, Eq, Hash, PartialOrd, Ord)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
    // All auxiliary subtags stored as one string, joined by `AuxiliaryKeys::separator()`.
    // Comparison, hashing and formatting of this type all go through that string.
    value: AuxiliaryKeysInner,
}
772 | |
// Storage for the joined auxiliary-key string. The constructors in this file pick
// `Stack` when the string is at most 23 bytes long and `Boxed` otherwise.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
    // Heap-allocated string, used for values longer than 23 bytes.
    Boxed(alloc::boxed::Box<str>),
    // Inline ASCII string of up to 23 bytes; avoids a heap allocation.
    Stack(TinyAsciiStr<23>),
    // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
    // Static(&'static str),
}
781 | |
// Both storage variants expose their contents as a plain `&str`.
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Boxed(boxed) => &**boxed,
            Self::Stack(tiny) => tiny.as_str(),
        }
    }
}
793 | |
// Equality is defined on the string contents, so a `Boxed` and a `Stack` value
// holding the same text compare equal.
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}
801 | |
// Marker impl: the `PartialEq` impl compares the underlying strings, and string
// equality is a full equivalence relation.
#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
804 | |
// The ordering is total, so `partial_cmp` always wraps the `Ord` result in `Some`.
#[cfg(feature = "experimental")]
impl PartialOrd for AuxiliaryKeysInner {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
811 | |
// Values are ordered by their string contents, independent of the storage variant.
#[cfg(feature = "experimental")]
impl Ord for AuxiliaryKeysInner {
    fn cmp(&self, other: &Self) -> Ordering {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs.cmp(rhs)
    }
}
818 | |
// Debug-formats as the contained string, hiding which storage variant is in use.
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        Debug::fmt(&**self, f)
    }
}
826 | |
// Hashes the string contents, keeping `Hash` consistent with the string-based `Eq`.
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        text.hash(state)
    }
}
834 | |
// Provides `fmt::Display` for `AuxiliaryKeys` by way of its `Writeable` implementation.
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);
837 | |
// `AuxiliaryKeys` writes exactly its stored string; every method forwards to the
// `Writeable` behavior of that string.
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }
    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }
    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
850 | |
// Parses auxiliary keys from their string form: one or more subtags joined by
// `AuxiliaryKeys::separator()`.
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;

    /// Parses `s` into [`AuxiliaryKeys`].
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error if `s` is empty or if any
    /// piece between separators fails to parse as a [`Subtag`] or is not already in
    /// the form the parsed subtag prints as.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // A piece is accepted only when it round-trips through `Subtag` unchanged,
        // which enforces normalization.
        let is_normalized_subtag = |piece: &str| match Subtag::from_str(piece) {
            Ok(subtag) => piece == subtag.as_str(),
            Err(_) => false,
        };

        if s.is_empty() || !s.split(Self::separator()).all(is_normalized_subtag) {
            return Err(DataErrorKind::KeyLocaleSyntax
                .into_error()
                .with_display_context(s));
        }

        // Strings of up to 23 bytes fit the inline `TinyAsciiStr<23>` variant; longer
        // ones are boxed.
        #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
        let value = if s.len() <= 23 {
            AuxiliaryKeysInner::Stack(s.parse().unwrap())
        } else {
            AuxiliaryKeysInner::Boxed(s.into())
        };
        Ok(Self { value })
    }
}
883 | |
#[cfg(feature = "experimental")]
impl AuxiliaryKeys {
    /// Builds an [`AuxiliaryKeys`] by joining the subtags yielded by `iter`
    /// with the auxiliary key separator.
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error if the joined
    /// string is empty, as happens when the iterator yields nothing.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // Single auxiliary key:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc")]).unwrap();
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    ///
    /// // Multiple auxiliary keys:
    /// let a = AuxiliaryKeys::try_from_iter([subtag!("abc"), subtag!("defg")])
    ///     .unwrap();
    /// let b = "abc-defg".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    ///
    /// The iterator can't be empty:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// assert!(AuxiliaryKeys::try_from_iter([]).is_err());
    /// ```
    pub fn try_from_iter(iter: impl IntoIterator<Item = Subtag>) -> Result<Self, DataError> {
        // TODO: Avoid the allocation when possible
        let joined = iter.into_iter().fold(String::new(), |mut acc, subtag| {
            // Only place a separator between keys, never in front of the first.
            if !acc.is_empty() {
                acc.push(AuxiliaryKeys::separator());
            }
            acc.push_str(subtag.as_str());
            acc
        });

        match joined.len() {
            0 => Err(DataErrorKind::KeyLocaleSyntax.with_str_context("empty aux iterator")),
            // Up to 23 bytes are kept in the `Stack` variant.
            #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
            1..=23 => Ok(Self {
                value: AuxiliaryKeysInner::Stack(joined.parse().unwrap()),
            }),
            // Anything longer goes into the `Boxed` variant.
            _ => Ok(Self {
                value: AuxiliaryKeysInner::Boxed(joined.into()),
            }),
        }
    }

    /// Builds an [`AuxiliaryKeys`] holding exactly one subtag.
    ///
    /// This constructor is `const` and infallible; the value is always
    /// stored in the `Stack` variant.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::prelude::*;
    ///
    /// // Single auxiliary key:
    /// let a = AuxiliaryKeys::from_subtag(subtag!("abc"));
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    pub const fn from_subtag(input: Subtag) -> Self {
        Self {
            value: AuxiliaryKeysInner::Stack(input.into_tinystr().resize()),
        }
    }

    /// Yields each auxiliary key as a [`Subtag`], in stored order.
    ///
    /// A piece that fails to parse as a subtag trips a `debug_assert!` and
    /// is otherwise skipped.
    ///
    /// # Example
    ///
    /// ```
    /// use icu_locid::extensions::private::subtag;
    /// use icu_provider::AuxiliaryKeys;
    ///
    /// let aux: AuxiliaryKeys = "abc-defg".parse().unwrap();
    /// assert_eq!(
    ///     aux.iter().collect::<Vec<_>>(),
    ///     vec![subtag!("abc"), subtag!("defg")]
    /// );
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = Subtag> + '_ {
        self.value.split(Self::separator()).filter_map(|x| {
            let parsed: Result<Subtag, _> = x.parse();
            debug_assert!(parsed.is_ok(), "failed to convert to subtag: {x}");
            parsed.ok()
        })
    }

    /// Returns the separator character placed between auxiliary keys in data locales.
    ///
    /// This is, according to BCP-47, an ASCII hyphen.
    #[inline]
    pub(crate) const fn separator() -> char {
        '-'
    }
}
990 | |
#[cfg(feature = "experimental")]
impl From<Subtag> for AuxiliaryKeys {
    /// Converts a single [`Subtag`] into an [`AuxiliaryKeys`] holding just that key.
    ///
    /// Delegates to [`AuxiliaryKeys::from_subtag`], which widens the subtag's
    /// backing string directly. This keeps both conversion paths in agreement
    /// and avoids re-validating the bytes through a fallible constructor.
    fn from(subtag: Subtag) -> Self {
        Self::from_subtag(subtag)
    }
}
1003 | |
/// Checks that a `DataLocale`, optionally carrying auxiliary keys, is written
/// out as the expected string.
#[test]
fn test_data_locale_to_string() {
    struct TestCase {
        pub locale: &'static str,
        pub aux: Option<&'static str>,
        pub expected: &'static str,
    }

    let cases = [
        TestCase {
            locale: "und",
            aux: None,
            expected: "und",
        },
        TestCase {
            locale: "und-u-cu-gbp",
            aux: None,
            expected: "und-u-cu-gbp",
        },
        TestCase {
            locale: "en-ZA-u-cu-gbp",
            aux: None,
            expected: "en-ZA-u-cu-gbp",
        },
        // Auxiliary keys only exist behind the `experimental` feature.
        #[cfg(feature = "experimental")]
        TestCase {
            locale: "en-ZA-u-nu-arab",
            aux: Some("gbp"),
            expected: "en-ZA-u-nu-arab-x-gbp",
        },
    ];

    for case in cases {
        let mut parsed = case.locale.parse::<DataLocale>().unwrap();
        #[cfg(feature = "experimental")]
        if let Some(aux) = case.aux {
            parsed.set_aux(aux.parse().unwrap());
        }
        writeable::assert_writeable_eq!(parsed, case.expected);
    }
}
1043 | |
/// Checks which strings parse as a `DataLocale`, and that every successfully
/// parsed locale writes back out as its input.
#[test]
fn test_data_locale_from_string() {
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
        // The same input is expected to parse only when the `experimental`
        // feature is enabled.
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            success: true,
        },
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab-x-gbp",
            success: false,
        },
    ];

    for cas in cases {
        let data_locale = match DataLocale::from_str(cas.input) {
            Ok(parsed) if cas.success => parsed,
            Ok(_) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            Err(_) if cas.success => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
            // An expected failure: nothing to round-trip.
            Err(_) => continue,
        };
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}
1095 |
Definitions
- DataRequest
- locale
- metadata
- fmt
- DataRequestMetadata
- silent
- DataLocale
- langid
- keywords
- default
- fmt
- write_to
- writeable_length_hint
- write_to_string
- from
- from
- from
- from
- Err
- from_str
- strict_cmp
- is_empty
- total_cmp
- is_und
- is_langid_und
- get_langid
- set_langid
- into_locale
- language
- set_language
- script
- set_script
- region
- set_region
- has_variants
- set_variants
- clear_variants
- get_unicode_ext
- has_unicode_ext
- contains_unicode_ext
- matches_unicode_ext
- set_unicode_ext
- remove_unicode_ext
Learn Rust with the experts
Find out more