1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::{DataError, DataErrorKind};
6use core::cmp::Ordering;
7use core::default::Default;
8use core::fmt;
9use core::fmt::Debug;
10use core::hash::Hash;
11use core::str::FromStr;
12use icu_locid::extensions::unicode as unicode_ext;
13use icu_locid::subtags::{Language, Region, Script, Variants};
14use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult};
15use writeable::{LengthHint, Writeable};
16
17#[cfg(feature = "experimental")]
18use alloc::string::String;
19#[cfg(feature = "experimental")]
20use core::ops::Deref;
21#[cfg(feature = "experimental")]
22use tinystr::TinyAsciiStr;
23
24#[cfg(doc)]
25use icu_locid::subtags::Variant;
26
27const AUXILIARY_KEY_SEPARATOR: u8 = b'+';
28
29/// The request type passed into all data provider implementations.
30#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
31#[allow(clippy::exhaustive_structs)] // this type is stable
32pub struct DataRequest<'a> {
33 /// The locale for which to load data.
34 ///
35 /// If locale fallback is enabled, the resulting data may be from a different locale
36 /// than the one requested here.
37 pub locale: &'a DataLocale,
38 /// Metadata that may affect the behavior of the data provider.
39 pub metadata: DataRequestMetadata,
40}
41
42impl fmt::Display for DataRequest<'_> {
43 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
44 fmt::Display::fmt(&self.locale, f)
45 }
46}
47
/// Metadata attached to a data request.
///
/// Only a single flag exists today. The struct is non-exhaustive so that further options
/// (for tuning locale fallback, buffer layout, and so forth) can be added later.
#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
#[non_exhaustive]
pub struct DataRequestMetadata {
    /// When set, errors for this request are not logged. This suits exploratory queries
    /// such as fallback probing.
    pub silent: bool,
}
56
57/// A locale type optimized for use in fallbacking and the ICU4X data pipeline.
58///
59/// [`DataLocale`] contains less functionality than [`Locale`] but more than
60/// [`LanguageIdentifier`] for better size and performance while still meeting
61/// the needs of the ICU4X data pipeline.
62///
63/// # Examples
64///
65/// Convert a [`Locale`] to a [`DataLocale`] and back:
66///
67/// ```
68/// use icu_locid::locale;
69/// use icu_provider::DataLocale;
70///
71/// let locale = locale!("en-u-ca-buddhist");
72/// let data_locale = DataLocale::from(locale);
73/// let locale = data_locale.into_locale();
74///
75/// assert_eq!(locale, locale!("en-u-ca-buddhist"));
76/// ```
77///
78/// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more
79/// efficient than cloning the [`Locale`], but less efficient than converting an owned
80/// [`Locale`]:
81///
82/// ```
83/// use icu_locid::locale;
84/// use icu_provider::DataLocale;
85///
86/// let locale1 = locale!("en-u-ca-buddhist");
87/// let data_locale = DataLocale::from(&locale1);
88/// let locale2 = data_locale.into_locale();
89///
90/// assert_eq!(locale1, locale2);
91/// ```
92///
93/// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]:
94///
95/// ```
96/// use icu_locid::langid;
97/// use icu_provider::DataLocale;
98///
99/// let langid = langid!("es-CA-valencia");
100/// let data_locale = DataLocale::from(langid);
101/// let langid = data_locale.get_langid();
102///
103/// assert_eq!(langid, langid!("es-CA-valencia"));
104/// ```
105///
106/// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data
107/// lookup and fallback. This may change in the future.
108///
109/// ```
110/// use icu_locid::{locale, Locale};
111/// use icu_provider::DataLocale;
112///
113/// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist"
114/// .parse::<Locale>()
115/// .unwrap();
116/// let data_locale = DataLocale::from(locale);
117///
118/// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist"));
119/// ```
120#[derive(PartialEq, Clone, Default, Eq, Hash)]
121pub struct DataLocale {
122 langid: LanguageIdentifier,
123 keywords: unicode_ext::Keywords,
124 #[cfg(feature = "experimental")]
125 aux: Option<AuxiliaryKeys>,
126}
127
128impl<'a> Default for &'a DataLocale {
129 fn default() -> Self {
130 static DEFAULT: DataLocale = DataLocale {
131 langid: LanguageIdentifier::UND,
132 keywords: unicode_ext::Keywords::new(),
133 #[cfg(feature = "experimental")]
134 aux: None,
135 };
136 &DEFAULT
137 }
138}
139
140impl fmt::Debug for DataLocale {
141 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
142 write!(f, "DataLocale{{{self}}}")
143 }
144}
145
146impl Writeable for DataLocale {
147 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
148 self.langid.write_to(sink)?;
149 if !self.keywords.is_empty() {
150 sink.write_str("-u-")?;
151 self.keywords.write_to(sink)?;
152 }
153 #[cfg(feature = "experimental")]
154 if let Some(aux) = self.aux.as_ref() {
155 sink.write_char(AuxiliaryKeys::separator() as char)?;
156 aux.write_to(sink)?;
157 }
158 Ok(())
159 }
160
161 fn writeable_length_hint(&self) -> LengthHint {
162 let mut length_hint = self.langid.writeable_length_hint();
163 if !self.keywords.is_empty() {
164 length_hint += self.keywords.writeable_length_hint() + 3;
165 }
166 #[cfg(feature = "experimental")]
167 if let Some(aux) = self.aux.as_ref() {
168 length_hint += aux.writeable_length_hint() + 1;
169 }
170 length_hint
171 }
172
173 fn write_to_string(&self) -> alloc::borrow::Cow<str> {
174 #[cfg_attr(not(feature = "experimental"), allow(unused_mut))]
175 let mut is_only_langid = self.keywords.is_empty();
176 #[cfg(feature = "experimental")]
177 {
178 is_only_langid = is_only_langid && self.aux.is_none();
179 }
180 if is_only_langid {
181 return self.langid.write_to_string();
182 }
183 let mut string =
184 alloc::string::String::with_capacity(self.writeable_length_hint().capacity());
185 let _ = self.write_to(&mut string);
186 alloc::borrow::Cow::Owned(string)
187 }
188}
189
190writeable::impl_display_with_writeable!(DataLocale);
191
192impl From<LanguageIdentifier> for DataLocale {
193 fn from(langid: LanguageIdentifier) -> Self {
194 Self {
195 langid,
196 keywords: unicode_ext::Keywords::new(),
197 #[cfg(feature = "experimental")]
198 aux: None,
199 }
200 }
201}
202
203impl From<Locale> for DataLocale {
204 fn from(locale: Locale) -> Self {
205 Self {
206 langid: locale.id,
207 keywords: locale.extensions.unicode.keywords,
208 #[cfg(feature = "experimental")]
209 aux: None,
210 }
211 }
212}
213
214impl From<&LanguageIdentifier> for DataLocale {
215 fn from(langid: &LanguageIdentifier) -> Self {
216 Self {
217 langid: langid.clone(),
218 keywords: unicode_ext::Keywords::new(),
219 #[cfg(feature = "experimental")]
220 aux: None,
221 }
222 }
223}
224
225impl From<&Locale> for DataLocale {
226 fn from(locale: &Locale) -> Self {
227 Self {
228 langid: locale.id.clone(),
229 keywords: locale.extensions.unicode.keywords.clone(),
230 #[cfg(feature = "experimental")]
231 aux: None,
232 }
233 }
234}
235
236impl FromStr for DataLocale {
237 type Err = DataError;
238 fn from_str(s: &str) -> Result<Self, Self::Err> {
239 let mut aux_iter = s.splitn(2, AUXILIARY_KEY_SEPARATOR as char);
240 let Some(locale_str) = aux_iter.next() else {
241 return Err(DataErrorKind::KeyLocaleSyntax
242 .into_error()
243 .with_display_context(s));
244 };
245 let locale = Locale::from_str(locale_str).map_err(|e| {
246 DataErrorKind::KeyLocaleSyntax
247 .into_error()
248 .with_display_context(s)
249 .with_display_context(&e)
250 })?;
251 #[cfg_attr(not(feature = "experimental"), allow(unused_mut))]
252 let mut data_locale = DataLocale::from(locale);
253 #[cfg(feature = "experimental")]
254 if let Some(aux_str) = aux_iter.next() {
255 let aux = AuxiliaryKeys::from_str(aux_str)?;
256 data_locale.set_aux(aux);
257 }
258 if aux_iter.next().is_some() {
259 return Err(DataErrorKind::KeyLocaleSyntax
260 .into_error()
261 .with_display_context(s));
262 }
263 Ok(data_locale)
264 }
265}
266
267impl DataLocale {
268 /// Compare this [`DataLocale`] with BCP-47 bytes.
269 ///
270 /// The return value is equivalent to what would happen if you first converted this
271 /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison.
272 ///
273 /// This function is case-sensitive and results in a *total order*, so it is appropriate for
274 /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
275 ///
276 /// # Examples
277 ///
278 /// ```
279 /// use icu_locid::Locale;
280 /// use icu_provider::DataLocale;
281 /// use std::cmp::Ordering;
282 ///
283 /// let bcp47_strings: &[&str] = &[
284 /// "ca",
285 /// "ca+EUR",
286 /// "ca-ES",
287 /// "ca-ES+GBP",
288 /// "ca-ES+GBP+short",
289 /// "ca-ES+USD",
290 /// "ca-ES-u-ca-buddhist",
291 /// "ca-ES-valencia",
292 /// "cat",
293 /// "pl-Latn-PL",
294 /// "und",
295 /// "und+MXN",
296 /// "und-fonipa",
297 /// "und-u-ca-hebrew",
298 /// "und-u-ca-japanese",
299 /// "zh",
300 /// ];
301 ///
302 /// for ab in bcp47_strings.windows(2) {
303 /// let a = ab[0];
304 /// let b = ab[1];
305 /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}", a, b);
306 /// let a_loc: DataLocale = a.parse().unwrap();
307 /// assert_eq!(
308 /// a_loc.strict_cmp(a.as_bytes()),
309 /// Ordering::Equal,
310 /// "strict_cmp: {} == {}",
311 /// a_loc,
312 /// a
313 /// );
314 /// assert_eq!(
315 /// a_loc.strict_cmp(b.as_bytes()),
316 /// Ordering::Less,
317 /// "strict_cmp: {} < {}",
318 /// a_loc,
319 /// b
320 /// );
321 /// let b_loc: DataLocale = b.parse().unwrap();
322 /// assert_eq!(
323 /// b_loc.strict_cmp(b.as_bytes()),
324 /// Ordering::Equal,
325 /// "strict_cmp: {} == {}",
326 /// b_loc,
327 /// b
328 /// );
329 /// assert_eq!(
330 /// b_loc.strict_cmp(a.as_bytes()),
331 /// Ordering::Greater,
332 /// "strict_cmp: {} > {}",
333 /// b_loc,
334 /// a
335 /// );
336 /// }
337 /// ```
338 ///
339 /// Comparison against invalid strings:
340 ///
341 /// ```
342 /// use icu_provider::DataLocale;
343 ///
344 /// let invalid_strings: &[&str] = &[
345 /// // Less than "ca-ES"
346 /// "CA",
347 /// "ar+GBP+FOO",
348 /// // Greater than "ca-ES+GBP"
349 /// "ca_ES",
350 /// "ca-ES+GBP+FOO",
351 /// ];
352 ///
353 /// let data_locale = "ca-ES+GBP".parse::<DataLocale>().unwrap();
354 ///
355 /// for s in invalid_strings.iter() {
356 /// let expected_ordering = "ca-ES+GBP".cmp(s);
357 /// let actual_ordering = data_locale.strict_cmp(s.as_bytes());
358 /// assert_eq!(expected_ordering, actual_ordering, "{}", s);
359 /// }
360 /// ```
361 pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
362 let mut aux_iter = other.splitn(2, |b| *b == AUXILIARY_KEY_SEPARATOR);
363 let Some(locale_str) = aux_iter.next() else {
364 debug_assert!(other.is_empty());
365 return Ordering::Greater;
366 };
367 let aux_str = aux_iter.next();
368 let subtags = locale_str.split(|b| *b == b'-');
369 let mut subtag_result = self.langid.strict_cmp_iter(subtags);
370 if self.has_unicode_ext() {
371 let mut subtags = match subtag_result {
372 SubtagOrderingResult::Subtags(s) => s,
373 SubtagOrderingResult::Ordering(o) => return o,
374 };
375 match subtags.next() {
376 Some(b"u") => (),
377 Some(s) => return s.cmp(b"u").reverse(),
378 None => return Ordering::Greater,
379 }
380 subtag_result = self.keywords.strict_cmp_iter(subtags);
381 }
382 let has_more_subtags = match subtag_result {
383 SubtagOrderingResult::Subtags(mut s) => s.next().is_some(),
384 SubtagOrderingResult::Ordering(o) => return o,
385 };
386 // If we get here, `self` has equal or fewer subtags than the `other`.
387 // There are 2^3 = 8 cases to handle for auxiliary keys, expanded below.
388 match (has_more_subtags, self.get_aux(), aux_str) {
389 (false, None, None) => {
390 // foo == foo
391 Ordering::Equal
392 }
393 (false, Some(self_aux), Some(other_aux)) => {
394 // foo+BAR1 ?= foo+BAR2
395 let aux_ordering = self_aux.as_bytes().cmp(other_aux);
396 if aux_ordering != Ordering::Equal {
397 return aux_ordering;
398 }
399 Ordering::Equal
400 }
401 (false, Some(_), None) => {
402 // foo+BAR > foo
403 Ordering::Greater
404 }
405 (_, _, _) => {
406 // foo < foo-bar
407 // foo < foo-bar+BAR
408 // foo < foo+BAR
409 // foo+BAR < foo-bar
410 // foo+BAR < foo-bar+BAR
411 Ordering::Less
412 }
413 }
414 }
415}
416
417impl DataLocale {
418 /// Returns whether this [`DataLocale`] has all empty fields (no components).
419 ///
420 /// See also:
421 ///
422 /// - [`DataLocale::is_und()`]
423 /// - [`DataLocale::is_langid_und()`]
424 ///
425 /// # Examples
426 ///
427 /// ```
428 /// use icu_provider::DataLocale;
429 ///
430 /// assert!("und".parse::<DataLocale>().unwrap().is_empty());
431 /// assert!(!"und-u-ca-buddhist"
432 /// .parse::<DataLocale>()
433 /// .unwrap()
434 /// .is_empty());
435 /// assert!(!"und+auxiliary".parse::<DataLocale>().unwrap().is_empty());
436 /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_empty());
437 /// ```
438 pub fn is_empty(&self) -> bool {
439 self == <&DataLocale>::default()
440 }
441
442 /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion.
443 ///
444 /// This ignores auxiliary keys.
445 ///
446 /// See also:
447 ///
448 /// - [`DataLocale::is_empty()`]
449 /// - [`DataLocale::is_langid_und()`]
450 ///
451 /// # Examples
452 ///
453 /// ```
454 /// use icu_provider::DataLocale;
455 ///
456 /// assert!("und".parse::<DataLocale>().unwrap().is_und());
457 /// assert!(!"und-u-ca-buddhist".parse::<DataLocale>().unwrap().is_und());
458 /// assert!("und+auxiliary".parse::<DataLocale>().unwrap().is_und());
459 /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_und());
460 /// ```
461 pub fn is_und(&self) -> bool {
462 self.langid == LanguageIdentifier::UND && self.keywords.is_empty()
463 }
464
465 /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`.
466 ///
467 /// This ignores extension keywords and auxiliary keys.
468 ///
469 /// See also:
470 ///
471 /// - [`DataLocale::is_empty()`]
472 /// - [`DataLocale::is_und()`]
473 ///
474 /// # Examples
475 ///
476 /// ```
477 /// use icu_provider::DataLocale;
478 ///
479 /// assert!("und".parse::<DataLocale>().unwrap().is_langid_und());
480 /// assert!("und-u-ca-buddhist"
481 /// .parse::<DataLocale>()
482 /// .unwrap()
483 /// .is_langid_und());
484 /// assert!("und+auxiliary"
485 /// .parse::<DataLocale>()
486 /// .unwrap()
487 /// .is_langid_und());
488 /// assert!(!"ca-ES".parse::<DataLocale>().unwrap().is_langid_und());
489 /// ```
490 pub fn is_langid_und(&self) -> bool {
491 self.langid == LanguageIdentifier::UND
492 }
493
494 /// Gets the [`LanguageIdentifier`] for this [`DataLocale`].
495 ///
496 /// This may allocate memory if there are variant subtags. If you need only the language,
497 /// script, and/or region subtag, use the specific getters for those subtags:
498 ///
499 /// - [`DataLocale::language()`]
500 /// - [`DataLocale::script()`]
501 /// - [`DataLocale::region()`]
502 ///
503 /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`]
504 /// and then access the `id` field.
505 ///
506 /// # Examples
507 ///
508 /// ```
509 /// use icu_locid::langid;
510 /// use icu_provider::prelude::*;
511 ///
512 /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1");
513 ///
514 /// let req_no_langid = DataRequest {
515 /// locale: &Default::default(),
516 /// metadata: Default::default(),
517 /// };
518 ///
519 /// let req_with_langid = DataRequest {
520 /// locale: &langid!("ar-EG").into(),
521 /// metadata: Default::default(),
522 /// };
523 ///
524 /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und"));
525 /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG"));
526 /// ```
527 pub fn get_langid(&self) -> LanguageIdentifier {
528 self.langid.clone()
529 }
530
531 /// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`].
532 #[inline]
533 pub fn set_langid(&mut self, lid: LanguageIdentifier) {
534 self.langid = lid;
535 }
536
537 /// Converts this [`DataLocale`] into a [`Locale`].
538 ///
539 /// See also [`DataLocale::get_langid()`].
540 ///
541 /// # Examples
542 ///
543 /// ```
544 /// use icu_locid::{
545 /// langid, locale,
546 /// subtags::{language, region},
547 /// Locale,
548 /// };
549 /// use icu_provider::prelude::*;
550 ///
551 /// let locale: DataLocale = locale!("it-IT-u-ca-coptic").into();
552 ///
553 /// assert_eq!(locale.get_langid(), langid!("it-IT"));
554 /// assert_eq!(locale.language(), language!("it"));
555 /// assert_eq!(locale.script(), None);
556 /// assert_eq!(locale.region(), Some(region!("IT")));
557 ///
558 /// let locale = locale.into_locale();
559 /// assert_eq!(locale, locale!("it-IT-u-ca-coptic"));
560 /// ```
561 pub fn into_locale(self) -> Locale {
562 let mut loc = Locale {
563 id: self.langid,
564 ..Default::default()
565 };
566 loc.extensions.unicode.keywords = self.keywords;
567 loc
568 }
569
570 /// Returns the [`Language`] for this [`DataLocale`].
571 #[inline]
572 pub fn language(&self) -> Language {
573 self.langid.language
574 }
575
576 /// Returns the [`Language`] for this [`DataLocale`].
577 #[inline]
578 pub fn set_language(&mut self, language: Language) {
579 self.langid.language = language;
580 }
581
582 /// Returns the [`Script`] for this [`DataLocale`].
583 #[inline]
584 pub fn script(&self) -> Option<Script> {
585 self.langid.script
586 }
587
588 /// Sets the [`Script`] for this [`DataLocale`].
589 #[inline]
590 pub fn set_script(&mut self, script: Option<Script>) {
591 self.langid.script = script;
592 }
593
594 /// Returns the [`Region`] for this [`DataLocale`].
595 #[inline]
596 pub fn region(&self) -> Option<Region> {
597 self.langid.region
598 }
599
600 /// Sets the [`Region`] for this [`DataLocale`].
601 #[inline]
602 pub fn set_region(&mut self, region: Option<Region>) {
603 self.langid.region = region;
604 }
605
606 /// Returns whether there are any [`Variant`] subtags in this [`DataLocale`].
607 #[inline]
608 pub fn has_variants(&self) -> bool {
609 !self.langid.variants.is_empty()
610 }
611
612 /// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously.
613 #[inline]
614 pub fn set_variants(&mut self, variants: Variants) {
615 self.langid.variants = variants;
616 }
617
618 /// Removes all [`Variant`] subtags in this [`DataLocale`].
619 #[inline]
620 pub fn clear_variants(&mut self) -> Variants {
621 self.langid.variants.clear()
622 }
623
624 /// Gets the value of the specified Unicode extension keyword for this [`DataLocale`].
625 #[inline]
626 pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
627 self.keywords.get(key).cloned()
628 }
629
630 /// Returns whether there are any Unicode extension keywords in this [`DataLocale`].
631 #[inline]
632 pub fn has_unicode_ext(&self) -> bool {
633 !self.keywords.is_empty()
634 }
635
636 /// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
637 #[inline]
638 pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
639 self.keywords.contains_key(key)
640 }
641
642 /// Returns whether this [`DataLocale`] contains a Unicode extension keyword
643 /// with the specified key and value.
644 ///
645 /// # Examples
646 ///
647 /// ```
648 /// use icu_locid::{
649 /// extensions::unicode::{key, value},
650 /// Locale,
651 /// };
652 /// use icu_provider::prelude::*;
653 ///
654 /// let locale: Locale = "it-IT-u-ca-coptic".parse().expect("Valid BCP-47");
655 /// let locale: DataLocale = locale.into();
656 ///
657 /// assert_eq!(locale.get_unicode_ext(&key!("hc")), None);
658 /// assert_eq!(locale.get_unicode_ext(&key!("ca")), Some(value!("coptic")));
659 /// assert!(locale.matches_unicode_ext(&key!("ca"), &value!("coptic"),));
660 /// ```
661 #[inline]
662 pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool {
663 self.keywords.get(key) == Some(value)
664 }
665
666 /// Sets the value for a specific Unicode extension keyword on this [`DataLocale`].
667 #[inline]
668 pub fn set_unicode_ext(
669 &mut self,
670 key: unicode_ext::Key,
671 value: unicode_ext::Value,
672 ) -> Option<unicode_ext::Value> {
673 self.keywords.set(key, value)
674 }
675
676 /// Removes a specific Unicode extension keyword from this [`DataLocale`], returning
677 /// the value if it was present.
678 #[inline]
679 pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
680 self.keywords.remove(key)
681 }
682
683 /// Retains a subset of keywords as specified by the predicate function.
684 #[inline]
685 pub fn retain_unicode_ext<F>(&mut self, predicate: F)
686 where
687 F: FnMut(&unicode_ext::Key) -> bool,
688 {
689 self.keywords.retain_by_key(predicate)
690 }
691
692 /// Gets the auxiliary key for this [`DataLocale`].
693 ///
694 /// For more information and examples, see [`AuxiliaryKeys`].
695 #[cfg(feature = "experimental")]
696 pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
697 self.aux.as_ref()
698 }
699
700 #[cfg(not(feature = "experimental"))]
701 pub(crate) fn get_aux(&self) -> Option<&str> {
702 None
703 }
704
705 /// Returns whether this [`DataLocale`] has an auxiliary key.
706 ///
707 /// For more information and examples, see [`AuxiliaryKeys`].
708 #[cfg(feature = "experimental")]
709 pub fn has_aux(&self) -> bool {
710 self.aux.is_some()
711 }
712
713 /// Sets an auxiliary key on this [`DataLocale`].
714 ///
715 /// Returns the previous auxiliary key if present.
716 ///
717 /// For more information and examples, see [`AuxiliaryKeys`].
718 #[cfg(feature = "experimental")]
719 pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
720 self.aux.replace(value)
721 }
722
723 /// Remove an auxiliary key, if present. Returns the removed auxiliary key.
724 ///
725 /// # Examples
726 ///
727 /// ```
728 /// use icu_locid::locale;
729 /// use icu_provider::prelude::*;
730 /// use writeable::assert_writeable_eq;
731 ///
732 /// let mut data_locale: DataLocale = locale!("ar-EG").into();
733 /// let aux = "GBP"
734 /// .parse::<AuxiliaryKeys>()
735 /// .expect("contains valid characters");
736 /// data_locale.set_aux(aux);
737 /// assert_writeable_eq!(data_locale, "ar-EG+GBP");
738 ///
739 /// let maybe_aux = data_locale.remove_aux();
740 /// assert_writeable_eq!(data_locale, "ar-EG");
741 /// assert_writeable_eq!(maybe_aux.unwrap(), "GBP");
742 /// ```
743 #[cfg(feature = "experimental")]
744 pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
745 self.aux.take()
746 }
747}
748
/// An "auxiliary key" is an annotation on a [`DataLocale`] that can carry arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
///
/// One [`DataLocale`] may hold several auxiliary keys; this struct represents all of them.
/// The character returned by [`AuxiliaryKeys::separator()`] separates the auxiliary keys
/// from the BCP-47 locale and from each other.
///
/// An auxiliary key currently allows alphanumerics and `-`.
///
/// <div class="stab unstable">
/// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature
/// of the `icu_provider` crate. Use with caution.
/// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a>
/// </div>
///
/// # Examples
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let mut data_locale: DataLocale = locale!("ar-EG").into();
/// assert_writeable_eq!(data_locale, "ar-EG");
/// assert!(!data_locale.has_aux());
/// assert_eq!(data_locale.get_aux(), None);
///
/// let aux = "GBP"
///     .parse::<AuxiliaryKeys>()
///     .expect("contains valid characters");
///
/// data_locale.set_aux(aux);
/// assert_writeable_eq!(data_locale, "ar-EG+GBP");
/// assert!(data_locale.has_aux());
/// assert_eq!(data_locale.get_aux(), Some(&"GBP".parse().unwrap()));
/// ```
///
/// Multiple auxiliary keys are allowed:
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let data_locale = "ar-EG+GBP+long".parse::<DataLocale>().unwrap();
/// assert_writeable_eq!(data_locale, "ar-EG+GBP+long");
/// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2);
/// ```
///
/// Not all strings are valid auxiliary keys:
///
/// ```
/// use icu_provider::prelude::*;
///
/// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok());
/// assert!("ABC123".parse::<AuxiliaryKeys>().is_ok());
/// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok());
///
/// assert!("".parse::<AuxiliaryKeys>().is_err());
/// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err());
/// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err());
/// ```
///
/// [`Keywords`]: unicode_ext::Keywords
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
    // DISCUSS: SmallStr? TinyStrAuto?
    // DISCUSS: Make this a dynamically sized type so references can be taken?
    // All keys are kept in one string, joined by the separator.
    value: AuxiliaryKeysInner,
}
821
/// Storage behind [`AuxiliaryKeys`]: the joined key string, held either on the heap or
/// inline. Both variants dereference to the same `str` view.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
    /// Heap-allocated string.
    Boxed(alloc::boxed::Box<str>),
    /// Inline ASCII string with a capacity of 23 bytes.
    Stack(TinyAsciiStr<23>),
    // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
    // Static(&'static str),
}
830
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;

    /// Exposes the stored keys as a `str`, whichever variant holds them.
    #[inline]
    fn deref(&self) -> &Self::Target {
        match *self {
            AuxiliaryKeysInner::Boxed(ref boxed) => &**boxed,
            AuxiliaryKeysInner::Stack(ref tiny) => tiny.as_str(),
        }
    }
}
842
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    /// Two values are equal when their string contents are equal, regardless of which
    /// variant stores them.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}

#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
853
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    /// Debug-formats the string contents; the storage variant is not shown.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self;
        Debug::fmt(text, f)
    }
}
861
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    /// Hashes the string contents, so the hash agrees with the content-based `PartialEq`.
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        text.hash(state)
    }
}
869
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);

// Every method forwards to the `Writeable` implementation of the underlying `str`.
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }

    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }

    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
885
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;

    /// Parses one or more separator-joined auxiliary keys by delegating to
    /// `AuxiliaryKeys::try_from_str`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        AuxiliaryKeys::try_from_str(s)
    }
}
894
#[cfg(feature = "experimental")]
impl AuxiliaryKeys {
    /// Longest joined string, in bytes, that is stored inline rather than boxed.
    const STACK_CAPACITY: usize = 23;

    /// Returns this [`AuxiliaryKeys`] as a single byte slice.
    ///
    /// NOTE: Do not make this public because we might not always store these in a single string.
    /// External clients who need this can use `<Self as Writeable>::write_to_string`.
    #[inline]
    pub(crate) fn as_bytes(&self) -> &[u8] {
        self.value.as_bytes()
    }

    /// Returns whether `key` is a well-formed individual auxiliary key: non-empty and made
    /// up solely of ASCII alphanumerics and `-`.
    fn is_valid_key(key: &str) -> bool {
        !key.is_empty()
            && key
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
    }

    /// Builds the syntax error reported for malformed auxiliary keys.
    fn syntax_error(context: &str) -> DataError {
        DataErrorKind::KeyLocaleSyntax
            .into_error()
            .with_display_context(context)
    }

    /// Creates an [`AuxiliaryKeys`] from an iterator of individual keys.
    ///
    /// # Errors
    ///
    /// Returns a [`DataErrorKind::KeyLocaleSyntax`] error if any key is empty or contains a
    /// character other than an ASCII alphanumeric or `-`, or if the iterator yields no keys.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// // Single auxiliary key:
    /// let a = AuxiliaryKeys::try_from_iter(["abc"]).unwrap();
    /// let b = "abc".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    ///
    /// // Multiple auxiliary keys:
    /// let a = AuxiliaryKeys::try_from_iter(["abc", "defg"]).unwrap();
    /// let b = "abc+defg".parse::<AuxiliaryKeys>().unwrap();
    /// assert_eq!(a, b);
    /// ```
    ///
    /// Don't include the auxiliary key separator or other invalid chars in the iterator strings:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// assert!(AuxiliaryKeys::try_from_iter(["abc+defg"]).is_err());
    /// assert!(AuxiliaryKeys::try_from_iter(["AB$C"]).is_err());
    /// ```
    ///
    /// At least one key is required, and no key may be empty:
    ///
    /// ```
    /// use icu_provider::prelude::*;
    ///
    /// assert!(AuxiliaryKeys::try_from_iter(std::iter::empty::<&str>()).is_err());
    /// assert!(AuxiliaryKeys::try_from_iter(["abc", ""]).is_err());
    /// ```
    pub fn try_from_iter<'a>(iter: impl IntoIterator<Item = &'a str>) -> Result<Self, DataError> {
        // TODO: Avoid the allocation when possible
        let mut builder = String::new();
        for item in iter {
            if !Self::is_valid_key(item) {
                return Err(Self::syntax_error(item));
            }
            if !builder.is_empty() {
                builder.push(char::from(Self::separator()));
            }
            builder.push_str(item);
        }
        if builder.is_empty() {
            // Every accepted key is non-empty, so this only happens for an empty iterator.
            // Reject it just like `"".parse::<AuxiliaryKeys>()` is rejected.
            return Err(Self::syntax_error(""));
        }
        let value = if builder.len() <= Self::STACK_CAPACITY {
            #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
            let stack = builder.parse().unwrap();
            AuxiliaryKeysInner::Stack(stack)
        } else {
            AuxiliaryKeysInner::Boxed(builder.into())
        };
        Ok(Self { value })
    }

    /// Parses a string of one or more auxiliary keys joined by the separator.
    ///
    /// Each separator-delimited component must be a valid key on its own, so the empty
    /// string and strings with a leading, trailing, or doubled separator are rejected with
    /// a [`DataErrorKind::KeyLocaleSyntax`] error.
    pub(crate) fn try_from_str(s: &str) -> Result<Self, DataError> {
        // `split` always yields at least one piece, so the empty string fails here as well.
        let all_keys_valid = s
            .split(char::from(Self::separator()))
            .all(Self::is_valid_key);
        if !all_keys_valid {
            return Err(Self::syntax_error(s));
        }
        let value = if s.len() <= Self::STACK_CAPACITY {
            #[allow(clippy::unwrap_used)] // we just checked that the string is ascii
            let stack = s.parse().unwrap();
            AuxiliaryKeysInner::Stack(stack)
        } else {
            AuxiliaryKeysInner::Boxed(s.into())
        };
        Ok(Self { value })
    }

    /// Iterates over the components of the auxiliary key.
    ///
    /// # Example
    ///
    /// ```
    /// use icu_provider::AuxiliaryKeys;
    ///
    /// let aux: AuxiliaryKeys = "abc+defg".parse().unwrap();
    /// assert_eq!(aux.iter().collect::<Vec<_>>(), vec!["abc", "defg"]);
    /// ```
    pub fn iter(&self) -> impl Iterator<Item = &str> + '_ {
        self.value.split(Self::separator() as char)
    }

    /// Returns the separator byte used for auxiliary keys in data locales.
    ///
    /// # Examples
    ///
    /// ```
    /// use icu_provider::AuxiliaryKeys;
    ///
    /// assert_eq!(AuxiliaryKeys::separator(), b'+');
    /// ```
    #[inline]
    pub const fn separator() -> u8 {
        AUXILIARY_KEY_SEPARATOR
    }
}
1013
#[test]
fn test_data_locale_to_string() {
    use icu_locid::locale;

    // Each case: (source locale, optional auxiliary key, expected serialization).
    for (locale, aux, expected) in [
        (Locale::UND, None::<&'static str>, "und"),
        (locale!("und-u-cu-gbp"), None, "und-u-cu-gbp"),
        (locale!("en-ZA-u-cu-gbp"), None, "en-ZA-u-cu-gbp"),
        #[cfg(feature = "experimental")]
        (locale!("en-ZA-u-nu-arab"), Some("GBP"), "en-ZA-u-nu-arab+GBP"),
    ] {
        let mut data_locale = DataLocale::from(locale);
        #[cfg(feature = "experimental")]
        if let Some(aux_str) = aux {
            data_locale.set_aux(aux_str.parse().unwrap());
        }
        // Auxiliary keys cannot be attached without the `experimental` feature.
        #[cfg(not(feature = "experimental"))]
        let _ = aux;
        writeable::assert_writeable_eq!(data_locale, expected);
    }
}
1055
#[test]
fn test_data_locale_from_string() {
    /// One parse attempt and whether it is expected to succeed.
    #[derive(Debug)]
    struct TestCase {
        pub input: &'static str,
        pub success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
        // Auxiliary keys parse only when the `experimental` feature is enabled.
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: true,
        },
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: false,
        },
    ];

    for cas in cases {
        let parsed = DataLocale::from_str(cas.input);
        if cas.success {
            let Ok(data_locale) = parsed else {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            };
            // A successful parse must serialize back to the exact input.
            writeable::assert_writeable_eq!(data_locale, cas.input);
        } else if parsed.is_ok() {
            panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
        }
    }
}
1107