1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use crate::{DataError, DataErrorKind}; |
6 | use core::cmp::Ordering; |
7 | use core::default::Default; |
8 | use core::fmt; |
9 | use core::fmt::Debug; |
10 | use core::hash::Hash; |
11 | use core::str::FromStr; |
12 | use icu_locid::extensions::unicode as unicode_ext; |
13 | use icu_locid::subtags::{Language, Region, Script, Variants}; |
14 | use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult}; |
15 | use writeable::{LengthHint, Writeable}; |
16 | |
17 | #[cfg (feature = "experimental" )] |
18 | use alloc::string::String; |
19 | #[cfg (feature = "experimental" )] |
20 | use core::ops::Deref; |
21 | #[cfg (feature = "experimental" )] |
22 | use tinystr::TinyAsciiStr; |
23 | |
24 | #[cfg (doc)] |
25 | use icu_locid::subtags::Variant; |
26 | |
/// Byte that separates the BCP-47 locale portion of a [`DataLocale`] string from its
/// auxiliary keys, as in `ar-EG+GBP`.
const AUXILIARY_KEY_SEPARATOR: u8 = b'+';
28 | |
/// The request type passed into all data provider implementations.
///
/// A request pairs the [`DataLocale`] to load data for with [`DataRequestMetadata`] options.
/// The locale is held by reference, so the request itself is `Copy`.
// NOTE: the derived `Default` relies on `impl Default for &DataLocale`.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct DataRequest<'a> {
    /// The locale for which to load data.
    ///
    /// If locale fallback is enabled, the resulting data may be from a different locale
    /// than the one requested here.
    pub locale: &'a DataLocale,
    /// Metadata that may affect the behavior of the data provider.
    pub metadata: DataRequestMetadata,
}
41 | |
42 | impl fmt::Display for DataRequest<'_> { |
43 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
44 | fmt::Display::fmt(&self.locale, f) |
45 | } |
46 | } |
47 | |
/// Metadata for data requests. This currently only carries the `silent` flag, but it may be
/// extended with options for tuning locale fallback, buffer layout, and so forth.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[non_exhaustive]
pub struct DataRequestMetadata {
    /// Silent requests do not log errors. This can be used for exploratory querying, such as fallbacks.
    pub silent: bool,
}
56 | |
57 | /// A locale type optimized for use in fallbacking and the ICU4X data pipeline. |
58 | /// |
59 | /// [`DataLocale`] contains less functionality than [`Locale`] but more than |
60 | /// [`LanguageIdentifier`] for better size and performance while still meeting |
61 | /// the needs of the ICU4X data pipeline. |
62 | /// |
63 | /// # Examples |
64 | /// |
65 | /// Convert a [`Locale`] to a [`DataLocale`] and back: |
66 | /// |
67 | /// ``` |
68 | /// use icu_locid::locale; |
69 | /// use icu_provider::DataLocale; |
70 | /// |
71 | /// let locale = locale!("en-u-ca-buddhist" ); |
72 | /// let data_locale = DataLocale::from(locale); |
73 | /// let locale = data_locale.into_locale(); |
74 | /// |
75 | /// assert_eq!(locale, locale!("en-u-ca-buddhist" )); |
76 | /// ``` |
77 | /// |
78 | /// You can alternatively create a [`DataLocale`] from a borrowed [`Locale`], which is more |
79 | /// efficient than cloning the [`Locale`], but less efficient than converting an owned |
80 | /// [`Locale`]: |
81 | /// |
82 | /// ``` |
83 | /// use icu_locid::locale; |
84 | /// use icu_provider::DataLocale; |
85 | /// |
86 | /// let locale1 = locale!("en-u-ca-buddhist" ); |
87 | /// let data_locale = DataLocale::from(&locale1); |
88 | /// let locale2 = data_locale.into_locale(); |
89 | /// |
90 | /// assert_eq!(locale1, locale2); |
91 | /// ``` |
92 | /// |
93 | /// If you are sure that you have no Unicode keywords, start with [`LanguageIdentifier`]: |
94 | /// |
95 | /// ``` |
96 | /// use icu_locid::langid; |
97 | /// use icu_provider::DataLocale; |
98 | /// |
99 | /// let langid = langid!("es-CA-valencia" ); |
100 | /// let data_locale = DataLocale::from(langid); |
101 | /// let langid = data_locale.get_langid(); |
102 | /// |
103 | /// assert_eq!(langid, langid!("es-CA-valencia" )); |
104 | /// ``` |
105 | /// |
106 | /// [`DataLocale`] only supports `-u` keywords, to reflect the current state of CLDR data |
107 | /// lookup and fallback. This may change in the future. |
108 | /// |
109 | /// ``` |
110 | /// use icu_locid::{locale, Locale}; |
111 | /// use icu_provider::DataLocale; |
112 | /// |
113 | /// let locale = "hi-t-en-h0-hybrid-u-attr-ca-buddhist" |
114 | /// .parse::<Locale>() |
115 | /// .unwrap(); |
116 | /// let data_locale = DataLocale::from(locale); |
117 | /// |
118 | /// assert_eq!(data_locale.into_locale(), locale!("hi-u-ca-buddhist" )); |
119 | /// ``` |
#[derive(PartialEq, Clone, Default, Eq, Hash)]
pub struct DataLocale {
    // Language identifier portion: language, script, region, and variant subtags.
    langid: LanguageIdentifier,
    // Unicode (`-u-`) extension keywords; no other extensions are stored.
    keywords: unicode_ext::Keywords,
    // Auxiliary keys, written after the locale portion; only available with the
    // "experimental" feature.
    #[cfg(feature = "experimental")]
    aux: Option<AuxiliaryKeys>,
}
127 | |
128 | impl<'a> Default for &'a DataLocale { |
129 | fn default() -> Self { |
130 | static DEFAULT: DataLocale = DataLocale { |
131 | langid: LanguageIdentifier::UND, |
132 | keywords: unicode_ext::Keywords::new(), |
133 | #[cfg (feature = "experimental" )] |
134 | aux: None, |
135 | }; |
136 | &DEFAULT |
137 | } |
138 | } |
139 | |
140 | impl fmt::Debug for DataLocale { |
141 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
142 | write!(f, "DataLocale {{{self}}}" ) |
143 | } |
144 | } |
145 | |
146 | impl Writeable for DataLocale { |
147 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
148 | self.langid.write_to(sink)?; |
149 | if !self.keywords.is_empty() { |
150 | sink.write_str("-u-" )?; |
151 | self.keywords.write_to(sink)?; |
152 | } |
153 | #[cfg (feature = "experimental" )] |
154 | if let Some(aux) = self.aux.as_ref() { |
155 | sink.write_char(AuxiliaryKeys::separator() as char)?; |
156 | aux.write_to(sink)?; |
157 | } |
158 | Ok(()) |
159 | } |
160 | |
161 | fn writeable_length_hint(&self) -> LengthHint { |
162 | let mut length_hint = self.langid.writeable_length_hint(); |
163 | if !self.keywords.is_empty() { |
164 | length_hint += self.keywords.writeable_length_hint() + 3; |
165 | } |
166 | #[cfg (feature = "experimental" )] |
167 | if let Some(aux) = self.aux.as_ref() { |
168 | length_hint += aux.writeable_length_hint() + 1; |
169 | } |
170 | length_hint |
171 | } |
172 | |
173 | fn write_to_string(&self) -> alloc::borrow::Cow<str> { |
174 | #[cfg_attr (not(feature = "experimental" ), allow(unused_mut))] |
175 | let mut is_only_langid = self.keywords.is_empty(); |
176 | #[cfg (feature = "experimental" )] |
177 | { |
178 | is_only_langid = is_only_langid && self.aux.is_none(); |
179 | } |
180 | if is_only_langid { |
181 | return self.langid.write_to_string(); |
182 | } |
183 | let mut string = |
184 | alloc::string::String::with_capacity(self.writeable_length_hint().capacity()); |
185 | let _ = self.write_to(&mut string); |
186 | alloc::borrow::Cow::Owned(string) |
187 | } |
188 | } |
189 | |
// Generates the `fmt::Display` impl for `DataLocale` via the `writeable` crate's helper
// macro (presumably forwarding to the `Writeable` impl above — confirm in `writeable`).
writeable::impl_display_with_writeable!(DataLocale);
191 | |
192 | impl From<LanguageIdentifier> for DataLocale { |
193 | fn from(langid: LanguageIdentifier) -> Self { |
194 | Self { |
195 | langid, |
196 | keywords: unicode_ext::Keywords::new(), |
197 | #[cfg (feature = "experimental" )] |
198 | aux: None, |
199 | } |
200 | } |
201 | } |
202 | |
203 | impl From<Locale> for DataLocale { |
204 | fn from(locale: Locale) -> Self { |
205 | Self { |
206 | langid: locale.id, |
207 | keywords: locale.extensions.unicode.keywords, |
208 | #[cfg (feature = "experimental" )] |
209 | aux: None, |
210 | } |
211 | } |
212 | } |
213 | |
214 | impl From<&LanguageIdentifier> for DataLocale { |
215 | fn from(langid: &LanguageIdentifier) -> Self { |
216 | Self { |
217 | langid: langid.clone(), |
218 | keywords: unicode_ext::Keywords::new(), |
219 | #[cfg (feature = "experimental" )] |
220 | aux: None, |
221 | } |
222 | } |
223 | } |
224 | |
225 | impl From<&Locale> for DataLocale { |
226 | fn from(locale: &Locale) -> Self { |
227 | Self { |
228 | langid: locale.id.clone(), |
229 | keywords: locale.extensions.unicode.keywords.clone(), |
230 | #[cfg (feature = "experimental" )] |
231 | aux: None, |
232 | } |
233 | } |
234 | } |
235 | |
236 | impl FromStr for DataLocale { |
237 | type Err = DataError; |
238 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
239 | let mut aux_iter = s.splitn(2, AUXILIARY_KEY_SEPARATOR as char); |
240 | let Some(locale_str) = aux_iter.next() else { |
241 | return Err(DataErrorKind::KeyLocaleSyntax |
242 | .into_error() |
243 | .with_display_context(s)); |
244 | }; |
245 | let locale = Locale::from_str(locale_str).map_err(|e| { |
246 | DataErrorKind::KeyLocaleSyntax |
247 | .into_error() |
248 | .with_display_context(s) |
249 | .with_display_context(&e) |
250 | })?; |
251 | #[cfg_attr (not(feature = "experimental" ), allow(unused_mut))] |
252 | let mut data_locale = DataLocale::from(locale); |
253 | #[cfg (feature = "experimental" )] |
254 | if let Some(aux_str) = aux_iter.next() { |
255 | let aux = AuxiliaryKeys::from_str(aux_str)?; |
256 | data_locale.set_aux(aux); |
257 | } |
258 | if aux_iter.next().is_some() { |
259 | return Err(DataErrorKind::KeyLocaleSyntax |
260 | .into_error() |
261 | .with_display_context(s)); |
262 | } |
263 | Ok(data_locale) |
264 | } |
265 | } |
266 | |
267 | impl DataLocale { |
268 | /// Compare this [`DataLocale`] with BCP-47 bytes. |
269 | /// |
270 | /// The return value is equivalent to what would happen if you first converted this |
271 | /// [`DataLocale`] to a BCP-47 string and then performed a byte comparison. |
272 | /// |
273 | /// This function is case-sensitive and results in a *total order*, so it is appropriate for |
274 | /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`. |
275 | /// |
276 | /// # Examples |
277 | /// |
278 | /// ``` |
279 | /// use icu_locid::Locale; |
280 | /// use icu_provider::DataLocale; |
281 | /// use std::cmp::Ordering; |
282 | /// |
283 | /// let bcp47_strings: &[&str] = &[ |
284 | /// "ca" , |
285 | /// "ca+EUR" , |
286 | /// "ca-ES" , |
287 | /// "ca-ES+GBP" , |
288 | /// "ca-ES+GBP+short" , |
289 | /// "ca-ES+USD" , |
290 | /// "ca-ES-u-ca-buddhist" , |
291 | /// "ca-ES-valencia" , |
292 | /// "cat" , |
293 | /// "pl-Latn-PL" , |
294 | /// "und" , |
295 | /// "und+MXN" , |
296 | /// "und-fonipa" , |
297 | /// "und-u-ca-hebrew" , |
298 | /// "und-u-ca-japanese" , |
299 | /// "zh" , |
300 | /// ]; |
301 | /// |
302 | /// for ab in bcp47_strings.windows(2) { |
303 | /// let a = ab[0]; |
304 | /// let b = ab[1]; |
305 | /// assert_eq!(a.cmp(b), Ordering::Less, "strings: {} < {}" , a, b); |
306 | /// let a_loc: DataLocale = a.parse().unwrap(); |
307 | /// assert_eq!( |
308 | /// a_loc.strict_cmp(a.as_bytes()), |
309 | /// Ordering::Equal, |
310 | /// "strict_cmp: {} == {}" , |
311 | /// a_loc, |
312 | /// a |
313 | /// ); |
314 | /// assert_eq!( |
315 | /// a_loc.strict_cmp(b.as_bytes()), |
316 | /// Ordering::Less, |
317 | /// "strict_cmp: {} < {}" , |
318 | /// a_loc, |
319 | /// b |
320 | /// ); |
321 | /// let b_loc: DataLocale = b.parse().unwrap(); |
322 | /// assert_eq!( |
323 | /// b_loc.strict_cmp(b.as_bytes()), |
324 | /// Ordering::Equal, |
325 | /// "strict_cmp: {} == {}" , |
326 | /// b_loc, |
327 | /// b |
328 | /// ); |
329 | /// assert_eq!( |
330 | /// b_loc.strict_cmp(a.as_bytes()), |
331 | /// Ordering::Greater, |
332 | /// "strict_cmp: {} > {}" , |
333 | /// b_loc, |
334 | /// a |
335 | /// ); |
336 | /// } |
337 | /// ``` |
338 | /// |
339 | /// Comparison against invalid strings: |
340 | /// |
341 | /// ``` |
342 | /// use icu_provider::DataLocale; |
343 | /// |
344 | /// let invalid_strings: &[&str] = &[ |
345 | /// // Less than "ca-ES" |
346 | /// "CA" , |
347 | /// "ar+GBP+FOO" , |
348 | /// // Greater than "ca-ES+GBP" |
349 | /// "ca_ES" , |
350 | /// "ca-ES+GBP+FOO" , |
351 | /// ]; |
352 | /// |
353 | /// let data_locale = "ca-ES+GBP" .parse::<DataLocale>().unwrap(); |
354 | /// |
355 | /// for s in invalid_strings.iter() { |
356 | /// let expected_ordering = "ca-ES+GBP" .cmp(s); |
357 | /// let actual_ordering = data_locale.strict_cmp(s.as_bytes()); |
358 | /// assert_eq!(expected_ordering, actual_ordering, "{}" , s); |
359 | /// } |
360 | /// ``` |
361 | pub fn strict_cmp(&self, other: &[u8]) -> Ordering { |
362 | let mut aux_iter = other.splitn(2, |b| *b == AUXILIARY_KEY_SEPARATOR); |
363 | let Some(locale_str) = aux_iter.next() else { |
364 | debug_assert!(other.is_empty()); |
365 | return Ordering::Greater; |
366 | }; |
367 | let aux_str = aux_iter.next(); |
368 | let subtags = locale_str.split(|b| *b == b'-' ); |
369 | let mut subtag_result = self.langid.strict_cmp_iter(subtags); |
370 | if self.has_unicode_ext() { |
371 | let mut subtags = match subtag_result { |
372 | SubtagOrderingResult::Subtags(s) => s, |
373 | SubtagOrderingResult::Ordering(o) => return o, |
374 | }; |
375 | match subtags.next() { |
376 | Some(b"u" ) => (), |
377 | Some(s) => return s.cmp(b"u" ).reverse(), |
378 | None => return Ordering::Greater, |
379 | } |
380 | subtag_result = self.keywords.strict_cmp_iter(subtags); |
381 | } |
382 | let has_more_subtags = match subtag_result { |
383 | SubtagOrderingResult::Subtags(mut s) => s.next().is_some(), |
384 | SubtagOrderingResult::Ordering(o) => return o, |
385 | }; |
386 | // If we get here, `self` has equal or fewer subtags than the `other`. |
387 | // There are 2^3 = 8 cases to handle for auxiliary keys, expanded below. |
388 | match (has_more_subtags, self.get_aux(), aux_str) { |
389 | (false, None, None) => { |
390 | // foo == foo |
391 | Ordering::Equal |
392 | } |
393 | (false, Some(self_aux), Some(other_aux)) => { |
394 | // foo+BAR1 ?= foo+BAR2 |
395 | let aux_ordering = self_aux.as_bytes().cmp(other_aux); |
396 | if aux_ordering != Ordering::Equal { |
397 | return aux_ordering; |
398 | } |
399 | Ordering::Equal |
400 | } |
401 | (false, Some(_), None) => { |
402 | // foo+BAR > foo |
403 | Ordering::Greater |
404 | } |
405 | (_, _, _) => { |
406 | // foo < foo-bar |
407 | // foo < foo-bar+BAR |
408 | // foo < foo+BAR |
409 | // foo+BAR < foo-bar |
410 | // foo+BAR < foo-bar+BAR |
411 | Ordering::Less |
412 | } |
413 | } |
414 | } |
415 | } |
416 | |
417 | impl DataLocale { |
418 | /// Returns whether this [`DataLocale`] has all empty fields (no components). |
419 | /// |
420 | /// See also: |
421 | /// |
422 | /// - [`DataLocale::is_und()`] |
423 | /// - [`DataLocale::is_langid_und()`] |
424 | /// |
425 | /// # Examples |
426 | /// |
427 | /// ``` |
428 | /// use icu_provider::DataLocale; |
429 | /// |
430 | /// assert!("und" .parse::<DataLocale>().unwrap().is_empty()); |
431 | /// assert!(!"und-u-ca-buddhist" |
432 | /// .parse::<DataLocale>() |
433 | /// .unwrap() |
434 | /// .is_empty()); |
435 | /// assert!(!"und+auxiliary" .parse::<DataLocale>().unwrap().is_empty()); |
436 | /// assert!(!"ca-ES" .parse::<DataLocale>().unwrap().is_empty()); |
437 | /// ``` |
438 | pub fn is_empty(&self) -> bool { |
439 | self == <&DataLocale>::default() |
440 | } |
441 | |
442 | /// Returns whether this [`DataLocale`] is `und` in the locale and extensions portion. |
443 | /// |
444 | /// This ignores auxiliary keys. |
445 | /// |
446 | /// See also: |
447 | /// |
448 | /// - [`DataLocale::is_empty()`] |
449 | /// - [`DataLocale::is_langid_und()`] |
450 | /// |
451 | /// # Examples |
452 | /// |
453 | /// ``` |
454 | /// use icu_provider::DataLocale; |
455 | /// |
456 | /// assert!("und" .parse::<DataLocale>().unwrap().is_und()); |
457 | /// assert!(!"und-u-ca-buddhist" .parse::<DataLocale>().unwrap().is_und()); |
458 | /// assert!("und+auxiliary" .parse::<DataLocale>().unwrap().is_und()); |
459 | /// assert!(!"ca-ES" .parse::<DataLocale>().unwrap().is_und()); |
460 | /// ``` |
461 | pub fn is_und(&self) -> bool { |
462 | self.langid == LanguageIdentifier::UND && self.keywords.is_empty() |
463 | } |
464 | |
465 | /// Returns whether the [`LanguageIdentifier`] associated with this request is `und`. |
466 | /// |
467 | /// This ignores extension keywords and auxiliary keys. |
468 | /// |
469 | /// See also: |
470 | /// |
471 | /// - [`DataLocale::is_empty()`] |
472 | /// - [`DataLocale::is_und()`] |
473 | /// |
474 | /// # Examples |
475 | /// |
476 | /// ``` |
477 | /// use icu_provider::DataLocale; |
478 | /// |
479 | /// assert!("und" .parse::<DataLocale>().unwrap().is_langid_und()); |
480 | /// assert!("und-u-ca-buddhist" |
481 | /// .parse::<DataLocale>() |
482 | /// .unwrap() |
483 | /// .is_langid_und()); |
484 | /// assert!("und+auxiliary" |
485 | /// .parse::<DataLocale>() |
486 | /// .unwrap() |
487 | /// .is_langid_und()); |
488 | /// assert!(!"ca-ES" .parse::<DataLocale>().unwrap().is_langid_und()); |
489 | /// ``` |
pub fn is_langid_und(&self) -> bool {
    // Compares the whole language identifier (not just the language subtag) against `und`.
    self.langid == LanguageIdentifier::UND
}
493 | |
494 | /// Gets the [`LanguageIdentifier`] for this [`DataLocale`]. |
495 | /// |
496 | /// This may allocate memory if there are variant subtags. If you need only the language, |
497 | /// script, and/or region subtag, use the specific getters for those subtags: |
498 | /// |
499 | /// - [`DataLocale::language()`] |
500 | /// - [`DataLocale::script()`] |
501 | /// - [`DataLocale::region()`] |
502 | /// |
503 | /// If you have ownership over the `DataLocale`, use [`DataLocale::into_locale()`] |
504 | /// and then access the `id` field. |
505 | /// |
506 | /// # Examples |
507 | /// |
508 | /// ``` |
509 | /// use icu_locid::langid; |
510 | /// use icu_provider::prelude::*; |
511 | /// |
512 | /// const FOO_BAR: DataKey = icu_provider::data_key!("foo/bar@1" ); |
513 | /// |
514 | /// let req_no_langid = DataRequest { |
515 | /// locale: &Default::default(), |
516 | /// metadata: Default::default(), |
517 | /// }; |
518 | /// |
519 | /// let req_with_langid = DataRequest { |
520 | /// locale: &langid!("ar-EG" ).into(), |
521 | /// metadata: Default::default(), |
522 | /// }; |
523 | /// |
524 | /// assert_eq!(req_no_langid.locale.get_langid(), langid!("und" )); |
525 | /// assert_eq!(req_with_langid.locale.get_langid(), langid!("ar-EG" )); |
526 | /// ``` |
pub fn get_langid(&self) -> LanguageIdentifier {
    // Hands back an owned copy, so the result does not borrow from `self`.
    self.langid.clone()
}
530 | |
/// Overrides the entire [`LanguageIdentifier`] portion of this [`DataLocale`].
///
/// Only the language identifier is replaced; Unicode extension keywords and auxiliary keys
/// are left as they were.
#[inline]
pub fn set_langid(&mut self, lid: LanguageIdentifier) {
    self.langid = lid;
}
536 | |
537 | /// Converts this [`DataLocale`] into a [`Locale`]. |
538 | /// |
539 | /// See also [`DataLocale::get_langid()`]. |
540 | /// |
541 | /// # Examples |
542 | /// |
543 | /// ``` |
544 | /// use icu_locid::{ |
545 | /// langid, locale, |
546 | /// subtags::{language, region}, |
547 | /// Locale, |
548 | /// }; |
549 | /// use icu_provider::prelude::*; |
550 | /// |
551 | /// let locale: DataLocale = locale!("it-IT-u-ca-coptic" ).into(); |
552 | /// |
553 | /// assert_eq!(locale.get_langid(), langid!("it-IT" )); |
554 | /// assert_eq!(locale.language(), language!("it" )); |
555 | /// assert_eq!(locale.script(), None); |
556 | /// assert_eq!(locale.region(), Some(region!("IT" ))); |
557 | /// |
558 | /// let locale = locale.into_locale(); |
559 | /// assert_eq!(locale, locale!("it-IT-u-ca-coptic" )); |
560 | /// ``` |
561 | pub fn into_locale(self) -> Locale { |
562 | let mut loc = Locale { |
563 | id: self.langid, |
564 | ..Default::default() |
565 | }; |
566 | loc.extensions.unicode.keywords = self.keywords; |
567 | loc |
568 | } |
569 | |
/// Returns the [`Language`] subtag of this [`DataLocale`]'s language identifier.
#[inline]
pub fn language(&self) -> Language {
    self.langid.language
}
575 | |
/// Sets the [`Language`] for this [`DataLocale`].
#[inline]
pub fn set_language(&mut self, language: Language) {
    self.langid.language = language;
}
581 | |
/// Returns the [`Script`] for this [`DataLocale`], or `None` if no script subtag is set.
#[inline]
pub fn script(&self) -> Option<Script> {
    self.langid.script
}
587 | |
/// Sets the [`Script`] for this [`DataLocale`]. Passing `None` clears the script subtag.
#[inline]
pub fn set_script(&mut self, script: Option<Script>) {
    self.langid.script = script;
}
593 | |
/// Returns the [`Region`] for this [`DataLocale`], or `None` if no region subtag is set.
#[inline]
pub fn region(&self) -> Option<Region> {
    self.langid.region
}
599 | |
/// Sets the [`Region`] for this [`DataLocale`]. Passing `None` clears the region subtag.
#[inline]
pub fn set_region(&mut self, region: Option<Region>) {
    self.langid.region = region;
}
605 | |
/// Returns whether there are any [`Variant`] subtags in this [`DataLocale`].
///
/// See also [`DataLocale::set_variants()`] and [`DataLocale::clear_variants()`].
#[inline]
pub fn has_variants(&self) -> bool {
    !self.langid.variants.is_empty()
}
611 | |
/// Sets all [`Variants`] on this [`DataLocale`], overwriting any that were there previously.
///
/// The previous variants are dropped, not returned.
#[inline]
pub fn set_variants(&mut self, variants: Variants) {
    self.langid.variants = variants;
}
617 | |
/// Removes all [`Variant`] subtags in this [`DataLocale`].
///
/// The return value is whatever [`Variants::clear`] produces.
// NOTE(review): presumably the returned `Variants` holds the removed subtags — confirm
// against `icu_locid`.
#[inline]
pub fn clear_variants(&mut self) -> Variants {
    self.langid.variants.clear()
}
623 | |
/// Gets the value of the specified Unicode extension keyword for this [`DataLocale`].
///
/// The value is cloned out of the locale; `None` is returned if the lookup finds nothing
/// for `key`.
#[inline]
pub fn get_unicode_ext(&self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
    self.keywords.get(key).cloned()
}
629 | |
/// Returns whether there are any Unicode extension keywords in this [`DataLocale`].
///
/// When this is `true`, the string form of the locale contains a `-u-` section.
#[inline]
pub fn has_unicode_ext(&self) -> bool {
    !self.keywords.is_empty()
}
635 | |
/// Returns whether a specific Unicode extension keyword is present in this [`DataLocale`].
///
/// Only the key is checked; use [`DataLocale::matches_unicode_ext()`] to also compare the
/// value.
#[inline]
pub fn contains_unicode_ext(&self, key: &unicode_ext::Key) -> bool {
    self.keywords.contains_key(key)
}
641 | |
642 | /// Returns whether this [`DataLocale`] contains a Unicode extension keyword |
643 | /// with the specified key and value. |
644 | /// |
645 | /// # Examples |
646 | /// |
647 | /// ``` |
648 | /// use icu_locid::{ |
649 | /// extensions::unicode::{key, value}, |
650 | /// Locale, |
651 | /// }; |
652 | /// use icu_provider::prelude::*; |
653 | /// |
654 | /// let locale: Locale = "it-IT-u-ca-coptic" .parse().expect("Valid BCP-47" ); |
655 | /// let locale: DataLocale = locale.into(); |
656 | /// |
657 | /// assert_eq!(locale.get_unicode_ext(&key!("hc" )), None); |
658 | /// assert_eq!(locale.get_unicode_ext(&key!("ca" )), Some(value!("coptic" ))); |
659 | /// assert!(locale.matches_unicode_ext(&key!("ca" ), &value!("coptic" ),)); |
660 | /// ``` |
661 | #[inline ] |
662 | pub fn matches_unicode_ext(&self, key: &unicode_ext::Key, value: &unicode_ext::Value) -> bool { |
663 | self.keywords.get(key) == Some(value) |
664 | } |
665 | |
/// Sets the value for a specific Unicode extension keyword on this [`DataLocale`].
///
/// The return value is forwarded from the underlying keyword map's `set`.
// NOTE(review): presumably this is the value previously stored for `key`, if any —
// confirm against `icu_locid`.
#[inline]
pub fn set_unicode_ext(
    &mut self,
    key: unicode_ext::Key,
    value: unicode_ext::Value,
) -> Option<unicode_ext::Value> {
    self.keywords.set(key, value)
}
675 | |
/// Removes a specific Unicode extension keyword from this [`DataLocale`], returning
/// the value if it was present.
///
/// Other keywords, the language identifier, and auxiliary keys are not touched.
#[inline]
pub fn remove_unicode_ext(&mut self, key: &unicode_ext::Key) -> Option<unicode_ext::Value> {
    self.keywords.remove(key)
}
682 | |
/// Retains a subset of keywords as specified by the predicate function.
///
/// The predicate only sees each keyword's key, not its value.
// NOTE(review): presumably keywords for which `predicate` returns `false` are removed,
// following the usual `retain` convention — confirm against `icu_locid`.
#[inline]
pub fn retain_unicode_ext<F>(&mut self, predicate: F)
where
    F: FnMut(&unicode_ext::Key) -> bool,
{
    self.keywords.retain_by_key(predicate)
}
691 | |
/// Gets the auxiliary key for this [`DataLocale`].
///
/// Returns `None` if no auxiliary key has been set.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn get_aux(&self) -> Option<&AuxiliaryKeys> {
    self.aux.as_ref()
}
699 | |
// Crate-internal stand-in used when the "experimental" feature is disabled: there is no
// auxiliary key storage in that configuration, so this always returns `None`.
#[cfg(not(feature = "experimental"))]
pub(crate) fn get_aux(&self) -> Option<&str> {
    None
}
704 | |
/// Returns whether this [`DataLocale`] has an auxiliary key.
///
/// This is `true` exactly when [`DataLocale::get_aux()`] returns `Some`.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn has_aux(&self) -> bool {
    self.aux.is_some()
}
712 | |
/// Sets an auxiliary key on this [`DataLocale`], replacing any existing one.
///
/// Returns the previous auxiliary key if present.
///
/// For more information and examples, see [`AuxiliaryKeys`].
#[cfg(feature = "experimental")]
pub fn set_aux(&mut self, value: AuxiliaryKeys) -> Option<AuxiliaryKeys> {
    self.aux.replace(value)
}
722 | |
/// Removes the auxiliary key, if present, and returns it.
///
/// After this call the [`DataLocale`] has no auxiliary key.
///
/// # Examples
///
/// ```
/// use icu_locid::locale;
/// use icu_provider::prelude::*;
/// use writeable::assert_writeable_eq;
///
/// let mut data_locale: DataLocale = locale!("ar-EG").into();
/// let aux = "GBP"
///     .parse::<AuxiliaryKeys>()
///     .expect("contains valid characters");
/// data_locale.set_aux(aux);
/// assert_writeable_eq!(data_locale, "ar-EG+GBP");
///
/// let maybe_aux = data_locale.remove_aux();
/// assert_writeable_eq!(data_locale, "ar-EG");
/// assert_writeable_eq!(maybe_aux.unwrap(), "GBP");
/// ```
#[cfg(feature = "experimental")]
pub fn remove_aux(&mut self) -> Option<AuxiliaryKeys> {
    self.aux.take()
}
747 | } |
748 | |
/// The "auxiliary key" is an annotation on [`DataLocale`] that can contain arbitrary
/// information that does not fit into the [`LanguageIdentifier`] or [`Keywords`].
751 | /// |
752 | /// A [`DataLocale`] can have multiple auxiliary keys, represented by this struct. The auxiliary |
753 | /// keys are separated from the BCP-47 locale and from each other with the character returned by |
754 | /// [`AuxiliaryKeys::separator()`]. |
755 | /// |
756 | /// An auxiliary key currently allows alphanumerics and `-`. |
757 | /// |
758 | /// <div class="stab unstable"> |
759 | /// 🚧 This code is experimental; it may change at any time, in breaking or non-breaking ways, |
760 | /// including in SemVer minor releases. It can be enabled with the "experimental" Cargo feature |
761 | /// of the `icu_provider` crate. Use with caution. |
762 | /// <a href="https://github.com/unicode-org/icu4x/issues/3632">#3632</a> |
763 | /// </div> |
764 | /// |
765 | /// # Examples |
766 | /// |
767 | /// ``` |
768 | /// use icu_locid::locale; |
769 | /// use icu_provider::prelude::*; |
770 | /// use writeable::assert_writeable_eq; |
771 | /// |
772 | /// let mut data_locale: DataLocale = locale!("ar-EG").into(); |
773 | /// assert_writeable_eq!(data_locale, "ar-EG"); |
774 | /// assert!(!data_locale.has_aux()); |
775 | /// assert_eq!(data_locale.get_aux(), None); |
776 | /// |
777 | /// let aux = "GBP" |
778 | /// .parse::<AuxiliaryKeys>() |
779 | /// .expect("contains valid characters"); |
780 | /// |
781 | /// data_locale.set_aux(aux); |
782 | /// assert_writeable_eq!(data_locale, "ar-EG+GBP"); |
783 | /// assert!(data_locale.has_aux()); |
784 | /// assert_eq!(data_locale.get_aux(), Some(&"GBP".parse().unwrap())); |
785 | /// ``` |
786 | /// |
787 | /// Multiple auxiliary keys are allowed: |
788 | /// |
789 | /// ``` |
790 | /// use icu_locid::locale; |
791 | /// use icu_provider::prelude::*; |
792 | /// use writeable::assert_writeable_eq; |
793 | /// |
794 | /// let data_locale = "ar-EG+GBP+long".parse::<DataLocale>().unwrap(); |
795 | /// assert_writeable_eq!(data_locale, "ar-EG+GBP+long"); |
796 | /// assert_eq!(data_locale.get_aux().unwrap().iter().count(), 2); |
797 | /// ``` |
798 | /// |
799 | /// Not all strings are valid auxiliary keys: |
800 | /// |
801 | /// ``` |
802 | /// use icu_provider::prelude::*; |
803 | /// |
804 | /// assert!("abcdefg".parse::<AuxiliaryKeys>().is_ok()); |
805 | /// assert!("ABC123".parse::<AuxiliaryKeys>().is_ok()); |
806 | /// assert!("abc-xyz".parse::<AuxiliaryKeys>().is_ok()); |
807 | /// |
808 | /// assert!("".parse::<AuxiliaryKeys>().is_err()); |
809 | /// assert!("!@#$%".parse::<AuxiliaryKeys>().is_err()); |
810 | /// assert!("abc_xyz".parse::<AuxiliaryKeys>().is_err()); |
811 | /// ``` |
812 | /// |
813 | /// [`Keywords`]: unicode_ext::Keywords |
#[derive(Debug, PartialEq, Clone, Eq, Hash)]
#[cfg(feature = "experimental")]
pub struct AuxiliaryKeys {
    // DISCUSS: SmallStr? TinyStrAuto?
    // DISCUSS: Make this a dynamically sized type so references can be taken?
    // All auxiliary keys stored as one string, joined by the separator character.
    value: AuxiliaryKeysInner,
}
821 | |
// Backing storage for `AuxiliaryKeys`. Both variants dereference to the same `str` view,
// and equality, hashing, and debug output are all defined on that view.
#[cfg(feature = "experimental")]
#[derive(Clone)]
enum AuxiliaryKeysInner {
    // Heap-allocated string, used when the joined keys are longer than 23 bytes.
    Boxed(alloc::boxed::Box<str>),
    // Inline ASCII string, used when the joined keys fit in 23 bytes.
    Stack(TinyAsciiStr<23>),
    // NOTE: In the future, a `Static` variant could be added to allow `data_locale!("...")`
    // Static(&'static str),
}
830 | |
// Exposes either storage variant as a plain `&str`.
#[cfg(feature = "experimental")]
impl Deref for AuxiliaryKeysInner {
    type Target = str;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Stack(tiny) => tiny.as_str(),
            Self::Boxed(boxed) => &**boxed,
        }
    }
}
842 | |
// Equality is on the string contents, so a boxed and a stack value with the same text
// compare equal.
#[cfg(feature = "experimental")]
impl PartialEq for AuxiliaryKeysInner {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        let lhs: &str = self;
        let rhs: &str = other;
        lhs == rhs
    }
}
850 | |
// Marker impl: the `PartialEq` impl compares string contents, which is a full equivalence.
#[cfg(feature = "experimental")]
impl Eq for AuxiliaryKeysInner {}
853 | |
// Debug output is that of the underlying string; the storage variant is not shown.
#[cfg(feature = "experimental")]
impl Debug for AuxiliaryKeysInner {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self;
        Debug::fmt(text, f)
    }
}
861 | |
// Hashes the string contents, keeping `Hash` consistent with the content-based `PartialEq`.
#[cfg(feature = "experimental")]
impl Hash for AuxiliaryKeysInner {
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let text: &str = self;
        Hash::hash(text, state)
    }
}
869 | |
// Generates the `fmt::Display` impl for `AuxiliaryKeys` via the `writeable` crate's helper
// macro (presumably forwarding to its `Writeable` impl — confirm in `writeable`).
#[cfg(feature = "experimental")]
writeable::impl_display_with_writeable!(AuxiliaryKeys);
872 | |
// Writes the auxiliary keys exactly as stored: the joined string, with no leading separator.
#[cfg(feature = "experimental")]
impl Writeable for AuxiliaryKeys {
    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
        let text: &str = &self.value;
        text.write_to(sink)
    }
    fn writeable_length_hint(&self) -> LengthHint {
        let text: &str = &self.value;
        text.writeable_length_hint()
    }
    fn write_to_string(&self) -> alloc::borrow::Cow<str> {
        let text: &str = &self.value;
        text.write_to_string()
    }
}
885 | |
/// Parses auxiliary keys from their string form by delegating to `try_from_str`.
#[cfg(feature = "experimental")]
impl FromStr for AuxiliaryKeys {
    type Err = DataError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Self::try_from_str(s)
    }
}
894 | |
895 | #[cfg (feature = "experimental" )] |
896 | impl AuxiliaryKeys { |
/// Returns this [`AuxiliaryKeys`] as a single byte slice.
///
/// NOTE: Do not make this public because we might not always store these in a single string.
/// External clients who need this can use `<Self as Writeable>::write_to_string`.
#[inline]
pub(crate) fn as_bytes(&self) -> &[u8] {
    // `value` dereferences to `str`, so this is the byte view of the stored joined string.
    self.value.as_bytes()
}
905 | |
906 | /// Creates an [`AuxiliaryKeys`] from an iterator of individual keys. |
907 | /// |
908 | /// # Examples |
909 | /// |
910 | /// ``` |
911 | /// use icu_provider::prelude::*; |
912 | /// |
913 | /// // Single auxiliary key: |
914 | /// let a = AuxiliaryKeys::try_from_iter(["abc"]).unwrap(); |
915 | /// let b = "abc".parse::<AuxiliaryKeys>().unwrap(); |
916 | /// assert_eq!(a, b); |
917 | /// |
918 | /// // Multiple auxiliary keys: |
919 | /// let a = AuxiliaryKeys::try_from_iter(["abc", "defg"]).unwrap(); |
920 | /// let b = "abc+defg".parse::<AuxiliaryKeys>().unwrap(); |
921 | /// assert_eq!(a, b); |
922 | /// ``` |
923 | /// |
924 | /// Don't include the auxiliary key separator or other invalid chars in the iterator strings: |
925 | /// |
926 | /// ``` |
927 | /// use icu_provider::prelude::*; |
928 | /// |
929 | /// assert!(AuxiliaryKeys::try_from_iter(["abc+defg"]).is_err()); |
930 | /// assert!(AuxiliaryKeys::try_from_iter(["AB$C"]).is_err()); |
931 | /// ``` |
932 | pub fn try_from_iter<'a>(iter: impl IntoIterator<Item = &'a str>) -> Result<Self, DataError> { |
933 | // TODO: Avoid the allocation when possible |
934 | let mut builder = String::new(); |
935 | for item in iter { |
936 | if !item.is_empty() |
937 | && item |
938 | .bytes() |
939 | .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' )) |
940 | { |
941 | if !builder.is_empty() { |
942 | builder.push(AuxiliaryKeys::separator() as char); |
943 | } |
944 | builder.push_str(item) |
945 | } else { |
946 | return Err(DataErrorKind::KeyLocaleSyntax |
947 | .into_error() |
948 | .with_display_context(item)); |
949 | } |
950 | } |
951 | if builder.len() <= 23 { |
952 | #[allow (clippy::unwrap_used)] // we just checked that the string is ascii |
953 | Ok(Self { |
954 | value: AuxiliaryKeysInner::Stack(builder.parse().unwrap()), |
955 | }) |
956 | } else { |
957 | Ok(Self { |
958 | value: AuxiliaryKeysInner::Boxed(builder.into()), |
959 | }) |
960 | } |
961 | } |
962 | |
963 | pub(crate) fn try_from_str(s: &str) -> Result<Self, DataError> { |
964 | if !s.is_empty() |
965 | && s.bytes() |
966 | .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'-' | b'+' )) |
967 | { |
968 | if s.len() <= 23 { |
969 | #[allow (clippy::unwrap_used)] // we just checked that the string is ascii |
970 | Ok(Self { |
971 | value: AuxiliaryKeysInner::Stack(s.parse().unwrap()), |
972 | }) |
973 | } else { |
974 | Ok(Self { |
975 | value: AuxiliaryKeysInner::Boxed(s.into()), |
976 | }) |
977 | } |
978 | } else { |
979 | Err(DataErrorKind::KeyLocaleSyntax |
980 | .into_error() |
981 | .with_display_context(s)) |
982 | } |
983 | } |
984 | |
985 | /// Iterates over the components of the auxiliary key. |
986 | /// |
987 | /// # Example |
988 | /// |
989 | /// ``` |
990 | /// use icu_provider::AuxiliaryKeys; |
991 | /// |
992 | /// let aux: AuxiliaryKeys = "abc+defg".parse().unwrap(); |
993 | /// assert_eq!(aux.iter().collect::<Vec<_>>(), vec!["abc", "defg"]); |
994 | /// ``` |
995 | pub fn iter(&self) -> impl Iterator<Item = &str> + '_ { |
996 | self.value.split(Self::separator() as char) |
997 | } |
998 | |
999 | /// Returns the separator byte used for auxiliary keys in data locales. |
1000 | /// |
1001 | /// # Examples |
1002 | /// |
1003 | /// ``` |
1004 | /// use icu_provider::AuxiliaryKeys; |
1005 | /// |
1006 | /// assert_eq!(AuxiliaryKeys::separator(), b'+'); |
1007 | /// ``` |
1008 | #[inline ] |
1009 | pub const fn separator() -> u8 { |
1010 | AUXILIARY_KEY_SEPARATOR |
1011 | } |
1012 | } |
1013 | |
/// Checks that a `DataLocale` built from a `Locale` (plus optional auxiliary
/// keys when the `experimental` feature is on) writes out the expected string.
#[test]
fn test_data_locale_to_string() {
    use icu_locid::locale;

    // One row per scenario: the source locale, optional auxiliary keys to attach,
    // and the string the resulting `DataLocale` must write.
    struct TestCase {
        locale: Locale,
        aux: Option<&'static str>,
        expected: &'static str,
    }

    let cases = [
        TestCase {
            locale: Locale::UND,
            aux: None,
            expected: "und",
        },
        TestCase {
            locale: locale!("und-u-cu-gbp"),
            aux: None,
            expected: "und-u-cu-gbp",
        },
        TestCase {
            locale: locale!("en-ZA-u-cu-gbp"),
            aux: None,
            expected: "en-ZA-u-cu-gbp",
        },
        // Auxiliary keys only exist behind the `experimental` feature.
        #[cfg(feature = "experimental")]
        TestCase {
            locale: locale!("en-ZA-u-nu-arab"),
            aux: Some("GBP"),
            expected: "en-ZA-u-nu-arab+GBP",
        },
    ];

    for case in cases {
        let mut data_locale = DataLocale::from(case.locale);
        #[cfg(feature = "experimental")]
        if let Some(aux) = case.aux {
            data_locale.set_aux(aux.parse().unwrap());
        }
        writeable::assert_writeable_eq!(data_locale, case.expected);
    }
}
1055 | |
/// Checks which strings parse as a `DataLocale`, and that every string that
/// does parse writes back out unchanged.
#[test]
fn test_data_locale_from_string() {
    // An input string together with whether parsing it is expected to succeed.
    #[derive(Debug)]
    struct TestCase {
        input: &'static str,
        success: bool,
    }

    let cases = [
        TestCase {
            input: "und",
            success: true,
        },
        TestCase {
            input: "und-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en-ZA-u-cu-gbp",
            success: true,
        },
        TestCase {
            input: "en...",
            success: false,
        },
        // The same input is expected to parse only when the `experimental`
        // feature is enabled.
        #[cfg(feature = "experimental")]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: true,
        },
        #[cfg(not(feature = "experimental"))]
        TestCase {
            input: "en-ZA-u-nu-arab+GBP",
            success: false,
        },
    ];

    for cas in cases {
        let data_locale = match DataLocale::from_str(cas.input) {
            Ok(parsed) if cas.success => parsed,
            Ok(_) => {
                panic!("DataLocale parsed but it was supposed to fail: {cas:?}");
            }
            Err(_) if cas.success => {
                panic!("DataLocale was supposed to parse but it failed: {cas:?}");
            }
            // Expected failure: nothing to round-trip.
            Err(_) => continue,
        };
        // A successfully parsed locale must round-trip to its input.
        writeable::assert_writeable_eq!(data_locale, cas.input);
    }
}
1107 | |