1 | // This file is part of ICU4X. For terms of use, please see the file |
---|---|
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! The collection of code for locale canonicalization. |
6 | |
7 | use crate::provider::*; |
8 | use crate::LocaleTransformError; |
9 | use alloc::vec::Vec; |
10 | use core::cmp::Ordering; |
11 | |
12 | use crate::LocaleExpander; |
13 | use crate::TransformResult; |
14 | use icu_locid::extensions::Extensions; |
15 | use icu_locid::subtags::{Language, Region, Script}; |
16 | use icu_locid::{ |
17 | extensions::unicode::key, |
18 | subtags::{language, Variant, Variants}, |
19 | LanguageIdentifier, Locale, |
20 | }; |
21 | use icu_provider::prelude::*; |
22 | use tinystr::TinyAsciiStr; |
23 | |
24 | /// Implements the algorithm defined in *[UTS #35: Annex C, LocaleId Canonicalization]*. |
25 | /// |
26 | /// # Examples |
27 | /// |
28 | /// ``` |
29 | /// use icu::locid::Locale; |
30 | /// use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; |
31 | /// |
32 | /// let lc = LocaleCanonicalizer::new(); |
33 | /// |
34 | /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); |
35 | /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); |
36 | /// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap()); |
37 | /// ``` |
38 | /// |
39 | /// [UTS #35: Annex C, LocaleId Canonicalization]: http://unicode.org/reports/tr35/#LocaleId_Canonicalization |
40 | #[derive(Debug)] |
41 | pub struct LocaleCanonicalizer { |
42 | /// Data to support canonicalization. |
43 | aliases: DataPayload<AliasesV2Marker>, |
44 | /// Likely subtags implementation for delegation. |
45 | expander: LocaleExpander, |
46 | } |
47 | |
48 | fn uts35_rule_matches<'a, I>( |
49 | source: &LanguageIdentifier, |
50 | language: Language, |
51 | script: Option<Script>, |
52 | region: Option<Region>, |
53 | raw_variants: I, |
54 | ) -> bool |
55 | where |
56 | I: Iterator<Item = &'a str>, |
57 | { |
58 | (language.is_empty() || language == source.language) |
59 | && (script.is_none() || script == source.script) |
60 | && (region.is_none() || region == source.region) |
61 | && { |
62 | // Checks if variants are a subset of source variants. |
63 | // As both iterators are sorted, this can be done linearly. |
64 | let mut source_variants = source.variants.iter(); |
65 | 'outer: for raw_variant in raw_variants { |
66 | for source_variant in source_variants.by_ref() { |
67 | match source_variant.strict_cmp(raw_variant.as_bytes()) { |
68 | Ordering::Equal => { |
69 | // The source_variant is equal, move to next raw_variant |
70 | continue 'outer; |
71 | } |
72 | Ordering::Less => { |
73 | // The source_variant is smaller, take the next source_variant |
74 | } |
75 | Ordering::Greater => { |
76 | // The source_variant is greater, |
77 | // raw_variants is not a subset of source_variants |
78 | return false; |
79 | } |
80 | } |
81 | } |
82 | // There are raw_variants left after we exhausted source_variants |
83 | return false; |
84 | } |
85 | true |
86 | } |
87 | } |
88 | |
89 | fn uts35_replacement<'a, I>( |
90 | source: &mut LanguageIdentifier, |
91 | ruletype_has_language: bool, |
92 | ruletype_has_script: bool, |
93 | ruletype_has_region: bool, |
94 | ruletype_variants: Option<I>, |
95 | replacement: &LanguageIdentifier, |
96 | ) where |
97 | I: Iterator<Item = &'a str>, |
98 | { |
99 | if ruletype_has_language || (source.language.is_empty() && !replacement.language.is_empty()) { |
100 | source.language = replacement.language; |
101 | } |
102 | if ruletype_has_script || (source.script.is_none() && replacement.script.is_some()) { |
103 | source.script = replacement.script; |
104 | } |
105 | if ruletype_has_region || (source.region.is_none() && replacement.region.is_some()) { |
106 | source.region = replacement.region; |
107 | } |
108 | if let Some(skips) = ruletype_variants { |
109 | // The rule matches if the ruletype variants are a subset of the source variants. |
110 | // This means ja-Latn-fonipa-hepburn-heploc matches against the rule for |
111 | // hepburn-heploc and is canonicalized to ja-Latn-alalc97-fonipa |
112 | |
113 | // We're merging three sorted deduped iterators into a new sequence: |
114 | // sources - skips + replacements |
115 | |
116 | let mut sources = source.variants.iter().peekable(); |
117 | let mut replacements = replacement.variants.iter().peekable(); |
118 | let mut skips = skips.peekable(); |
119 | |
120 | let mut variants: Vec<Variant> = Vec::new(); |
121 | |
122 | loop { |
123 | match (sources.peek(), skips.peek(), replacements.peek()) { |
124 | (Some(&source), Some(skip), _) |
125 | if source.strict_cmp(skip.as_bytes()) == Ordering::Greater => |
126 | { |
127 | skips.next(); |
128 | } |
129 | (Some(&source), Some(skip), _) |
130 | if source.strict_cmp(skip.as_bytes()) == Ordering::Equal => |
131 | { |
132 | skips.next(); |
133 | sources.next(); |
134 | } |
135 | (Some(&source), _, Some(&replacement)) |
136 | if replacement.cmp(source) == Ordering::Less => |
137 | { |
138 | variants.push(*replacement); |
139 | replacements.next(); |
140 | } |
141 | (Some(&source), _, Some(&replacement)) |
142 | if replacement.cmp(source) == Ordering::Equal => |
143 | { |
144 | variants.push(*source); |
145 | sources.next(); |
146 | replacements.next(); |
147 | } |
148 | (Some(&source), _, _) => { |
149 | variants.push(*source); |
150 | sources.next(); |
151 | } |
152 | (None, _, Some(&replacement)) => { |
153 | variants.push(*replacement); |
154 | replacements.next(); |
155 | } |
156 | (None, _, None) => { |
157 | break; |
158 | } |
159 | } |
160 | } |
161 | source.variants = Variants::from_vec_unchecked(variants); |
162 | } |
163 | } |
164 | |
165 | #[inline] |
166 | fn uts35_check_language_rules( |
167 | langid: &mut LanguageIdentifier, |
168 | alias_data: &DataPayload<AliasesV2Marker>, |
169 | ) -> TransformResult { |
170 | if !langid.language.is_empty() { |
171 | let lang: TinyAsciiStr<3> = langid.language.into(); |
172 | let replacement = if lang.len() == 2 { |
173 | alias_data |
174 | .get() |
175 | .language_len2 |
176 | .get(&lang.resize().to_unvalidated()) |
177 | } else { |
178 | alias_data.get().language_len3.get(&lang.to_unvalidated()) |
179 | }; |
180 | |
181 | if let Some(replacement) = replacement { |
182 | if let Ok(new_langid) = replacement.parse() { |
183 | uts35_replacement::<core::iter::Empty<&str>>( |
184 | langid, |
185 | true, |
186 | false, |
187 | false, |
188 | None, |
189 | &new_langid, |
190 | ); |
191 | return TransformResult::Modified; |
192 | } |
193 | } |
194 | } |
195 | |
196 | TransformResult::Unmodified |
197 | } |
198 | |
// `Default` is only available with compiled data, where it is equivalent to
// `LocaleCanonicalizer::new()`.
#[cfg(feature = "compiled_data")]
impl Default for LocaleCanonicalizer {
    fn default() -> Self {
        LocaleCanonicalizer::new()
    }
}
205 | |
206 | impl LocaleCanonicalizer { |
207 | /// A constructor which creates a [`LocaleCanonicalizer`] from compiled data. |
208 | /// |
209 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
210 | /// |
211 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
212 | #[cfg(feature = "compiled_data")] |
213 | pub const fn new() -> Self { |
214 | Self::new_with_expander(LocaleExpander::new_extended()) |
215 | } |
216 | |
217 | // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed |
218 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new)] |
219 | pub fn try_new_with_any_provider( |
220 | provider: &(impl AnyProvider + ?Sized), |
221 | ) -> Result<Self, LocaleTransformError> { |
222 | let expander = LocaleExpander::try_new_with_any_provider(provider)?; |
223 | Self::try_new_with_expander_compat(&provider.as_downcasting(), expander) |
224 | } |
225 | |
226 | // Note: This is a custom impl because the bounds on LocaleExpander::try_new_unstable changed |
227 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(BUFFER, Self::new)] |
228 | #[cfg(feature = "serde")] |
229 | pub fn try_new_with_buffer_provider( |
230 | provider: &(impl BufferProvider + ?Sized), |
231 | ) -> Result<Self, LocaleTransformError> { |
232 | let expander = LocaleExpander::try_new_with_buffer_provider(provider)?; |
233 | Self::try_new_with_expander_compat(&provider.as_deserializing(), expander) |
234 | } |
235 | |
236 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
237 | pub fn try_new_unstable<P>(provider: &P) -> Result<Self, LocaleTransformError> |
238 | where |
239 | P: DataProvider<AliasesV2Marker> |
240 | + DataProvider<LikelySubtagsForLanguageV1Marker> |
241 | + DataProvider<LikelySubtagsForScriptRegionV1Marker> |
242 | + ?Sized, |
243 | { |
244 | let expander = LocaleExpander::try_new_unstable(provider)?; |
245 | Self::try_new_with_expander_unstable(provider, expander) |
246 | } |
247 | |
248 | /// Creates a [`LocaleCanonicalizer`] with a custom [`LocaleExpander`] and compiled data. |
249 | /// |
250 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
251 | /// |
252 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
253 | #[cfg(feature = "compiled_data")] |
254 | pub const fn new_with_expander(expander: LocaleExpander) -> Self { |
255 | Self { |
256 | aliases: DataPayload::from_static_ref( |
257 | crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_ALIASES_V2, |
258 | ), |
259 | expander, |
260 | } |
261 | } |
262 | |
263 | fn try_new_with_expander_compat<P>( |
264 | provider: &P, |
265 | expander: LocaleExpander, |
266 | ) -> Result<Self, LocaleTransformError> |
267 | where |
268 | P: DataProvider<AliasesV2Marker> + DataProvider<AliasesV1Marker> + ?Sized, |
269 | { |
270 | let payload_v2: Result<DataPayload<AliasesV2Marker>, _> = provider |
271 | .load(Default::default()) |
272 | .and_then(DataResponse::take_payload); |
273 | let aliases = if let Ok(payload) = payload_v2 { |
274 | payload |
275 | } else { |
276 | let payload_v1: DataPayload<AliasesV1Marker> = provider |
277 | .load(Default::default()) |
278 | .and_then(DataResponse::take_payload)?; |
279 | payload_v1.try_map_project(|st, _| st.try_into())? |
280 | }; |
281 | |
282 | Ok(Self { aliases, expander }) |
283 | } |
284 | |
285 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)] |
286 | pub fn try_new_with_expander_unstable<P>( |
287 | provider: &P, |
288 | expander: LocaleExpander, |
289 | ) -> Result<Self, LocaleTransformError> |
290 | where |
291 | P: DataProvider<AliasesV2Marker> + ?Sized, |
292 | { |
293 | let aliases: DataPayload<AliasesV2Marker> = |
294 | provider.load(Default::default())?.take_payload()?; |
295 | |
296 | Ok(Self { aliases, expander }) |
297 | } |
298 | |
299 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(ANY, Self::new_with_expander)] |
300 | pub fn try_new_with_expander_with_any_provider( |
301 | provider: &(impl AnyProvider + ?Sized), |
302 | options: LocaleExpander, |
303 | ) -> Result<Self, LocaleTransformError> { |
304 | Self::try_new_with_expander_compat(&provider.as_downcasting(), options) |
305 | } |
306 | |
307 | #[cfg(feature = "serde")] |
308 | #[doc= icu_provider::gen_any_buffer_unstable_docs!(BUFFER,Self::new_with_expander)] |
309 | pub fn try_new_with_expander_with_buffer_provider( |
310 | provider: &(impl BufferProvider + ?Sized), |
311 | options: LocaleExpander, |
312 | ) -> Result<Self, LocaleTransformError> { |
313 | Self::try_new_with_expander_compat(&provider.as_deserializing(), options) |
314 | } |
315 | |
316 | /// The canonicalize method potentially updates a passed in locale in place |
317 | /// depending up the results of running the canonicalization algorithm |
318 | /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. |
319 | /// |
320 | /// Some BCP47 canonicalization data is not part of the CLDR json package. Because |
321 | /// of this, some canonicalizations are not performed, e.g. the canonicalization of |
322 | /// `und-u-ca-islamicc` to `und-u-ca-islamic-civil`. This will be fixed in a future |
323 | /// release once the missing data has been added to the CLDR json data. See: |
324 | /// <https://github.com/unicode-org/icu4x/issues/746> |
325 | /// |
326 | /// # Examples |
327 | /// |
328 | /// ``` |
329 | /// use icu::locid::Locale; |
330 | /// use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; |
331 | /// |
332 | /// let lc = LocaleCanonicalizer::new(); |
333 | /// |
334 | /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); |
335 | /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); |
336 | /// assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse().unwrap()); |
337 | /// ``` |
338 | pub fn canonicalize(&self, locale: &mut Locale) -> TransformResult { |
339 | let mut result = TransformResult::Unmodified; |
340 | |
341 | // This loops until we get a 'fixed point', where applying the rules do not |
342 | // result in any more changes. |
343 | loop { |
344 | // These are linear searches due to the ordering imposed by the canonicalization |
345 | // rules, where rules with more variants should be considered first. With the |
346 | // current data in CLDR, we will only do this for locales which have variants, |
347 | // or new rules which we haven't special-cased yet (of which there are fewer |
348 | // than 20). |
349 | let modified = if locale.id.variants.is_empty() { |
350 | self.canonicalize_absolute_language_fallbacks(&mut locale.id) |
351 | } else { |
352 | self.canonicalize_language_variant_fallbacks(&mut locale.id) |
353 | }; |
354 | if modified { |
355 | result = TransformResult::Modified; |
356 | continue; |
357 | } |
358 | |
359 | if !locale.id.language.is_empty() { |
360 | // If the region is specified, check sgn-region rules first |
361 | if let Some(region) = locale.id.region { |
362 | if locale.id.language == language!("sgn") { |
363 | if let Some(&sgn_lang) = self |
364 | .aliases |
365 | .get() |
366 | .sgn_region |
367 | .get(®ion.into_tinystr().to_unvalidated()) |
368 | { |
369 | uts35_replacement::<core::iter::Empty<&str>>( |
370 | &mut locale.id, |
371 | true, |
372 | false, |
373 | true, |
374 | None, |
375 | &sgn_lang.into(), |
376 | ); |
377 | result = TransformResult::Modified; |
378 | continue; |
379 | } |
380 | } |
381 | } |
382 | |
383 | if uts35_check_language_rules(&mut locale.id, &self.aliases) |
384 | == TransformResult::Modified |
385 | { |
386 | result = TransformResult::Modified; |
387 | continue; |
388 | } |
389 | } |
390 | |
391 | if let Some(script) = locale.id.script { |
392 | if let Some(&replacement) = self |
393 | .aliases |
394 | .get() |
395 | .script |
396 | .get(&script.into_tinystr().to_unvalidated()) |
397 | { |
398 | locale.id.script = Some(replacement); |
399 | result = TransformResult::Modified; |
400 | continue; |
401 | } |
402 | } |
403 | |
404 | if let Some(region) = locale.id.region { |
405 | let replacement = if region.is_alphabetic() { |
406 | self.aliases |
407 | .get() |
408 | .region_alpha |
409 | .get(®ion.into_tinystr().resize().to_unvalidated()) |
410 | } else { |
411 | self.aliases |
412 | .get() |
413 | .region_num |
414 | .get(®ion.into_tinystr().to_unvalidated()) |
415 | }; |
416 | if let Some(&replacement) = replacement { |
417 | locale.id.region = Some(replacement); |
418 | result = TransformResult::Modified; |
419 | continue; |
420 | } |
421 | |
422 | if let Some(regions) = self |
423 | .aliases |
424 | .get() |
425 | .complex_region |
426 | .get(®ion.into_tinystr().to_unvalidated()) |
427 | { |
428 | // Skip if regions are empty |
429 | if let Some(default_region) = regions.get(0) { |
430 | let mut maximized = LanguageIdentifier { |
431 | language: locale.id.language, |
432 | script: locale.id.script, |
433 | region: None, |
434 | variants: Variants::default(), |
435 | }; |
436 | |
437 | locale.id.region = Some( |
438 | match (self.expander.maximize(&mut maximized), maximized.region) { |
439 | (TransformResult::Modified, Some(candidate)) |
440 | if regions.iter().any(|x| x == candidate) => |
441 | { |
442 | candidate |
443 | } |
444 | _ => default_region, |
445 | }, |
446 | ); |
447 | result = TransformResult::Modified; |
448 | continue; |
449 | } |
450 | } |
451 | } |
452 | |
453 | if !locale.id.variants.is_empty() { |
454 | let mut modified = Vec::with_capacity(0); |
455 | for (idx, &variant) in locale.id.variants.iter().enumerate() { |
456 | if let Some(&updated) = self |
457 | .aliases |
458 | .get() |
459 | .variant |
460 | .get(&variant.into_tinystr().to_unvalidated()) |
461 | { |
462 | if modified.is_empty() { |
463 | modified = locale.id.variants.to_vec(); |
464 | } |
465 | #[allow(clippy::indexing_slicing)] |
466 | let _ = core::mem::replace(&mut modified[idx], updated); |
467 | } |
468 | } |
469 | |
470 | if !modified.is_empty() { |
471 | modified.sort(); |
472 | modified.dedup(); |
473 | locale.id.variants = Variants::from_vec_unchecked(modified); |
474 | result = TransformResult::Modified; |
475 | continue; |
476 | } |
477 | } |
478 | |
479 | // Nothing matched in this iteration, we're done. |
480 | break; |
481 | } |
482 | |
483 | if !locale.extensions.transform.is_empty() || !locale.extensions.unicode.is_empty() { |
484 | self.canonicalize_extensions(&mut locale.extensions, &mut result); |
485 | } |
486 | result |
487 | } |
488 | |
489 | fn canonicalize_extensions(&self, extensions: &mut Extensions, result: &mut TransformResult) { |
490 | // Handle Locale extensions in their own loops, because these rules do not interact |
491 | // with each other. |
492 | if let Some(ref mut lang) = extensions.transform.lang { |
493 | while uts35_check_language_rules(lang, &self.aliases) == TransformResult::Modified { |
494 | *result = TransformResult::Modified; |
495 | } |
496 | } |
497 | |
498 | if !extensions.unicode.keywords.is_empty() { |
499 | for key in [key!("rg"), key!( "sd")] { |
500 | if let Some(value) = extensions.unicode.keywords.get_mut(&key) { |
501 | if let &[only_value] = value.as_tinystr_slice() { |
502 | if let Some(modified_value) = self |
503 | .aliases |
504 | .get() |
505 | .subdivision |
506 | .get(&only_value.resize().to_unvalidated()) |
507 | { |
508 | if let Ok(modified_value) = modified_value.parse() { |
509 | *value = modified_value; |
510 | *result = TransformResult::Modified; |
511 | } |
512 | } |
513 | } |
514 | } |
515 | } |
516 | } |
517 | } |
518 | |
519 | fn canonicalize_language_variant_fallbacks(&self, lid: &mut LanguageIdentifier) -> bool { |
520 | // These language/variant comibnations have around 20 rules |
521 | for LanguageStrStrPair(lang, raw_variants, raw_to) in self |
522 | .aliases |
523 | .get() |
524 | .language_variants |
525 | .iter() |
526 | .map(zerofrom::ZeroFrom::zero_from) |
527 | { |
528 | let raw_variants = raw_variants.split('-'); |
529 | // if is_iter_sorted(raw_variants.clone()) { // can we sort at construction? |
530 | if uts35_rule_matches(lid, lang, None, None, raw_variants.clone()) { |
531 | if let Ok(to) = raw_to.parse() { |
532 | uts35_replacement(lid, !lang.is_empty(), false, false, Some(raw_variants), &to); |
533 | return true; |
534 | } |
535 | } |
536 | } |
537 | false |
538 | } |
539 | |
540 | fn canonicalize_absolute_language_fallbacks(&self, lid: &mut LanguageIdentifier) -> bool { |
541 | for StrStrPair(raw_from, raw_to) in self |
542 | .aliases |
543 | .get() |
544 | .language |
545 | .iter() |
546 | .map(zerofrom::ZeroFrom::zero_from) |
547 | { |
548 | if let Ok(from) = raw_from.parse::<LanguageIdentifier>() { |
549 | if uts35_rule_matches( |
550 | lid, |
551 | from.language, |
552 | from.script, |
553 | from.region, |
554 | from.variants.iter().map(Variant::as_str), |
555 | ) { |
556 | if let Ok(to) = raw_to.parse() { |
557 | uts35_replacement( |
558 | lid, |
559 | !from.language.is_empty(), |
560 | from.script.is_some(), |
561 | from.region.is_some(), |
562 | Some(from.variants.iter().map(Variant::as_str)), |
563 | &to, |
564 | ); |
565 | return true; |
566 | } |
567 | } |
568 | } |
569 | } |
570 | false |
571 | } |
572 | } |
573 | |
#[cfg(test)]
mod test {
    use super::*;

    /// Checks rule matching for wildcard languages and variant subsets.
    #[test]
    fn test_uts35_rule_matches() {
        // (source locale, rule, whether the rule should match)
        let cases = [
            ("ja", "und", true),
            ("und-heploc-hepburn", "und-hepburn", true),
            ("ja-heploc-hepburn", "und-hepburn", true),
            ("ja-hepburn", "und-hepburn-heploc", false),
        ];

        for (source, rule, expected) in cases {
            let source: LanguageIdentifier = source.parse().unwrap();
            let rule: LanguageIdentifier = rule.parse().unwrap();
            let matched = uts35_rule_matches(
                &source,
                rule.language,
                rule.script,
                rule.region,
                rule.variants.iter().map(Variant::as_str),
            );
            assert_eq!(matched, expected, "{}", source);
        }
    }

    /// Checks that applying a rule rewrites the locale as expected.
    #[test]
    fn test_uts35_replacement() {
        // (input locale, rule source, rule replacement, expected locale)
        let cases = [
            (
                "ja-Latn-fonipa-hepburn-heploc",
                "und-hepburn-heploc",
                "und-alalc97",
                "ja-Latn-alalc97-fonipa",
            ),
            ("sgn-DD", "und-DD", "und-DE", "sgn-DE"),
            ("sgn-DE", "sgn-DE", "gsg", "gsg"),
        ];

        for (input, rule_from, rule_to, expected) in cases {
            let mut locale: Locale = input.parse().unwrap();
            let rule_from: LanguageIdentifier = rule_from.parse().unwrap();
            let rule_to: LanguageIdentifier = rule_to.parse().unwrap();
            let expected: Locale = expected.parse().unwrap();

            uts35_replacement(
                &mut locale.id,
                !rule_from.language.is_empty(),
                rule_from.script.is_some(),
                rule_from.region.is_some(),
                Some(rule_from.variants.iter().map(Variant::as_str)),
                &rule_to,
            );

            assert_eq!(expected, locale);
        }
    }
}
631 | |
#[cfg(feature = "serde")]
#[cfg(test)]
mod tests {
    use super::*;
    use icu_locid::locale;

    /// Test provider that serves baked data (plus a hand-built V1 alias
    /// payload) but refuses any key listed in `keys`.
    struct RejectByKeyProvider {
        keys: Vec<DataKey>,
    }

    impl AnyProvider for RejectByKeyProvider {
        fn load_any(&self, key: DataKey, _: DataRequest) -> Result<AnyResponse, DataError> {
            use alloc::borrow::Cow;

            if self.keys.contains(&key) {
                return Err(DataErrorKind::MissingDataKey.with_str_context("rejected"));
            }

            let aliases_v2 = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_ALIASES_V2;
            let l = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_L_V1;
            let ext = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_EXT_V1;
            let sr = crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_LIKELYSUBTAGS_SR_V1;

            let payload = if key.hashed() == AliasesV1Marker::KEY.hashed() {
                // Minimal V1 data: just the one rule the tests below rely on.
                let aliases_v1 = AliasesV1 {
                    language_variants: zerovec::VarZeroVec::from(&[StrStrPair(
                        Cow::Borrowed("aa-saaho"),
                        Cow::Borrowed("ssy"),
                    )]),
                    ..Default::default()
                };
                DataPayload::<AliasesV1Marker>::from_owned(aliases_v1).wrap_into_any_payload()
            } else if key.hashed() == AliasesV2Marker::KEY.hashed() {
                DataPayload::<AliasesV2Marker>::from_static_ref(aliases_v2).wrap_into_any_payload()
            } else if key.hashed() == LikelySubtagsForLanguageV1Marker::KEY.hashed() {
                DataPayload::<LikelySubtagsForLanguageV1Marker>::from_static_ref(l)
                    .wrap_into_any_payload()
            } else if key.hashed() == LikelySubtagsExtendedV1Marker::KEY.hashed() {
                DataPayload::<LikelySubtagsExtendedV1Marker>::from_static_ref(ext)
                    .wrap_into_any_payload()
            } else if key.hashed() == LikelySubtagsForScriptRegionV1Marker::KEY.hashed() {
                DataPayload::<LikelySubtagsForScriptRegionV1Marker>::from_static_ref(sr)
                    .wrap_into_any_payload()
            } else {
                return Err(DataErrorKind::MissingDataKey.into_error());
            };

            Ok(AnyResponse {
                payload: Some(payload),
                metadata: Default::default(),
            })
        }
    }

    /// With the V2 key rejected, construction must fall back to the V1 data.
    #[test]
    fn test_old_keys() {
        let provider = RejectByKeyProvider {
            keys: vec![AliasesV2Marker::KEY],
        };
        let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider)
            .expect("should create with old keys");
        let mut locale = locale!("aa-saaho");
        assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
        assert_eq!(locale, locale!("ssy"));
    }

    /// With the V1 key rejected, construction must succeed from the V2 data.
    #[test]
    fn test_new_keys() {
        let provider = RejectByKeyProvider {
            keys: vec![AliasesV1Marker::KEY],
        };
        let lc = LocaleCanonicalizer::try_new_with_any_provider(&provider)
            .expect("should create with new keys");
        let mut locale = locale!("aa-saaho");
        assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified);
        assert_eq!(locale, locale!("ssy"));
    }

    /// With both alias keys rejected, construction must fail.
    #[test]
    fn test_no_keys() {
        let provider = RejectByKeyProvider {
            keys: vec![AliasesV1Marker::KEY, AliasesV2Marker::KEY],
        };
        assert!(
            LocaleCanonicalizer::try_new_with_any_provider(&provider).is_err(),
            "should not create: no data present"
        );
    }
}
721 |
Definitions
- LocaleCanonicalizer
- aliases
- expander
- uts35_rule_matches
- uts35_replacement
- uts35_check_language_rules
- default
- new
- try_new_with_any_provider
- try_new_unstable
- new_with_expander
- try_new_with_expander_compat
- try_new_with_expander_unstable
- try_new_with_expander_with_any_provider
- canonicalize
- canonicalize_extensions
- canonicalize_language_variant_fallbacks
Learn Rust with the experts
Find out more