1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | use super::*; |
6 | use icu_locid::subtags::{Language, Region, Script, Variant}; |
7 | use icu_provider::prelude::*; |
8 | use tinystr::UnvalidatedTinyAsciiStr; |
9 | use zerovec::{VarZeroVec, ZeroMap, ZeroSlice}; |
10 | |
11 | #[icu_provider::data_struct (marker(AliasesV1Marker, "locid_transform/aliases@1" , singleton))] |
12 | #[derive (PartialEq, Clone, Default)] |
13 | #[cfg_attr ( |
14 | feature = "datagen" , |
15 | derive(serde::Serialize, databake::Bake), |
16 | databake(path = icu_locid_transform::provider), |
17 | )] |
18 | #[cfg_attr (feature = "serde" , derive(serde::Deserialize))] |
19 | #[yoke(prove_covariance_manually)] |
20 | /// This alias data is used for locale canonicalization. Each field defines a |
21 | /// mapping from an old identifier to a new identifier, based upon the rules in |
22 | /// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data |
23 | /// is stored in sorted order, allowing for binary search to identify rules to |
24 | /// apply. It is broken down into smaller vectors based upon some characteristic |
25 | /// of the data, to help avoid unnecessary searches. For example, the `sgn_region` |
26 | /// field contains aliases for sign language and region, so that it is not |
27 | /// necessary to search the data unless the input is a sign language. |
28 | /// |
29 | /// The algorithm in tr35 is not guaranteed to terminate on data other than what |
30 | /// is currently in CLDR. For this reason, it is not a good idea to attempt to add |
31 | /// or modify aliases for use in this structure. |
32 | /// |
33 | /// <div class="stab unstable"> |
34 | /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, |
35 | /// including in SemVer minor releases. While the serde representation of data structs is guaranteed |
36 | /// to be stable, their Rust representation might not be. Use with caution. |
37 | /// </div> |
38 | // TODO: Use validated types as value types |
39 | #[derive (Debug)] |
40 | pub struct AliasesV1<'data> { |
41 | /// `[language(-variant)+\] -> [langid]` |
42 | /// This is not a map as it's searched linearly according to the canonicalization rules. |
43 | #[cfg_attr (feature = "serde" , serde(borrow))] |
44 | pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, |
45 | /// `sgn-[region] -> [language]` |
46 | #[cfg_attr (feature = "serde" , serde(borrow))] |
47 | pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>, |
48 | /// `[language{2}] -> [langid]` |
49 | #[cfg_attr (feature = "serde" , serde(borrow))] |
50 | pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>, |
51 | /// `[language{3}] -> [langid]` |
52 | #[cfg_attr (feature = "serde" , serde(borrow))] |
53 | pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>, |
54 | /// `[langid] -> [langid]` |
55 | /// This is not a map as it's searched linearly according to the canonicalization rules. |
56 | #[cfg_attr (feature = "serde" , serde(borrow))] |
57 | pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>, |
58 | |
59 | /// `[script] -> [script]` |
60 | #[cfg_attr (feature = "serde" , serde(borrow))] |
61 | pub script: ZeroMap<'data, UnvalidatedScript, Script>, |
62 | |
63 | /// `[region{2}] -> [region]` |
64 | #[cfg_attr (feature = "serde" , serde(borrow))] |
65 | pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>, |
66 | /// `[region{3}] -> [region]` |
67 | #[cfg_attr (feature = "serde" , serde(borrow))] |
68 | pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>, |
69 | |
70 | /// `[region] -> [region]+` |
71 | #[cfg_attr (feature = "serde" , serde(borrow))] |
72 | pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>, |
73 | |
74 | /// `[variant] -> [variant]` |
75 | #[cfg_attr (feature = "serde" , serde(borrow))] |
76 | pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>, |
77 | |
78 | /// `[value{7}] -> [value{7}]` |
79 | #[cfg_attr (feature = "serde" , serde(borrow))] |
80 | pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>, |
81 | } |
82 | |