// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::*;
6use icu_locid::subtags::{Language, Region, Script, Variant};
7use icu_provider::prelude::*;
8use tinystr::UnvalidatedTinyAsciiStr;
9use zerovec::{VarZeroVec, ZeroMap, ZeroSlice};
10
11#[icu_provider::data_struct(marker(AliasesV1Marker, "locid_transform/aliases@1", singleton))]
12#[derive(PartialEq, Clone, Default)]
13#[cfg_attr(
14 feature = "datagen",
15 derive(serde::Serialize, databake::Bake),
16 databake(path = icu_locid_transform::provider),
17)]
18#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
19#[yoke(prove_covariance_manually)]
20/// This alias data is used for locale canonicalization. Each field defines a
21/// mapping from an old identifier to a new identifier, based upon the rules in
22/// from <http://unicode.org/reports/tr35/#LocaleId_Canonicalization>. The data
23/// is stored in sorted order, allowing for binary search to identify rules to
24/// apply. It is broken down into smaller vectors based upon some characteristic
25/// of the data, to help avoid unnecessary searches. For example, the `sgn_region`
26/// field contains aliases for sign language and region, so that it is not
27/// necessary to search the data unless the input is a sign language.
28///
29/// The algorithm in tr35 is not guaranteed to terminate on data other than what
30/// is currently in CLDR. For this reason, it is not a good idea to attempt to add
31/// or modify aliases for use in this structure.
32///
33/// <div class="stab unstable">
34/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
35/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
36/// to be stable, their Rust representation might not be. Use with caution.
37/// </div>
38// TODO: Use validated types as value types
39#[derive(Debug)]
40pub struct AliasesV1<'data> {
41 /// `[language(-variant)+\] -> [langid]`
42 /// This is not a map as it's searched linearly according to the canonicalization rules.
43 #[cfg_attr(feature = "serde", serde(borrow))]
44 pub language_variants: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
45 /// `sgn-[region] -> [language]`
46 #[cfg_attr(feature = "serde", serde(borrow))]
47 pub sgn_region: ZeroMap<'data, UnvalidatedRegion, Language>,
48 /// `[language{2}] -> [langid]`
49 #[cfg_attr(feature = "serde", serde(borrow))]
50 pub language_len2: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, UnvalidatedLanguageIdentifier>,
51 /// `[language{3}] -> [langid]`
52 #[cfg_attr(feature = "serde", serde(borrow))]
53 pub language_len3: ZeroMap<'data, UnvalidatedLanguage, UnvalidatedLanguageIdentifier>,
54 /// `[langid] -> [langid]`
55 /// This is not a map as it's searched linearly according to the canonicalization rules.
56 #[cfg_attr(feature = "serde", serde(borrow))]
57 pub language: VarZeroVec<'data, UnvalidatedLanguageIdentifierPair>,
58
59 /// `[script] -> [script]`
60 #[cfg_attr(feature = "serde", serde(borrow))]
61 pub script: ZeroMap<'data, UnvalidatedScript, Script>,
62
63 /// `[region{2}] -> [region]`
64 #[cfg_attr(feature = "serde", serde(borrow))]
65 pub region_alpha: ZeroMap<'data, UnvalidatedTinyAsciiStr<2>, Region>,
66 /// `[region{3}] -> [region]`
67 #[cfg_attr(feature = "serde", serde(borrow))]
68 pub region_num: ZeroMap<'data, UnvalidatedRegion, Region>,
69
70 /// `[region] -> [region]+`
71 #[cfg_attr(feature = "serde", serde(borrow))]
72 pub complex_region: ZeroMap<'data, UnvalidatedRegion, ZeroSlice<Region>>,
73
74 /// `[variant] -> [variant]`
75 #[cfg_attr(feature = "serde", serde(borrow))]
76 pub variant: ZeroMap<'data, UnvalidatedVariant, Variant>,
77
78 /// `[value{7}] -> [value{7}]`
79 #[cfg_attr(feature = "serde", serde(borrow))]
80 pub subdivision: ZeroMap<'data, UnvalidatedSubdivision, SemivalidatedSubdivision>,
81}
82