expander.rs source code [crates/icu_locid_transform/src/provider/expander.rs]

1	// This file is part of ICU4X. For terms of use, please see the file
2	// called LICENSE at the top level of the ICU4X source tree
3	// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5	use super::*;
6	use icu_locid::subtags::{Language, Region, Script};
7	use icu_provider::prelude::*;
8	use zerovec::ZeroMap;
9
#[icu_provider::data_struct(marker(
    LikelySubtagsV1Marker,
    "locid_transform/likelysubtags@1",
    singleton
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_locid_transform::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for the minimize and maximize operations.
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
///
/// The data is broken down into smaller maps based upon the rules
/// defined for the likely subtags maximize algorithm.
///
/// For efficiency, only the relevant part of the LanguageIdentifier is stored
/// for searching and replacing. E.g., the `language_script` field is used to store
/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
/// region.
///
/// This struct carries all six lookup maps plus the `und` entry.
/// [`LikelySubtagsForLanguageV1`] and [`LikelySubtagsForScriptRegionV1`] each hold
/// a subset of these fields and can be built from this struct via `From`.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsV1<'data> {
    /// Language and script: keyed by a (language, script) pair, the value is a region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
    /// Language and region: keyed by a (language, region) pair, the value is a script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
    /// Just language: keyed by a language, the value is a (script, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
    /// Script and region: keyed by a (script, region) pair, the value is a language.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
    /// Just script: keyed by a script, the value is a (language, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
    /// Just region: keyed by a region, the value is a (language, script) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
    /// Undefined: a single full (language, script, region) triple, stored inline
    /// rather than in a map.
    pub und: (Language, Script, Region),
}
63
#[icu_provider::data_struct(marker(
    LikelySubtagsForLanguageV1Marker,
    "locid_transform/likelysubtags_l@1",
    singleton
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_locid_transform::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for the minimize and maximize operations.
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
///
/// The data is broken down into smaller maps based upon the rules
/// defined for the likely subtags maximize algorithm.
///
/// For efficiency, only the relevant part of the LanguageIdentifier is stored
/// for searching and replacing. E.g., the `language_script` field is used to store
/// rules for `LanguageIdentifier`s that contain a language and a script, but not a
/// region.
///
/// This struct contains mappings when the input contains a language subtag.
/// Also see [`LikelySubtagsForScriptRegionV1`].
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsForLanguageV1<'data> {
    /// Language and script: keyed by a (language, script) pair, the value is a region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
    /// Language and region: keyed by a (language, region) pair, the value is a script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
    /// Just language: keyed by a language, the value is a (script, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
    /// Undefined: a single full (language, script, region) triple, stored inline
    /// rather than in a map.
    pub und: (Language, Script, Region),
}
111
112	impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForLanguageV1<'data> {
113	fn from(other: LikelySubtagsV1<'data>) -> Self {
114	Self {
115	language_script: other.language_script,
116	language_region: other.language_region,
117	language: other.language,
118	und: other.und,
119	}
120	}
121	}
122
123	impl<'data> LikelySubtagsForLanguageV1<'data> {
124	pub(crate) fn clone_from_borrowed(other: &LikelySubtagsV1<'data>) -> Self {
125	Self {
126	language_script: other.language_script.clone(),
127	language_region: other.language_region.clone(),
128	language: other.language.clone(),
129	und: other.und,
130	}
131	}
132	}
133
#[icu_provider::data_struct(marker(
    LikelySubtagsForScriptRegionV1Marker,
    "locid_transform/likelysubtags_sr@1",
    singleton
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_locid_transform::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for the minimize and maximize operations.
/// Each field defines a mapping from an old identifier to a new identifier,
/// based upon the rules in
/// <https://www.unicode.org/reports/tr35/#Likely_Subtags>.
///
/// The data is broken down into smaller maps based upon the rules
/// defined for the likely subtags maximize algorithm.
///
/// For efficiency, only the relevant part of the LanguageIdentifier is stored
/// for searching and replacing. E.g., the `script_region` field is used to store
/// rules for `LanguageIdentifier`s that contain a script and a region, but not a
/// language.
///
/// This struct contains mappings when the input does not contain a language subtag.
/// Also see [`LikelySubtagsForLanguageV1`].
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsForScriptRegionV1<'data> {
    /// Script and region: keyed by a (script, region) pair, the value is a language.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
    /// Just script: keyed by a script, the value is a (language, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
    /// Just region: keyed by a region, the value is a (language, script) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}
179
180	impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsForScriptRegionV1<'data> {
181	fn from(other: LikelySubtagsV1<'data>) -> Self {
182	Self {
183	script_region: other.script_region,
184	script: other.script,
185	region: other.region,
186	}
187	}
188	}
189
#[icu_provider::data_struct(marker(
    LikelySubtagsExtendedV1Marker,
    "locid_transform/likelysubtags_ext@1",
    singleton
))]
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_locid_transform::provider),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
/// This likely subtags data is used for full coverage of locales, including ones that
/// don't otherwise have data in the Common Locale Data Repository (CLDR).
///
/// It has the same six lookup maps as [`LikelySubtagsV1`], but no `und` entry.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[yoke(prove_covariance_manually)]
pub struct LikelySubtagsExtendedV1<'data> {
    /// Language and script: keyed by a (language, script) pair, the value is a region.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_script: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedScript), Region>,
    /// Language and region: keyed by a (language, region) pair, the value is a script.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language_region: ZeroMap<'data, (UnvalidatedLanguage, UnvalidatedRegion), Script>,
    /// Just language: keyed by a language, the value is a (script, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub language: ZeroMap<'data, UnvalidatedLanguage, (Script, Region)>,
    /// Script and region: keyed by a (script, region) pair, the value is a language.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script_region: ZeroMap<'data, (UnvalidatedScript, UnvalidatedRegion), Language>,
    /// Just script: keyed by a script, the value is a (language, region) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub script: ZeroMap<'data, UnvalidatedScript, (Language, Region)>,
    /// Just region: keyed by a region, the value is a (language, script) pair.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub region: ZeroMap<'data, UnvalidatedRegion, (Language, Script)>,
}
231
232	impl<'data> From<LikelySubtagsV1<'data>> for LikelySubtagsExtendedV1<'data> {
233	fn from(other: LikelySubtagsV1<'data>) -> Self {
234	Self {
235	language_script: other.language_script,
236	language_region: other.language_region,
237	language: other.language,
238	script_region: other.script_region,
239	script: other.script,
240	region: other.region,
241	}
242	}
243	}
244