1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest |
6 | //! locale with data. |
7 | //! |
8 | //! The algorithm implemented in this module is called [Flexible Vertical Fallback]( |
9 | //! https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit). |
10 | //! Watch [#2243](https://github.com/unicode-org/icu4x/issues/2243) to track improvements to |
11 | //! this algorithm and steps to enshrine the algorithm in CLDR. |
12 | //! |
13 | //! # Examples |
14 | //! |
15 | //! ``` |
16 | //! use icu_locid::locale; |
17 | //! use icu_locid_transform::LocaleFallbacker; |
18 | //! |
19 | //! // Set up a LocaleFallbacker with data. |
20 | //! let fallbacker = LocaleFallbacker::new(); |
21 | //! |
22 | //! // Create a LocaleFallbackerIterator with a default configuration. |
23 | //! // By default, uses language priority with no additional extension keywords. |
24 | //! let mut fallback_iterator = fallbacker |
25 | //! .for_config(Default::default()) |
26 | //! .fallback_for(locale!("hi-Latn-IN" ).into()); |
27 | //! |
28 | //! // Run the algorithm and check the results. |
29 | //! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN" ).into()); |
30 | //! fallback_iterator.step(); |
31 | //! assert_eq!(fallback_iterator.get(), &locale!("hi-Latn" ).into()); |
32 | //! fallback_iterator.step(); |
33 | //! assert_eq!(fallback_iterator.get(), &locale!("en-IN" ).into()); |
34 | //! fallback_iterator.step(); |
35 | //! assert_eq!(fallback_iterator.get(), &locale!("en-001" ).into()); |
36 | //! fallback_iterator.step(); |
37 | //! assert_eq!(fallback_iterator.get(), &locale!("en" ).into()); |
38 | //! fallback_iterator.step(); |
39 | //! assert_eq!(fallback_iterator.get(), &locale!("und" ).into()); |
40 | //! ``` |
41 | |
42 | use crate::provider::*; |
43 | use icu_locid::extensions::unicode::Value; |
44 | use icu_locid::subtags::Variants; |
45 | use icu_provider::prelude::*; |
46 | |
47 | #[doc (inline)] |
48 | pub use icu_provider::fallback::*; |
49 | |
50 | mod algorithms; |
51 | |
52 | /// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*. |
53 | /// |
54 | /// Note that this implementation performs some additional steps compared to the *UTS #35* |
55 | /// algorithm, see *[the design doc]* for a detailed description, and [#2243]( |
56 | /// https://github.com/unicode-org/icu4x/issues/2243) to track aligment with *UTS #35*. |
57 | /// |
58 | /// # Examples |
59 | /// |
60 | /// ``` |
61 | /// use icu_locid::locale; |
62 | /// use icu_locid_transform::fallback::LocaleFallbacker; |
63 | /// |
64 | /// // Set up a LocaleFallbacker with data. |
65 | /// let fallbacker = LocaleFallbacker::new(); |
66 | /// |
67 | /// // Create a LocaleFallbackerIterator with a default configuration. |
68 | /// // By default, uses language priority with no additional extension keywords. |
69 | /// let mut fallback_iterator = fallbacker |
70 | /// .for_config(Default::default()) |
71 | /// .fallback_for(locale!("hi-Latn-IN" ).into()); |
72 | /// |
73 | /// // Run the algorithm and check the results. |
74 | /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN" ).into()); |
75 | /// fallback_iterator.step(); |
76 | /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn" ).into()); |
77 | /// fallback_iterator.step(); |
78 | /// assert_eq!(fallback_iterator.get(), &locale!("en-IN" ).into()); |
79 | /// fallback_iterator.step(); |
80 | /// assert_eq!(fallback_iterator.get(), &locale!("en-001" ).into()); |
81 | /// fallback_iterator.step(); |
82 | /// assert_eq!(fallback_iterator.get(), &locale!("en" ).into()); |
83 | /// fallback_iterator.step(); |
84 | /// assert_eq!(fallback_iterator.get(), &locale!("und" ).into()); |
85 | /// ``` |
86 | /// |
87 | /// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance |
88 | /// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit |
89 | #[doc (hidden)] |
90 | #[derive (Debug, Clone, PartialEq)] |
91 | pub struct LocaleFallbacker { |
92 | likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>, |
93 | parents: DataPayload<LocaleFallbackParentsV1Marker>, |
94 | collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>, |
95 | } |
96 | |
97 | /// Borrowed version of [`LocaleFallbacker`]. |
98 | #[derive (Debug, Clone, Copy, PartialEq)] |
99 | pub struct LocaleFallbackerBorrowed<'a> { |
100 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
101 | parents: &'a LocaleFallbackParentsV1<'a>, |
102 | collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
103 | } |
104 | |
105 | /// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`]. |
106 | #[derive (Debug, Clone, Copy, PartialEq)] |
107 | pub struct LocaleFallbackerWithConfig<'a> { |
108 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
109 | parents: &'a LocaleFallbackParentsV1<'a>, |
110 | supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
111 | config: LocaleFallbackConfig, |
112 | } |
113 | |
114 | /// Inner iteration type. Does not own the item under fallback. |
115 | #[derive (Debug)] |
116 | struct LocaleFallbackIteratorInner<'a> { |
117 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
118 | parents: &'a LocaleFallbackParentsV1<'a>, |
119 | supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
120 | config: LocaleFallbackConfig, |
121 | backup_extension: Option<Value>, |
122 | backup_subdivision: Option<Value>, |
123 | backup_variants: Option<Variants>, |
124 | } |
125 | |
126 | /// Iteration type for locale fallback operations. |
127 | /// |
128 | /// Because the `Iterator` trait does not allow items to borrow from the iterator, this class does |
129 | /// not implement that trait. Instead, use `.step()` and `.get()`. |
130 | #[derive (Debug)] |
131 | pub struct LocaleFallbackIterator<'a, 'b> { |
132 | current: DataLocale, |
133 | inner: LocaleFallbackIteratorInner<'a>, |
134 | phantom: core::marker::PhantomData<&'b ()>, |
135 | } |
136 | |
137 | impl LocaleFallbacker { |
138 | /// Creates a [`LocaleFallbacker`] with compiled fallback data (likely subtags and parent locales). |
139 | /// |
140 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
141 | /// |
142 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
143 | #[cfg (feature = "compiled_data" )] |
144 | #[allow (clippy::new_ret_no_self)] // keeping constructors together |
145 | pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> { |
146 | let tickstatic = LocaleFallbackerBorrowed { |
147 | likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1, |
148 | parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1, |
149 | collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1), |
150 | }; |
151 | // Shitty covariance because the zeromaps confuse the compiler |
152 | unsafe { core::mem::transmute(tickstatic) } |
153 | } |
154 | |
155 | icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError, |
156 | #[cfg (skip)] |
157 | functions: [ |
158 | new, |
159 | try_new_with_any_provider, |
160 | try_new_with_buffer_provider, |
161 | try_new_unstable, |
162 | Self |
163 | ]); |
164 | |
165 | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
166 | pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError> |
167 | where |
168 | P: DataProvider<LocaleFallbackLikelySubtagsV1Marker> |
169 | + DataProvider<LocaleFallbackParentsV1Marker> |
170 | + DataProvider<CollationFallbackSupplementV1Marker> |
171 | + ?Sized, |
172 | { |
173 | let likely_subtags = provider.load(Default::default())?.take_payload()?; |
174 | let parents = provider.load(Default::default())?.take_payload()?; |
175 | let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load( |
176 | provider, |
177 | Default::default(), |
178 | ) { |
179 | Ok(response) => Some(response.take_payload()?), |
180 | // It is expected that not all keys are present |
181 | Err(DataError { |
182 | kind: DataErrorKind::MissingDataKey, |
183 | .. |
184 | }) => None, |
185 | Err(e) => return Err(e), |
186 | }; |
187 | Ok(LocaleFallbacker { |
188 | likely_subtags, |
189 | parents, |
190 | collation_supplement, |
191 | }) |
192 | } |
193 | |
194 | /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in |
195 | /// surprising behavior, especially in multi-script languages. |
196 | pub fn new_without_data() -> Self { |
197 | LocaleFallbacker { |
198 | likely_subtags: DataPayload::from_owned(Default::default()), |
199 | parents: DataPayload::from_owned(Default::default()), |
200 | collation_supplement: None, |
201 | } |
202 | } |
203 | |
204 | /// Associates a configuration with this fallbacker. |
205 | #[inline ] |
206 | pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig { |
207 | self.as_borrowed().for_config(config) |
208 | } |
209 | |
210 | /// Derives a configuration from a [`DataKey`] and associates it |
211 | /// with this fallbacker. |
212 | #[inline ] |
213 | #[doc (hidden)] // will be removed in 2.0 |
214 | pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig { |
215 | self.for_config(data_key.fallback_config()) |
216 | } |
217 | |
218 | /// Creates a borrowed version of this fallbacker for performance. |
219 | pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed { |
220 | LocaleFallbackerBorrowed { |
221 | likely_subtags: self.likely_subtags.get(), |
222 | parents: self.parents.get(), |
223 | collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), |
224 | } |
225 | } |
226 | } |
227 | |
228 | impl<'a> LocaleFallbackerBorrowed<'a> { |
229 | /// Associates a configuration with this fallbacker. |
230 | #[inline ] |
231 | pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> { |
232 | LocaleFallbackerWithConfig { |
233 | likely_subtags: self.likely_subtags, |
234 | parents: self.parents, |
235 | supplement: match config.fallback_supplement { |
236 | Some(LocaleFallbackSupplement::Collation) => self.collation_supplement, |
237 | _ => None, |
238 | }, |
239 | config, |
240 | } |
241 | } |
242 | } |
243 | |
244 | impl LocaleFallbackerBorrowed<'static> { |
245 | /// Cheaply converts a `LocaleFallbackerBorrowed<'static>` into a `LocaleFallbacker`. |
246 | pub const fn static_to_owned(self) -> LocaleFallbacker { |
247 | LocaleFallbacker { |
248 | likely_subtags: DataPayload::from_static_ref(self.likely_subtags), |
249 | parents: DataPayload::from_static_ref(self.parents), |
250 | collation_supplement: match self.collation_supplement { |
251 | None => None, |
252 | Some(x: &LocaleFallbackSupplementV1<'_>) => Some(DataPayload::from_static_ref(data:x)), |
253 | }, |
254 | } |
255 | } |
256 | } |
257 | |
258 | impl<'a> LocaleFallbackerWithConfig<'a> { |
259 | /// Creates an iterator based on a [`DataLocale`]. |
260 | /// |
261 | /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`]. |
262 | /// |
263 | /// When first initialized, the locale is normalized according to the fallback algorithm. |
264 | pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> { |
265 | self.normalize(&mut locale); |
266 | LocaleFallbackIterator { |
267 | current: locale, |
268 | inner: LocaleFallbackIteratorInner { |
269 | likely_subtags: self.likely_subtags, |
270 | parents: self.parents, |
271 | supplement: self.supplement, |
272 | config: self.config, |
273 | backup_extension: None, |
274 | backup_subdivision: None, |
275 | backup_variants: None, |
276 | }, |
277 | phantom: core::marker::PhantomData, |
278 | } |
279 | } |
280 | } |
281 | |
282 | impl LocaleFallbackIterator<'_, '_> { |
283 | /// Borrows the current [`DataLocale`] under fallback. |
284 | pub fn get(&self) -> &DataLocale { |
285 | &self.current |
286 | } |
287 | |
288 | /// Takes the current [`DataLocale`] under fallback. |
289 | pub fn take(self) -> DataLocale { |
290 | self.current |
291 | } |
292 | |
293 | /// Performs one step of the locale fallback algorithm. |
294 | /// |
295 | /// The fallback is completed once the inner [`DataLocale`] becomes `und`. |
296 | pub fn step(&mut self) -> &mut Self { |
297 | self.inner.step(&mut self.current); |
298 | self |
299 | } |
300 | } |
301 | |