| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | //! Tools for locale fallback, enabling arbitrary input locales to be mapped into the nearest |
| 6 | //! locale with data. |
| 7 | |
| 8 | use crate::provider::*; |
| 9 | use icu_locid::extensions::unicode::Value; |
| 10 | use icu_locid::subtags::Variants; |
| 11 | use icu_provider::prelude::*; |
| 12 | |
| 13 | #[doc (inline)] |
| 14 | pub use icu_provider::fallback::*; |
| 15 | |
| 16 | mod algorithms; |
| 17 | |
| 18 | /// Implements the algorithm defined in *[UTS #35: Locale Inheritance and Matching]*. |
| 19 | /// |
| 20 | /// Note that this implementation performs some additional steps compared to the *UTS #35* |
| 21 | /// algorithm. See *[the design doc]* for a detailed description and [#2243]( |
| 22 | /// https://github.com/unicode-org/icu4x/issues/2243) to track alignment with *UTS #35*. |
| 23 | /// |
| 24 | /// If running fallback in a loop, use [`DataLocale::is_und()`] to break from the loop. |
| 25 | /// |
| 26 | /// # Examples |
| 27 | /// |
| 28 | /// ``` |
| 29 | /// use icu::locid::locale; |
| 30 | /// use icu::locid_transform::fallback::LocaleFallbacker; |
| 31 | /// |
| 32 | /// // Set up a LocaleFallbacker with data. |
| 33 | /// let fallbacker = LocaleFallbacker::new(); |
| 34 | /// |
| 35 | /// // Create a LocaleFallbackerIterator with a default configuration. |
| 36 | /// // By default, uses language priority with no additional extension keywords. |
| 37 | /// let mut fallback_iterator = fallbacker |
| 38 | /// .for_config(Default::default()) |
| 39 | /// .fallback_for(locale!("hi-Latn-IN" ).into()); |
| 40 | /// |
| 41 | /// // Run the algorithm and check the results. |
| 42 | /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn-IN" ).into()); |
| 43 | /// fallback_iterator.step(); |
| 44 | /// assert_eq!(fallback_iterator.get(), &locale!("hi-Latn" ).into()); |
| 45 | /// fallback_iterator.step(); |
| 46 | /// assert_eq!(fallback_iterator.get(), &locale!("en-IN" ).into()); |
| 47 | /// fallback_iterator.step(); |
| 48 | /// assert_eq!(fallback_iterator.get(), &locale!("en-001" ).into()); |
| 49 | /// fallback_iterator.step(); |
| 50 | /// assert_eq!(fallback_iterator.get(), &locale!("en" ).into()); |
| 51 | /// fallback_iterator.step(); |
| 52 | /// assert_eq!(fallback_iterator.get(), &locale!("und" ).into()); |
| 53 | /// ``` |
| 54 | /// |
| 55 | /// [UTS #35: Locale Inheritance and Matching]: https://www.unicode.org/reports/tr35/#Locale_Inheritance |
| 56 | /// [the design doc]: https://docs.google.com/document/d/1Mp7EUyl-sFh_HZYgyeVwj88vJGpCBIWxzlCwGgLCDwM/edit |
| 57 | /// [language identifier]: icu::locid::LanguageIdentifier |
| 58 | #[doc (hidden)] |
| 59 | #[derive (Debug, Clone, PartialEq)] |
| 60 | pub struct LocaleFallbacker { |
| 61 | likely_subtags: DataPayload<LocaleFallbackLikelySubtagsV1Marker>, |
| 62 | parents: DataPayload<LocaleFallbackParentsV1Marker>, |
| 63 | collation_supplement: Option<DataPayload<CollationFallbackSupplementV1Marker>>, |
| 64 | } |
| 65 | |
| 66 | /// Borrowed version of [`LocaleFallbacker`]. |
| 67 | #[derive (Debug, Clone, Copy, PartialEq)] |
| 68 | pub struct LocaleFallbackerBorrowed<'a> { |
| 69 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
| 70 | parents: &'a LocaleFallbackParentsV1<'a>, |
| 71 | collation_supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
| 72 | } |
| 73 | |
| 74 | /// A [`LocaleFallbackerBorrowed`] with an associated [`LocaleFallbackConfig`]. |
| 75 | #[derive (Debug, Clone, Copy, PartialEq)] |
| 76 | pub struct LocaleFallbackerWithConfig<'a> { |
| 77 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
| 78 | parents: &'a LocaleFallbackParentsV1<'a>, |
| 79 | supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
| 80 | config: LocaleFallbackConfig, |
| 81 | } |
| 82 | |
| 83 | /// Inner iteration type. Does not own the item under fallback. |
| 84 | #[derive (Debug)] |
| 85 | struct LocaleFallbackIteratorInner<'a> { |
| 86 | likely_subtags: &'a LocaleFallbackLikelySubtagsV1<'a>, |
| 87 | parents: &'a LocaleFallbackParentsV1<'a>, |
| 88 | supplement: Option<&'a LocaleFallbackSupplementV1<'a>>, |
| 89 | config: LocaleFallbackConfig, |
| 90 | backup_extension: Option<Value>, |
| 91 | backup_subdivision: Option<Value>, |
| 92 | backup_variants: Option<Variants>, |
| 93 | } |
| 94 | |
| 95 | /// Iteration type for locale fallback operations. |
| 96 | /// |
| 97 | /// Because the `Iterator` trait does not allow items to borrow from the iterator, this class does |
| 98 | /// not implement that trait. Instead, use `.step()` and `.get()`. |
| 99 | #[derive (Debug)] |
| 100 | pub struct LocaleFallbackIterator<'a, 'b> { |
| 101 | current: DataLocale, |
| 102 | inner: LocaleFallbackIteratorInner<'a>, |
| 103 | phantom: core::marker::PhantomData<&'b ()>, |
| 104 | } |
| 105 | |
| 106 | impl LocaleFallbacker { |
| 107 | /// Creates a [`LocaleFallbacker`] with compiled fallback data (likely subtags and parent locales). |
| 108 | /// |
| 109 | /// ✨ *Enabled with the `compiled_data` Cargo feature.* |
| 110 | /// |
| 111 | /// [📚 Help choosing a constructor](icu_provider::constructors) |
| 112 | #[cfg (feature = "compiled_data" )] |
| 113 | #[allow (clippy::new_ret_no_self)] // keeping constructors together |
| 114 | pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> { |
| 115 | let tickstatic = LocaleFallbackerBorrowed { |
| 116 | likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1, |
| 117 | parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1, |
| 118 | collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1), |
| 119 | }; |
| 120 | // Safety: we're transmuting down from LocaleFallbackerBorrowed<'static> to LocaleFallbackerBorrowed<'a> |
| 121 | // ZeroMaps use associated types in a way that confuse the compiler which gives up and marks them |
| 122 | // as invariant. However, they are covariant, and in non-const code this covariance can be safely triggered |
| 123 | // using Yokeable::transform. In const code we must transmute. In the long run we should |
| 124 | // be able to `transform()` in const code, and also we will have hopefully improved map polymorphism (#3128) |
| 125 | unsafe { core::mem::transmute(tickstatic) } |
| 126 | } |
| 127 | |
| 128 | icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError, |
| 129 | #[cfg (skip)] |
| 130 | functions: [ |
| 131 | new, |
| 132 | try_new_with_any_provider, |
| 133 | try_new_with_buffer_provider, |
| 134 | try_new_unstable, |
| 135 | Self |
| 136 | ]); |
| 137 | |
| 138 | #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new)] |
| 139 | pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError> |
| 140 | where |
| 141 | P: DataProvider<LocaleFallbackLikelySubtagsV1Marker> |
| 142 | + DataProvider<LocaleFallbackParentsV1Marker> |
| 143 | + DataProvider<CollationFallbackSupplementV1Marker> |
| 144 | + ?Sized, |
| 145 | { |
| 146 | let likely_subtags = provider.load(Default::default())?.take_payload()?; |
| 147 | let parents = provider.load(Default::default())?.take_payload()?; |
| 148 | let collation_supplement = match DataProvider::<CollationFallbackSupplementV1Marker>::load( |
| 149 | provider, |
| 150 | Default::default(), |
| 151 | ) { |
| 152 | Ok(response) => Some(response.take_payload()?), |
| 153 | // It is expected that not all keys are present |
| 154 | Err(DataError { |
| 155 | kind: DataErrorKind::MissingDataKey, |
| 156 | .. |
| 157 | }) => None, |
| 158 | Err(e) => return Err(e), |
| 159 | }; |
| 160 | Ok(LocaleFallbacker { |
| 161 | likely_subtags, |
| 162 | parents, |
| 163 | collation_supplement, |
| 164 | }) |
| 165 | } |
| 166 | |
| 167 | /// Creates a [`LocaleFallbacker`] without fallback data. Using this constructor may result in |
| 168 | /// surprising behavior, especially in multi-script languages. |
| 169 | pub fn new_without_data() -> Self { |
| 170 | LocaleFallbacker { |
| 171 | likely_subtags: DataPayload::from_owned(Default::default()), |
| 172 | parents: DataPayload::from_owned(Default::default()), |
| 173 | collation_supplement: None, |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | /// Associates a configuration with this fallbacker. |
| 178 | #[inline ] |
| 179 | pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig { |
| 180 | self.as_borrowed().for_config(config) |
| 181 | } |
| 182 | |
| 183 | /// Derives a configuration from a [`DataKey`] and associates it |
| 184 | /// with this fallbacker. |
| 185 | #[inline ] |
| 186 | #[doc (hidden)] // will be removed in 2.0 |
| 187 | pub fn for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig { |
| 188 | self.for_config(data_key.fallback_config()) |
| 189 | } |
| 190 | |
| 191 | /// Creates a borrowed version of this fallbacker for performance. |
| 192 | pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed { |
| 193 | LocaleFallbackerBorrowed { |
| 194 | likely_subtags: self.likely_subtags.get(), |
| 195 | parents: self.parents.get(), |
| 196 | collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), |
| 197 | } |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | impl<'a> LocaleFallbackerBorrowed<'a> { |
| 202 | /// Associates a configuration with this fallbacker. |
| 203 | #[inline ] |
| 204 | pub const fn for_config(self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig<'a> { |
| 205 | LocaleFallbackerWithConfig { |
| 206 | likely_subtags: self.likely_subtags, |
| 207 | parents: self.parents, |
| 208 | supplement: match config.fallback_supplement { |
| 209 | Some(LocaleFallbackSupplement::Collation) => self.collation_supplement, |
| 210 | _ => None, |
| 211 | }, |
| 212 | config, |
| 213 | } |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | impl LocaleFallbackerBorrowed<'static> { |
| 218 | /// Cheaply converts a [`LocaleFallbackerBorrowed<'static>`] into a [`LocaleFallbacker`]. |
| 219 | /// |
| 220 | /// Note: Due to branching and indirection, using [`LocaleFallbacker`] might inhibit some |
| 221 | /// compile-time optimizations that are possible with [`LocaleFallbackerBorrowed`]. |
| 222 | pub const fn static_to_owned(self) -> LocaleFallbacker { |
| 223 | LocaleFallbacker { |
| 224 | likely_subtags: DataPayload::from_static_ref(self.likely_subtags), |
| 225 | parents: DataPayload::from_static_ref(self.parents), |
| 226 | collation_supplement: match self.collation_supplement { |
| 227 | None => None, |
| 228 | Some(x: &'static LocaleFallbackSupplementV1<'static>) => Some(DataPayload::from_static_ref(data:x)), |
| 229 | }, |
| 230 | } |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | impl<'a> LocaleFallbackerWithConfig<'a> { |
| 235 | /// Creates an iterator based on a [`DataLocale`]. |
| 236 | /// |
| 237 | /// If you have a [`Locale`](icu_locid::Locale), call `.into()` to get a [`DataLocale`]. |
| 238 | /// |
| 239 | /// When first initialized, the locale is normalized according to the fallback algorithm. |
| 240 | pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> { |
| 241 | self.normalize(&mut locale); |
| 242 | LocaleFallbackIterator { |
| 243 | current: locale, |
| 244 | inner: LocaleFallbackIteratorInner { |
| 245 | likely_subtags: self.likely_subtags, |
| 246 | parents: self.parents, |
| 247 | supplement: self.supplement, |
| 248 | config: self.config, |
| 249 | backup_extension: None, |
| 250 | backup_subdivision: None, |
| 251 | backup_variants: None, |
| 252 | }, |
| 253 | phantom: core::marker::PhantomData, |
| 254 | } |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | impl LocaleFallbackIterator<'_, '_> { |
| 259 | /// Borrows the current [`DataLocale`] under fallback. |
| 260 | pub fn get(&self) -> &DataLocale { |
| 261 | &self.current |
| 262 | } |
| 263 | |
| 264 | /// Takes the current [`DataLocale`] under fallback. |
| 265 | pub fn take(self) -> DataLocale { |
| 266 | self.current |
| 267 | } |
| 268 | |
| 269 | /// Performs one step of the locale fallback algorithm. |
| 270 | /// |
| 271 | /// The fallback is completed once the inner [`DataLocale`] becomes `und`. |
| 272 | pub fn step(&mut self) -> &mut Self { |
| 273 | self.inner.step(&mut self.current); |
| 274 | self |
| 275 | } |
| 276 | } |
| 277 | |