| 1 | //! Language Negotiation is a process in which locales from different |
| 2 | //! sources are filtered and sorted in an effort to produce the best |
| 3 | //! possible selection of them. |
| 4 | //! |
| 5 | //! There are multiple language negotiation strategies; the most popular is |
| 6 | //! described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt). |
| 7 | //! |
| 8 | //! The algorithm is based on the RFC4647 3.3.2 Extended Filtering algorithm, |
| 9 | //! with several modifications. |
| 10 | //! |
| 11 | //! # Example: |
| 12 | //! |
| 13 | //! ``` |
| 14 | //! use fluent_langneg::negotiate_languages; |
| 15 | //! use fluent_langneg::NegotiationStrategy; |
| 16 | //! use fluent_langneg::convert_vec_str_to_langids_lossy; |
| 17 | //! use unic_langid::LanguageIdentifier; |
| 18 | //! |
| 19 | //! let requested = convert_vec_str_to_langids_lossy(&["pl" , "fr" , "en-US" ]); |
| 20 | //! let available = convert_vec_str_to_langids_lossy(&["it" , "de" , "fr" , "en-GB" , "en_US" ]); |
| 21 | //! let default: LanguageIdentifier = "en-US" .parse().expect("Parsing langid failed." ); |
| 22 | //! |
| 23 | //! let supported = negotiate_languages( |
| 24 | //! &requested, |
| 25 | //! &available, |
| 26 | //! Some(&default), |
| 27 | //! NegotiationStrategy::Filtering |
| 28 | //! ); |
| 29 | //! |
| 30 | //! let expected = convert_vec_str_to_langids_lossy(&["fr" , "en-US" , "en-GB" ]); |
| 31 | //! assert_eq!(supported, |
| 32 | //! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>()); |
| 33 | //! ``` |
| 34 | //! |
| 35 | //! # The exact algorithm is custom, and consists of a 6-level strategy: |
| 36 | //! |
| 37 | //! ### 1) Attempt to find an exact match for each requested locale in available locales. |
| 38 | //! |
| 39 | //! Example: |
| 40 | //! |
| 41 | //! ```text |
| 42 | //! // [requested] * [available] = [supported] |
| 43 | //! |
| 44 | //! ["en-US"] * ["en-US"] = ["en-US"] |
| 45 | //! ``` |
| 46 | //! |
| 47 | //! ### 2) Attempt to match a requested locale to an available locale treated as a locale range. |
| 48 | //! |
| 49 | //! Example: |
| 50 | //! |
| 51 | //! ```text |
| 52 | //! // [requested] * [available] = [supported] |
| 53 | //! |
| 54 | //! ["en-US"] * ["en"] = ["en"] |
| 55 | //! ^^ |
| 56 | //! |-- becomes "en-*-*-*" |
| 57 | //! ``` |
| 58 | //! |
| 59 | //! ### 3) Maximize the requested locale to find the best match in available locales. |
| 60 | //! |
| 61 | //! This part uses ICU's likelySubtags or similar database. |
| 62 | //! |
| 63 | //! Example: |
| 64 | //! |
| 65 | //! ```text |
| 66 | //! // [requested] * [available] = [supported] |
| 67 | //! |
| 68 | //! ["en"] * ["en-GB", "en-US"] = ["en-US"] |
| 69 | //! ^^ ^^^^^ ^^^^^ |
| 70 | //! | | | |
| 71 | //! | |----------- become "en-*-GB-*" and "en-*-US-*" |
| 72 | //! | |
| 73 | //! |-- ICU likelySubtags expands it to "en-Latn-US" |
| 74 | //! ``` |
| 75 | //! |
| 76 | //! ### 4) Attempt to look up a different variant of the same locale. |
| 77 | //! |
| 78 | //! Example: |
| 79 | //! |
| 80 | //! ```text |
| 81 | //! // [requested] * [available] = [supported] |
| 82 | //! |
| 83 | //! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"] |
| 84 | //! ^^^^^^^^^ ^^^^^^^^^ |
| 85 | //! | |-- become "ja-*-JP-mac" |
| 86 | //! | |
| 87 | //! |----------- replace variant with range: "ja-JP-*" |
| 88 | //! ``` |
| 89 | //! |
| 90 | //! ### 5) Look up a maximized version of the requested locale, stripped of the region code. |
| 91 | //! |
| 92 | //! Example: |
| 93 | //! |
| 94 | //! ```text |
| 95 | //! // [requested] * [available] = [supported] |
| 96 | //! |
| 97 | //! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"] |
| 98 | //! ^^^^^ |
| 99 | //! | ^^^^^ ^^^^^ |
| 100 | //! | | | |
| 101 | //! | |----------- become "en-*-ZA-*" and "en-*-US-*" |
| 102 | //! | |
| 103 | //! |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US" |
| 104 | //! ``` |
| 105 | //! |
| 106 | //! |
| 107 | //! ### 6) Attempt to look up a different region of the same locale. |
| 108 | //! |
| 109 | //! Example: |
| 110 | //! |
| 111 | //! ```text |
| 112 | //! // [requested] * [available] = [supported] |
| 113 | //! |
| 114 | //! ["en-GB"] * ["en-AU"] = ["en-AU"] |
| 115 | //! ^^^^^ ^^^^^ |
| 116 | //! | |-- become "en-*-AU-*" |
| 117 | //! | |
| 118 | //! |----- replace region with range: "en-*" |
| 119 | //! ``` |
| 120 | //! |
| 121 | |
| 122 | use unic_langid::LanguageIdentifier; |
| 123 | |
| 124 | #[cfg (not(feature = "cldr" ))] |
| 125 | mod likely_subtags; |
| 126 | #[cfg (not(feature = "cldr" ))] |
| 127 | use likely_subtags::MockLikelySubtags; |
| 128 | |
/// Selects how many available locales are returned for the requested ones.
///
/// The variants differ only in when `filter_matches` stops collecting and in
/// how `negotiate_languages` treats the default locale.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NegotiationStrategy {
    /// Collect every available locale that matches any requested locale, at
    /// every step of the algorithm. The default locale, if given, is appended
    /// unless it is already part of the result.
    Filtering,
    /// Take only the first available locale that matches each requested
    /// locale, then move on to the next requested locale. The default locale,
    /// if given, is appended unless it is already part of the result.
    Matching,
    /// Take the first available locale that matches any requested locale and
    /// stop. The default locale, if given, is used only when nothing matched.
    Lookup,
}
| 135 | |
| 136 | pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>( |
| 137 | requested: &[R], |
| 138 | available: &'a [A], |
| 139 | strategy: NegotiationStrategy, |
| 140 | ) -> Vec<&'a A> { |
| 141 | let mut supported_locales = vec![]; |
| 142 | |
| 143 | let mut available_locales: Vec<&A> = available.iter().collect(); |
| 144 | |
| 145 | for req in requested { |
| 146 | let mut req = req.as_ref().to_owned(); |
| 147 | macro_rules! test_strategy { |
| 148 | ($self_as_range:expr, $other_as_range:expr) => {{ |
| 149 | let mut match_found = false; |
| 150 | available_locales.retain(|locale| { |
| 151 | if strategy != NegotiationStrategy::Filtering && match_found { |
| 152 | return true; |
| 153 | } |
| 154 | |
| 155 | if locale |
| 156 | .as_ref() |
| 157 | .matches(&req, $self_as_range, $other_as_range) |
| 158 | { |
| 159 | match_found = true; |
| 160 | supported_locales.push(*locale); |
| 161 | return false; |
| 162 | } |
| 163 | true |
| 164 | }); |
| 165 | |
| 166 | if match_found { |
| 167 | match strategy { |
| 168 | NegotiationStrategy::Filtering => {} |
| 169 | NegotiationStrategy::Matching => continue, |
| 170 | NegotiationStrategy::Lookup => break, |
| 171 | } |
| 172 | } |
| 173 | }}; |
| 174 | } |
| 175 | |
| 176 | // 1) Try to find a simple (case-insensitive) string match for the request. |
| 177 | test_strategy!(false, false); |
| 178 | |
| 179 | // 2) Try to match against the available locales treated as ranges. |
| 180 | test_strategy!(true, false); |
| 181 | |
| 182 | // Per Unicode TR35, 4.4 Locale Matching, we don't add likely subtags to |
| 183 | // requested locales, so we'll skip it from the rest of the steps. |
| 184 | if req.language.is_empty() { |
| 185 | continue; |
| 186 | } |
| 187 | |
| 188 | // 3) Try to match against a maximized version of the requested locale |
| 189 | if req.maximize() { |
| 190 | test_strategy!(true, false); |
| 191 | } |
| 192 | |
| 193 | // 4) Try to match against a variant as a range |
| 194 | req.clear_variants(); |
| 195 | test_strategy!(true, true); |
| 196 | |
| 197 | // 5) Try to match against the likely subtag without region |
| 198 | req.region = None; |
| 199 | if req.maximize() { |
| 200 | test_strategy!(true, false); |
| 201 | } |
| 202 | |
| 203 | // 6) Try to match against a region as a range |
| 204 | req.region = None; |
| 205 | test_strategy!(true, true); |
| 206 | } |
| 207 | |
| 208 | supported_locales |
| 209 | } |
| 210 | |
| 211 | pub fn negotiate_languages< |
| 212 | 'a, |
| 213 | R: 'a + AsRef<LanguageIdentifier>, |
| 214 | A: 'a + AsRef<LanguageIdentifier> + PartialEq, |
| 215 | >( |
| 216 | requested: &[R], |
| 217 | available: &'a [A], |
| 218 | default: Option<&'a A>, |
| 219 | strategy: NegotiationStrategy, |
| 220 | ) -> Vec<&'a A> { |
| 221 | let mut supported: Vec<&A> = filter_matches(requested, available, strategy); |
| 222 | |
| 223 | if let Some(default: &'a A) = default { |
| 224 | if strategy == NegotiationStrategy::Lookup { |
| 225 | if supported.is_empty() { |
| 226 | supported.push(default); |
| 227 | } |
| 228 | } else if !supported.contains(&default) { |
| 229 | supported.push(default); |
| 230 | } |
| 231 | } |
| 232 | supported |
| 233 | } |
| 234 | |