1 | //! Language Negotiation is a process in which locales from different |
2 | //! sources are filtered and sorted in an effort to produce the best |
3 | //! possible selection of them. |
4 | //! |
5 | //! There are multiple language negotiation strategies; the most popular is |
6 | //! described in [RFC4647](https://www.ietf.org/rfc/rfc4647.txt). |
7 | //! |
8 | //! The algorithm is based on the RFC4647 3.3.2 Extended Filtering algorithm, |
9 | //! with several modifications. |
10 | //! |
11 | //! # Example: |
12 | //! |
13 | //! ``` |
14 | //! use fluent_langneg::negotiate_languages; |
15 | //! use fluent_langneg::NegotiationStrategy; |
16 | //! use fluent_langneg::convert_vec_str_to_langids_lossy; |
17 | //! use unic_langid::LanguageIdentifier; |
18 | //! |
19 | //! let requested = convert_vec_str_to_langids_lossy(&["pl" , "fr" , "en-US" ]); |
20 | //! let available = convert_vec_str_to_langids_lossy(&["it" , "de" , "fr" , "en-GB" , "en_US" ]); |
21 | //! let default: LanguageIdentifier = "en-US" .parse().expect("Parsing langid failed." ); |
22 | //! |
23 | //! let supported = negotiate_languages( |
24 | //! &requested, |
25 | //! &available, |
26 | //! Some(&default), |
27 | //! NegotiationStrategy::Filtering |
28 | //! ); |
29 | //! |
30 | //! let expected = convert_vec_str_to_langids_lossy(&["fr" , "en-US" , "en-GB" ]); |
31 | //! assert_eq!(supported, |
32 | //! expected.iter().map(|t| t.as_ref()).collect::<Vec<&LanguageIdentifier>>()); |
33 | //! ``` |
34 | //! |
35 | //! # The exact algorithm is custom, and consists of a 6-level strategy: |
36 | //! |
37 | //! ### 1) Attempt to find an exact match for each requested locale in available locales. |
38 | //! |
39 | //! Example: |
40 | //! |
41 | //! ```text |
42 | //! // [requested] * [available] = [supported] |
43 | //! |
44 | //! ["en-US"] * ["en-US"] = ["en-US"] |
45 | //! ``` |
46 | //! |
47 | //! ### 2) Attempt to match a requested locale to an available locale treated as a locale range. |
48 | //! |
49 | //! Example: |
50 | //! |
51 | //! ```text |
52 | //! // [requested] * [available] = [supported] |
53 | //! |
54 | //! ["en-US"] * ["en"] = ["en"] |
55 | //! ^^ |
56 | //! |-- becomes "en-*-*-*" |
57 | //! ``` |
58 | //! |
59 | //! ### 3) Maximize the requested locale to find the best match in available locales. |
60 | //! |
61 | //! This part uses ICU's likelySubtags or similar database. |
62 | //! |
63 | //! Example: |
64 | //! |
65 | //! ```text |
66 | //! // [requested] * [available] = [supported] |
67 | //! |
68 | //! ["en"] * ["en-GB", "en-US"] = ["en-US"] |
69 | //! ^^ ^^^^^ ^^^^^ |
70 | //! | | | |
71 | //! | |----------- become "en-*-GB-*" and "en-*-US-*" |
72 | //! | |
73 | //! |-- ICU likelySubtags expands it to "en-Latn-US" |
74 | //! ``` |
75 | //! |
76 | //! ### 4) Attempt to look up a different variant of the same locale. |
77 | //! |
78 | //! Example: |
79 | //! |
80 | //! ```text |
81 | //! // [requested] * [available] = [supported] |
82 | //! |
83 | //! ["ja-JP-win"] * ["ja-JP-mac"] = ["ja-JP-mac"] |
84 | //! ^^^^^^^^^ ^^^^^^^^^ |
85 | //! | |-- become "ja-*-JP-mac" |
86 | //! | |
87 | //! |----------- replace variant with range: "ja-JP-*" |
88 | //! ``` |
89 | //! |
90 | //! ### 5) Look up a maximized version of the requested locale, stripped of the region code. |
91 | //! |
92 | //! Example: |
93 | //! |
94 | //! ```text |
95 | //! // [requested] * [available] = [supported] |
96 | //! |
97 | //! ["en-CA"] * ["en-ZA", "en-US"] = ["en-US", "en-ZA"] |
98 | //! ^^^^^ |
99 | //! | ^^^^^ ^^^^^ |
100 | //! | | | |
101 | //! | |----------- become "en-*-ZA-*" and "en-*-US-*" |
102 | //! | |
103 | //! |----------- strip region produces "en", then lookup likelySubtag: "en-Latn-US" |
104 | //! ``` |
105 | //! |
106 | //! |
107 | //! ### 6) Attempt to look up a different region of the same locale. |
108 | //! |
109 | //! Example: |
110 | //! |
111 | //! ```text |
112 | //! // [requested] * [available] = [supported] |
113 | //! |
114 | //! ["en-GB"] * ["en-AU"] = ["en-AU"] |
115 | //! ^^^^^ ^^^^^ |
116 | //! | |-- become "en-*-AU-*" |
117 | //! | |
118 | //! |----- replace region with range: "en-*" |
119 | //! ``` |
120 | //! |
121 | |
122 | use unic_langid::LanguageIdentifier; |
123 | |
124 | #[cfg (not(feature = "cldr" ))] |
125 | mod likely_subtags; |
126 | #[cfg (not(feature = "cldr" ))] |
127 | use likely_subtags::MockLikelySubtags; |
128 | |
/// Controls how many available locales are selected for the requested ones.
///
/// The variants differ only in when the matching steps stop once a match
/// has been found.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum NegotiationStrategy {
    /// Run every matching step for every requested locale and collect all
    /// available locales that match.
    Filtering,
    /// Stop processing a requested locale after the first step that yields a
    /// match, then move on to the next requested locale. At most one
    /// available locale is selected per requested locale.
    Matching,
    /// Stop the whole negotiation at the first match, so at most one
    /// available locale is selected in total.
    Lookup,
}
135 | |
136 | pub fn filter_matches<'a, R: 'a + AsRef<LanguageIdentifier>, A: 'a + AsRef<LanguageIdentifier>>( |
137 | requested: &[R], |
138 | available: &'a [A], |
139 | strategy: NegotiationStrategy, |
140 | ) -> Vec<&'a A> { |
141 | let mut supported_locales = vec![]; |
142 | |
143 | let mut available_locales: Vec<&A> = available.iter().collect(); |
144 | |
145 | for req in requested { |
146 | let mut req = req.as_ref().to_owned(); |
147 | macro_rules! test_strategy { |
148 | ($self_as_range:expr, $other_as_range:expr) => {{ |
149 | let mut match_found = false; |
150 | available_locales.retain(|locale| { |
151 | if strategy != NegotiationStrategy::Filtering && match_found { |
152 | return true; |
153 | } |
154 | |
155 | if locale |
156 | .as_ref() |
157 | .matches(&req, $self_as_range, $other_as_range) |
158 | { |
159 | match_found = true; |
160 | supported_locales.push(*locale); |
161 | return false; |
162 | } |
163 | true |
164 | }); |
165 | |
166 | if match_found { |
167 | match strategy { |
168 | NegotiationStrategy::Filtering => {} |
169 | NegotiationStrategy::Matching => continue, |
170 | NegotiationStrategy::Lookup => break, |
171 | } |
172 | } |
173 | }}; |
174 | } |
175 | |
176 | // 1) Try to find a simple (case-insensitive) string match for the request. |
177 | test_strategy!(false, false); |
178 | |
179 | // 2) Try to match against the available locales treated as ranges. |
180 | test_strategy!(true, false); |
181 | |
182 | // Per Unicode TR35, 4.4 Locale Matching, we don't add likely subtags to |
183 | // requested locales, so we'll skip it from the rest of the steps. |
184 | if req.language.is_empty() { |
185 | continue; |
186 | } |
187 | |
188 | // 3) Try to match against a maximized version of the requested locale |
189 | if req.maximize() { |
190 | test_strategy!(true, false); |
191 | } |
192 | |
193 | // 4) Try to match against a variant as a range |
194 | req.clear_variants(); |
195 | test_strategy!(true, true); |
196 | |
197 | // 5) Try to match against the likely subtag without region |
198 | req.region = None; |
199 | if req.maximize() { |
200 | test_strategy!(true, false); |
201 | } |
202 | |
203 | // 6) Try to match against a region as a range |
204 | req.region = None; |
205 | test_strategy!(true, true); |
206 | } |
207 | |
208 | supported_locales |
209 | } |
210 | |
211 | pub fn negotiate_languages< |
212 | 'a, |
213 | R: 'a + AsRef<LanguageIdentifier>, |
214 | A: 'a + AsRef<LanguageIdentifier> + PartialEq, |
215 | >( |
216 | requested: &[R], |
217 | available: &'a [A], |
218 | default: Option<&'a A>, |
219 | strategy: NegotiationStrategy, |
220 | ) -> Vec<&'a A> { |
221 | let mut supported: Vec<&A> = filter_matches(requested, available, strategy); |
222 | |
223 | if let Some(default: &A) = default { |
224 | if strategy == NegotiationStrategy::Lookup { |
225 | if supported.is_empty() { |
226 | supported.push(default); |
227 | } |
228 | } else if !supported.contains(&default) { |
229 | supported.push(default); |
230 | } |
231 | } |
232 | supported |
233 | } |
234 | |