1 | use std::{env, ffi::OsStr}; |
2 | |
/// Name of the variable holding a colon-separated list of locales.
const LANGUAGE: &str = "LANGUAGE";
/// Name of the first single-locale variable that is consulted.
const LC_ALL: &str = "LC_ALL";
/// Name of the second single-locale variable that is consulted.
const LC_MESSAGES: &str = "LC_MESSAGES";
/// Name of the last single-locale variable that is consulted.
const LANG: &str = "LANG";
7 | |
/// Read-only view of a set of environment variables.
///
/// Lookups go through this trait rather than straight to [std::env] so that
/// tests can supply their own values without mutating the real process
/// environment.
///
/// Use [StdEnv] to query [std::env].
trait EnvAccess {
    /// Looks up `key` and returns its value, or `None` when no value is
    /// available.
    ///
    /// See also [std::env::var].
    fn get(&self, key: impl AsRef<OsStr>) -> Option<String>;
}
16 | |
17 | /// Proxy to [std::env] |
18 | struct StdEnv; |
19 | impl EnvAccess for StdEnv { |
20 | fn get(&self, key: impl AsRef<OsStr>) -> Option<String> { |
21 | env::var(key).ok() |
22 | } |
23 | } |
24 | |
25 | pub(crate) fn get() -> impl Iterator<Item = String> { |
26 | _get(&StdEnv) |
27 | } |
28 | |
29 | /// Retrieves a list of unique locales by checking specific environment variables |
30 | /// in a predefined order: LANGUAGE, LC_ALL, LC_MESSAGES, and LANG. |
31 | /// |
32 | /// The function first checks the `LANGUAGE` environment variable, which can contain |
33 | /// one or more locales separated by a colon (`:`). It then splits these values, |
34 | /// converts them from [POSIX](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html) |
35 | /// to [BCP 47](https://www.ietf.org/rfc/bcp/bcp47.html) format, and adds them to the list of locales |
36 | /// if they are not already included. |
37 | /// |
38 | /// Next, the function checks the `LC_ALL`, `LC_MESSAGES`, and `LANG` environment |
39 | /// variables. Each of these variables contains a single locale. If a locale is found, |
40 | /// and it's not empty, it is converted to BCP 47 format and added to the list if |
41 | /// it is not already included. |
42 | /// |
43 | /// For more information check this issue: https://github.com/1Password/sys-locale/issues/14. |
44 | /// |
45 | /// The function ensures that locales are returned in the order of precedence |
46 | /// and without duplicates. The final list of locales is returned as an iterator. |
47 | /// |
48 | /// # Returns |
49 | /// |
50 | /// An iterator over the unique locales found in the environment variables. |
51 | /// |
52 | /// # Environment Variables Checked |
53 | /// |
54 | /// 1. `LANGUAGE` - Can contain multiple locales, each separated by a colon (`:`), highest priority. |
55 | /// 2. `LC_ALL` - Contains a single locale, high priority. |
56 | /// 3. `LC_MESSAGES` - Contains a single locale, medium priority. |
57 | /// 4. `LANG` - Contains a single locale, low priority. |
58 | /// |
59 | /// # Example |
60 | /// |
61 | /// ```ignore |
62 | /// let locales: Vec<String> = _get(&env).collect(); |
63 | /// for locale in locales { |
64 | /// println!("User's preferred locales: {}" , locale); |
65 | /// } |
66 | /// ``` |
67 | fn _get(env: &impl EnvAccess) -> impl Iterator<Item = String> { |
68 | let mut locales = Vec::new(); |
69 | |
70 | // LANGUAGE contains one or multiple locales separated by colon (':') |
71 | if let Some(val) = env.get(LANGUAGE).filter(|val| !val.is_empty()) { |
72 | for part in val.split(':' ) { |
73 | let locale = posix_to_bcp47(part); |
74 | if !locales.contains(&locale) { |
75 | locales.push(locale); |
76 | } |
77 | } |
78 | } |
79 | |
80 | // LC_ALL, LC_MESSAGES and LANG contain one locale |
81 | for variable in [LC_ALL, LC_MESSAGES, LANG] { |
82 | if let Some(val) = env.get(variable).filter(|val| !val.is_empty()) { |
83 | let locale = posix_to_bcp47(&val); |
84 | if !locales.contains(&locale) { |
85 | locales.push(locale); |
86 | } |
87 | } |
88 | } |
89 | |
90 | locales.into_iter() |
91 | } |
92 | |
/// Converts a POSIX locale string to a BCP 47 locale string.
///
/// The input is cut at the first `.` (start of the character encoding) or `@`
/// (start of the modifier), whichever comes first, and every underscore (`_`)
/// in the remaining prefix is replaced with a hyphen (`-`).
///
/// A string that contains none of `.`, `@` or `_` (for example one that is
/// already in BCP 47 form) is returned unchanged. An input that is empty, or
/// that starts with `.` or `@`, yields an empty string.
///
/// Useful links:
/// - [The Open Group Base Specifications Issue 8 - 7. Locale](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html)
/// - [The Open Group Base Specifications Issue 8 - 8. Environment Variables](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html)
/// - [BCP 47 specification](https://www.ietf.org/rfc/bcp/bcp47.html)
///
/// # Examples
///
/// ```ignore
/// let bcp47 = posix_to_bcp47("en-US"); // already BCP 47
/// assert_eq!(bcp47, "en-US"); // no changes
///
/// let bcp47 = posix_to_bcp47("en_US");
/// assert_eq!(bcp47, "en-US");
///
/// let bcp47 = posix_to_bcp47("ru_RU.UTF-8");
/// assert_eq!(bcp47, "ru-RU");
///
/// let bcp47 = posix_to_bcp47("fr_FR@dict");
/// assert_eq!(bcp47, "fr-FR");
///
/// let bcp47 = posix_to_bcp47("de_DE.UTF-8@euro");
/// assert_eq!(bcp47, "de-DE");
/// ```
///
/// # TODO
///
/// 1. Implement POSIX to BCP 47 modifier conversion (see https://github.com/1Password/sys-locale/issues/32).
/// 2. Optimize to avoid creating a new buffer (see https://github.com/1Password/sys-locale/pull/33).
fn posix_to_bcp47(locale: &str) -> String {
    locale
        .chars()
        // Stop at the encoding (`.`) or modifier (`@`) suffix, whichever is first.
        .take_while(|&c| c != '.' && c != '@')
        // POSIX joins language and territory with `_`; BCP 47 uses `-`.
        .map(|c| if c == '_' { '-' } else { c })
        .collect()
}
136 | |
#[cfg(test)]
mod tests {
    use super::{EnvAccess, _get, posix_to_bcp47, LANG, LANGUAGE, LC_ALL, LC_MESSAGES};
    use std::{
        collections::HashMap,
        ffi::{OsStr, OsString},
    };

    /// In-memory replacement for the process environment.
    type MockEnv = HashMap<OsString, String>;

    impl EnvAccess for MockEnv {
        fn get(&self, key: impl AsRef<OsStr>) -> Option<String> {
            // Spelled as a path call to make clear this is the map's own
            // lookup and not a recursive call into the trait method.
            HashMap::get(self, key.as_ref()).cloned()
        }
    }

    // The same locale written in each of the supported input shapes.
    const BCP_47: &str = "fr-FR";
    const POSIX: &str = "fr_FR";
    const POSIX_ENC: &str = "fr_FR.UTF-8";
    const POSIX_MOD: &str = "fr_FR@euro";
    const POSIX_ENC_MOD: &str = "fr_FR.UTF-8@euro";

    /// Every spelling of the sample locale converts to the same BCP 47 tag.
    #[test]
    fn parse_identifier() {
        for input in [BCP_47, POSIX, POSIX_ENC, POSIX_MOD, POSIX_ENC_MOD] {
            assert_eq!(posix_to_bcp47(input), BCP_47);
        }
    }

    /// Exercises `_get` against a mock environment.
    #[test]
    fn env_get() {
        /// Overwrites all four variables in `env`, then asserts that `_get`
        /// yields exactly `expected`, in order.
        fn check(
            env: &mut MockEnv,
            language: impl Into<String>,
            lc_all: impl Into<String>,
            lc_messages: impl Into<String>,
            lang: impl Into<String>,
            expected: impl IntoIterator<Item = impl Into<String>>,
        ) {
            let assignments = [
                (LANGUAGE, language.into()),
                (LC_ALL, lc_all.into()),
                (LC_MESSAGES, lc_messages.into()),
                (LANG, lang.into()),
            ];
            for (name, value) in assignments {
                env.insert(OsString::from(name), value);
            }

            let wanted = expected.into_iter().map(|item| -> String { item.into() });
            assert!(_get(&*env).eq(wanted));
        }

        let mut env = MockEnv::new();

        // Nothing set at all
        assert_eq!(_get(&env).next(), None);

        // Empty
        check(&mut env, "", "", "", "", Vec::<String>::new());

        // Constants
        check(&mut env, POSIX_ENC_MOD, POSIX_ENC, POSIX_MOD, POSIX, [BCP_47]);

        // Only one variable
        check(&mut env, "en_US", "", "", "", ["en-US"]);
        check(&mut env, "", "en_US", "", "", ["en-US"]);
        check(&mut env, "", "", "en_US", "", ["en-US"]);
        check(&mut env, "", "", "", "en_US", ["en-US"]);

        // Duplicates
        check(&mut env, "en_US", "en_US", "en_US", "en_US", ["en-US"]);
        check(
            &mut env,
            "en_US",
            "en_US",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU"],
        );
        check(
            &mut env,
            "en_US",
            "ru_RU",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU"],
        );
        check(
            &mut env,
            "en_US",
            "es_ES",
            "ru_RU",
            "en_US",
            ["en-US", "es-ES", "ru-RU"],
        );
        check(
            &mut env,
            "en_US:ru_RU:es_ES:en_US",
            "es_ES",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU", "es-ES"],
        );

        // Duplicates with different case
        check(
            &mut env,
            "en_US:fr_fr",
            "EN_US",
            "fR_Fr",
            "En_US",
            ["en-US", "fr-fr", "EN-US", "fR-Fr", "En-US"],
        );

        // More complicated cases
        check(
            &mut env,
            "ru_RU:ru:en_US:en",
            "ru_RU.UTF-8",
            "ru_RU.UTF-8",
            "ru_RU.UTF-8",
            ["ru-RU", "ru", "en-US", "en"],
        );
        check(
            &mut env,
            "fr_FR.UTF-8@euro:fr_FR.UTF-8:fr_FR:fr:en_US.UTF-8:en_US:en",
            "es_ES.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            ["fr-FR", "fr", "en-US", "en", "es-ES"],
        );
        check(
            &mut env,
            "",
            "es_ES.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            ["es-ES", "fr-FR"],
        );
        check(
            &mut env,
            "fr_FR@euro",
            "fr_FR.UTF-8",
            "en_US.UTF-8",
            "en_US.UTF-8@dict",
            ["fr-FR", "en-US"],
        );

        // Already BCP 47
        check(&mut env, BCP_47, BCP_47, BCP_47, POSIX, [BCP_47]);
        check(
            &mut env,
            "fr-FR",
            "es-ES",
            "de-DE",
            "en-US",
            ["fr-FR", "es-ES", "de-DE", "en-US"],
        );
    }
}
297 | |