use std::{env, ffi::OsStr};

/// Name of the variable holding a colon-separated list of locales; queried first.
const LANGUAGE: &str = "LANGUAGE";
/// Name of the single-locale variable queried right after `LANGUAGE`.
const LC_ALL: &str = "LC_ALL";
/// Name of the single-locale variable queried after `LC_ALL`.
const LC_MESSAGES: &str = "LC_MESSAGES";
/// Name of the single-locale variable queried last.
const LANG: &str = "LANG";

/// Environment variable access abstraction to allow testing without
/// mutating env variables.
///
/// Use [`StdEnv`] to query [`std::env`].
trait EnvAccess {
    /// Returns the value stored under `key`, or `None` when no value is available.
    ///
    /// See also [`std::env::var`].
    fn get(&self, key: impl AsRef<OsStr>) -> Option<String>;
}

| 17 | /// Proxy to [std::env] |
| 18 | struct StdEnv; |
| 19 | impl EnvAccess for StdEnv { |
| 20 | fn get(&self, key: impl AsRef<OsStr>) -> Option<String> { |
| 21 | env::var(key).ok() |
| 22 | } |
| 23 | } |
| 24 | |
| 25 | pub(crate) fn get() -> impl Iterator<Item = String> { |
| 26 | _get(&StdEnv) |
| 27 | } |
| 28 | |
| 29 | /// Retrieves a list of unique locales by checking specific environment variables |
| 30 | /// in a predefined order: LANGUAGE, LC_ALL, LC_MESSAGES, and LANG. |
| 31 | /// |
| 32 | /// The function first checks the `LANGUAGE` environment variable, which can contain |
| 33 | /// one or more locales separated by a colon (`:`). It then splits these values, |
| 34 | /// converts them from [POSIX](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html) |
| 35 | /// to [BCP 47](https://www.ietf.org/rfc/bcp/bcp47.html) format, and adds them to the list of locales |
| 36 | /// if they are not already included. |
| 37 | /// |
| 38 | /// Next, the function checks the `LC_ALL`, `LC_MESSAGES`, and `LANG` environment |
| 39 | /// variables. Each of these variables contains a single locale. If a locale is found, |
| 40 | /// and it's not empty, it is converted to BCP 47 format and added to the list if |
| 41 | /// it is not already included. |
| 42 | /// |
| 43 | /// For more information check this issue: https://github.com/1Password/sys-locale/issues/14. |
| 44 | /// |
| 45 | /// The function ensures that locales are returned in the order of precedence |
| 46 | /// and without duplicates. The final list of locales is returned as an iterator. |
| 47 | /// |
| 48 | /// # Returns |
| 49 | /// |
| 50 | /// An iterator over the unique locales found in the environment variables. |
| 51 | /// |
| 52 | /// # Environment Variables Checked |
| 53 | /// |
| 54 | /// 1. `LANGUAGE` - Can contain multiple locales, each separated by a colon (`:`), highest priority. |
| 55 | /// 2. `LC_ALL` - Contains a single locale, high priority. |
| 56 | /// 3. `LC_MESSAGES` - Contains a single locale, medium priority. |
| 57 | /// 4. `LANG` - Contains a single locale, low priority. |
| 58 | /// |
| 59 | /// # Example |
| 60 | /// |
| 61 | /// ```ignore |
| 62 | /// let locales: Vec<String> = _get(&env).collect(); |
| 63 | /// for locale in locales { |
| 64 | /// println!("User's preferred locales: {}" , locale); |
| 65 | /// } |
| 66 | /// ``` |
| 67 | fn _get(env: &impl EnvAccess) -> impl Iterator<Item = String> { |
| 68 | let mut locales = Vec::new(); |
| 69 | |
| 70 | // LANGUAGE contains one or multiple locales separated by colon (':') |
| 71 | if let Some(val) = env.get(LANGUAGE).filter(|val| !val.is_empty()) { |
| 72 | for part in val.split(':' ) { |
| 73 | let locale = posix_to_bcp47(part); |
| 74 | if !locales.contains(&locale) { |
| 75 | locales.push(locale); |
| 76 | } |
| 77 | } |
| 78 | } |
| 79 | |
| 80 | // LC_ALL, LC_MESSAGES and LANG contain one locale |
| 81 | for variable in [LC_ALL, LC_MESSAGES, LANG] { |
| 82 | if let Some(val) = env.get(variable).filter(|val| !val.is_empty()) { |
| 83 | let locale = posix_to_bcp47(&val); |
| 84 | if !locales.contains(&locale) { |
| 85 | locales.push(locale); |
| 86 | } |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | locales.into_iter() |
| 91 | } |
| 92 | |
/// Converts a POSIX locale string to a BCP 47 locale string.
///
/// This function processes the input `locale` by removing any character encoding
/// (the part after the `.` character) and any modifiers (the part after the `@` character).
/// It replaces underscores (`_`) with hyphens (`-`) to conform to BCP 47 formatting.
///
/// If the locale is already in the BCP 47 format, no changes are made.
/// An empty input yields an empty string.
///
/// Useful links:
/// - [The Open Group Base Specifications Issue 8 - 7. Locale](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html)
/// - [The Open Group Base Specifications Issue 8 - 8. Environment Variables](https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap08.html)
/// - [BCP 47 specification](https://www.ietf.org/rfc/bcp/bcp47.html)
///
/// # Examples
///
/// ```ignore
/// let bcp47 = posix_to_bcp47("en-US"); // already BCP 47
/// assert_eq!(bcp47, "en-US"); // no changes
///
/// let bcp47 = posix_to_bcp47("en_US");
/// assert_eq!(bcp47, "en-US");
///
/// let bcp47 = posix_to_bcp47("ru_RU.UTF-8");
/// assert_eq!(bcp47, "ru-RU");
///
/// let bcp47 = posix_to_bcp47("fr_FR@dict");
/// assert_eq!(bcp47, "fr-FR");
///
/// let bcp47 = posix_to_bcp47("de_DE.UTF-8@euro");
/// assert_eq!(bcp47, "de-DE");
/// ```
///
/// # TODO
///
/// 1. Implement POSIX to BCP 47 modifier conversion (see https://github.com/1Password/sys-locale/issues/32).
/// 2. Optimize to avoid creating a new buffer (see https://github.com/1Password/sys-locale/pull/33).
fn posix_to_bcp47(locale: &str) -> String {
    locale
        .chars()
        // Everything from the first '.' (encoding) or '@' (modifier) onwards is dropped.
        .take_while(|&c| c != '.' && c != '@')
        .map(|c| if c == '_' { '-' } else { c })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::{EnvAccess, _get, posix_to_bcp47, LANG, LANGUAGE, LC_ALL, LC_MESSAGES};
    use std::{
        collections::HashMap,
        ffi::{OsStr, OsString},
    };

    /// In-memory stand-in for the process environment.
    type MockEnv = HashMap<OsString, String>;

    impl EnvAccess for MockEnv {
        fn get(&self, key: impl AsRef<OsStr>) -> Option<String> {
            // Explicitly the inherent map lookup, not a recursive call to this trait method.
            HashMap::get(self, key.as_ref()).cloned()
        }
    }

    const BCP_47: &str = "fr-FR";
    const POSIX: &str = "fr_FR";
    const POSIX_ENC: &str = "fr_FR.UTF-8";
    const POSIX_MOD: &str = "fr_FR@euro";
    const POSIX_ENC_MOD: &str = "fr_FR.UTF-8@euro";

    #[test]
    fn parse_identifier() {
        assert_eq!(posix_to_bcp47(BCP_47), BCP_47);
        assert_eq!(posix_to_bcp47(POSIX), BCP_47);
        assert_eq!(posix_to_bcp47(POSIX_ENC), BCP_47);
        assert_eq!(posix_to_bcp47(POSIX_MOD), BCP_47);
        assert_eq!(posix_to_bcp47(POSIX_ENC_MOD), BCP_47);
    }

    #[test]
    fn env_get() {
        /// Sets all four variables in `env` and checks that `_get` yields
        /// exactly `expected`, in order.
        fn case(
            env: &mut MockEnv,
            language: impl Into<String>,
            lc_all: impl Into<String>,
            lc_messages: impl Into<String>,
            lang: impl Into<String>,
            expected: impl IntoIterator<Item = impl Into<String>>,
        ) {
            env.insert(LANGUAGE.into(), language.into());
            env.insert(LC_ALL.into(), lc_all.into());
            env.insert(LC_MESSAGES.into(), lc_messages.into());
            env.insert(LANG.into(), lang.into());

            // Compare collected vectors so a failure prints both sequences.
            let actual: Vec<String> = _get(env).collect();
            let expected: Vec<String> = expected.into_iter().map(Into::into).collect();
            assert_eq!(actual, expected);
        }

        let mut env = MockEnv::new();
        assert_eq!(_get(&env).next(), None);

        // Empty
        case(&mut env, "", "", "", "", &[] as &[String]);

        // Constants
        case(
            &mut env,
            POSIX_ENC_MOD,
            POSIX_ENC,
            POSIX_MOD,
            POSIX,
            [BCP_47],
        );

        // Only one variable
        case(&mut env, "en_US", "", "", "", ["en-US"]);
        case(&mut env, "", "en_US", "", "", ["en-US"]);
        case(&mut env, "", "", "en_US", "", ["en-US"]);
        case(&mut env, "", "", "", "en_US", ["en-US"]);

        // Duplicates
        case(&mut env, "en_US", "en_US", "en_US", "en_US", ["en-US"]);
        case(
            &mut env,
            "en_US",
            "en_US",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU"],
        );
        case(
            &mut env,
            "en_US",
            "ru_RU",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU"],
        );
        case(
            &mut env,
            "en_US",
            "es_ES",
            "ru_RU",
            "en_US",
            ["en-US", "es-ES", "ru-RU"],
        );
        case(
            &mut env,
            "en_US:ru_RU:es_ES:en_US",
            "es_ES",
            "ru_RU",
            "en_US",
            ["en-US", "ru-RU", "es-ES"],
        );

        // Duplicates with different case
        case(
            &mut env,
            "en_US:fr_fr",
            "EN_US",
            "fR_Fr",
            "En_US",
            ["en-US", "fr-fr", "EN-US", "fR-Fr", "En-US"],
        );

        // More complicated cases
        case(
            &mut env,
            "ru_RU:ru:en_US:en",
            "ru_RU.UTF-8",
            "ru_RU.UTF-8",
            "ru_RU.UTF-8",
            ["ru-RU", "ru", "en-US", "en"],
        );
        case(
            &mut env,
            "fr_FR.UTF-8@euro:fr_FR.UTF-8:fr_FR:fr:en_US.UTF-8:en_US:en",
            "es_ES.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            ["fr-FR", "fr", "en-US", "en", "es-ES"],
        );
        case(
            &mut env,
            "",
            "es_ES.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            "fr_FR.UTF-8@euro",
            ["es-ES", "fr-FR"],
        );
        case(
            &mut env,
            "fr_FR@euro",
            "fr_FR.UTF-8",
            "en_US.UTF-8",
            "en_US.UTF-8@dict",
            ["fr-FR", "en-US"],
        );

        // Already BCP 47
        case(&mut env, BCP_47, BCP_47, BCP_47, POSIX, [BCP_47]);
        case(
            &mut env,
            "fr-FR",
            "es-ES",
            "de-DE",
            "en-US",
            ["fr-FR", "es-ES", "de-DE", "en-US"],
        );
    }
}
