lib.rs source code [crates/locale_config/src/lib.rs]

1	//! Global locale instances and system inspection.
2	//!
3	//! This is an auxiliary crate for i18n solutions that:
4	//!
5	//! - Holds the appropriate default instances of locale.
6	//! - Inspects the system for the initial values.
7	//!
8	//! You don't want to use it directly, but instead use an internationalisation crate like [locale].
9	//!
10	//! This crate is separate and intentionally minimal so that multiple i18n crates or multiple
11	//! versions of one that get into the application still share the current locale setting.
12	//!
13	//! [locale]: https://crates.io/crates/locale
14
15	#[macro_use]
16	extern crate lazy_static;
17
18	extern crate regex;
19
20	#[cfg(target_os = "macos")]
21	#[macro_use]
22	extern crate objc;
23
24	use regex::Regex;
25	use std::borrow::{Borrow,Cow};
26	use std::cell::RefCell;
27	use std::convert::AsRef;
28	use std::fmt;
29	use std::sync::Mutex;
30
31	// ------------------------------ LANGUAGE RANGE ---------------------------------
32
/// Language and culture identifier.
///
/// This object holds a [RFC4647] extended language range.
///
/// The internal data may be owned or shared from object with lifetime `'a`. The lifetime can be
/// extended using the `into_static()` method, which internally clones the data as needed.
///
/// # Syntax
///
/// The range is composed of `-`-separated alphanumeric subtags, possibly replaced by `*`s. It
/// might be empty.
///
/// In agreement with [RFC4647], this object only requires that the tag matches:
///
/// ```ebnf
/// language_tag = (alpha{1,8} | "*")
///                ("-" (alphanum{1,8} | "*"))*
/// ```
///
/// The exact interpretation is up to the downstream localization provider, but it is expected
/// that it will be matched against a normalized [RFC5646] language tag, which has the structure:
///
/// ```ebnf
/// language_tag    = language
///                   ("-" script)?
///                   ("-" region)?
///                   ("-" variant)*
///                   ("-" extension)*
///                   ("-" private)?
///
/// language        = alpha{2,3} ("-" alpha{3}){0,3}
///
/// script          = alpha{4}
///
/// region          = alpha{2}
///                 | digit{3}
///
/// variant         = alphanum{5,8}
///                 | digit alphanum{3}
///
/// extension       = [0-9a-wyz] ("-" alphanum{2,8})+
///
/// private         = "x" ("-" alphanum{1,8})+
/// ```
///
/// * `language` is an [ISO639] 2-letter or, where not defined, 3-letter code. A code for
///   macro-language might be followed by code of specific dialect.
/// * `script` is an [ISO15924] 4-letter code.
/// * `region` is either an [ISO3166] 2-letter code or, for areas other than countries, [UN M.49]
///   3-digit numeric code.
/// * `variant` is a string indicating variant of the language.
/// * `extension` and `private` define additional options. The private part has same structure as
///   the Unicode [`-u-` extension][u_ext]. Available options are documented for the facets that
///   use them.
///
/// The values obtained by inspecting the system are normalized according to those rules.
///
/// The content will be case-normalized as recommended in [RFC5646] §2.1.1, namely:
///
/// * `language` is written in lowercase,
/// * `script` is written with first capital,
/// * `region` is written in uppercase and
/// * all other subtags are written in lowercase.
///
/// When detecting system configuration, additional options that may be generated under the
/// [`-u-` extension][u_ext] currently are:
///
/// * `cf` — Currency format (`account` for parenthesized negative values, `standard` for minus
///   sign).
/// * `fw` — First day of week (`mon` to `sun`).
/// * `hc` — Hour cycle (`h12` for 1–12, `h23` for 0–23).
/// * `ms` — Measurement system (`metric` or `ussystem`).
/// * `nu` — Numbering system—only decimal systems are currently used.
/// * `va` — Variant when locale is specified in Unix format and the tag after `@` does not
///   correspond to any variant defined in [Language subtag registry].
///
/// And under the `-x-` extension, following options are defined:
///
/// * `df` — Date format:
///
///     * `iso`: Short date should be in ISO format of `yyyy-MM-dd`.
///
///     For example `-df-iso`.
///
/// * `dm` — Decimal separator for monetary:
///
///     Followed by one or more Unicode codepoints in hexadecimal. For example `-dm-002c` means to
///     use comma.
///
/// * `ds` — Decimal separator for numbers:
///
///     Followed by one or more Unicode codepoints in hexadecimal. For example `-ds-002c` means to
///     use comma.
///
/// * `gm` — Group (thousand) separator for monetary:
///
///     Followed by one or more Unicode codepoints in hexadecimal. For example `-gm-00a0` means to
///     use non-breaking space.
///
/// * `gs` — Group (thousand) separator for numbers:
///
///     Followed by one or more Unicode codepoints in hexadecimal. For example `-gs-00a0` means to
///     use non-breaking space.
///
/// * `ls` — List separator:
///
///     Followed by one or more Unicode codepoints in hexadecimal. For example, `-ls-003b` means to
///     use a semicolon.
///
/// [RFC5646]: https://www.rfc-editor.org/rfc/rfc5646.txt
/// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt
/// [ISO639]: https://en.wikipedia.org/wiki/ISO_639
/// [ISO15924]: https://en.wikipedia.org/wiki/ISO_15924
/// [ISO3166]: https://en.wikipedia.org/wiki/ISO_3166
/// [UN M.49]: https://en.wikipedia.org/wiki/UN_M.49
/// [u_ext]: http://www.unicode.org/reports/tr35/#u_Extension
/// [Language subtag registry]: https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub struct LanguageRange<'a> {
    // The whole (already normalized) range; borrowed when the input needed no changes.
    language: Cow<'a, str>
}
154
155	lazy_static! {
156	static ref REGULAR_LANGUAGE_RANGE_REGEX: Regex = Regex::new(r"(?x) ^
157	(?P<language> (?:
158	[[:alpha:]]{2,3} (?: - [[:alpha:]]{3} ){0,3}
159	\| \* ))
160	(?P<script> - (?: [[:alpha:]]{4} \| \* ))?
161	(?P<region> - (?: [[:alpha:]]{2} \| [[:digit:]]{3} \| \* ))?
162	(?P<rest> (?: - (?: [[:alnum:]]{1,8} \| \* ))*)
163	$ ").unwrap();
164	static ref LANGUAGE_RANGE_REGEX: Regex = Regex::new(r"(?x) ^
165	(?: [[:alpha:]]{1,8} \| \* )
166	(?: - (?: [[:alnum:]]{1,8} \| \* ))*
167	$ ").unwrap();
168	static ref UNIX_INVARIANT_REGEX: Regex = Regex::new(r"(?ix) ^
169	(?: c \| posix )
170	(?: \. (?: [0-9a-zA-Z-]{1,20} ))?
171	$ ").unwrap();
172	static ref UNIX_TAG_REGEX: Regex = Regex::new(r"(?ix) ^
173	(?P<language> [[:alpha:]]{2,3} )
174	(?: _ (?P<region> [[:alpha:]]{2} \| [[:digit:]]{3} ))?
175	(?: \. (?P<encoding> [0-9a-zA-Z-]{1,20} ))?
176	(?: @ (?P<variant> [[:alnum:]]{1,20} ))?
177	$ ").unwrap();
178	}
179
/// Tells whether the `Cow` holds its own copy of the data (as opposed to borrowing it).
///
/// Used to detect whether any canonicalization step actually had to modify its input.
fn is_owned<T: ToOwned + ?Sized>(c: &Cow<T>) -> bool {
    match *c {
        Cow::Owned(_) => true,
        Cow::Borrowed(_) => false,
    }
}
186
/// Canonicalizes an optional subtag (or run of subtags) to ASCII lowercase.
///
/// Returns an empty borrowed string for `None`. The input is borrowed unchanged when it contains
/// no ASCII uppercase letter, so callers can use `is_owned` to learn whether anything changed.
fn canon_lower<'a>(o: Option<&'a str>) -> Cow<'a, str> {
    match o {
        None => Cow::Borrowed(""),
        // The conversion is ASCII-only, so the check is ASCII-only as well; otherwise a
        // non-ASCII uppercase character would force a pointless identical copy.
        Some(s) if s.bytes().any(|b| b.is_ascii_uppercase()) => {
            Cow::Owned(s.to_ascii_lowercase())
        }
        Some(s) => Cow::Borrowed(s),
    }
}
198
/// Canonicalizes an optional script subtag to the "first capital" form (e.g. `-Latn`).
///
/// The subtag is expected *with* its leading `-` (that is how the `script` group of
/// `REGULAR_LANGUAGE_RANGE_REGEX` captures it). Returns an empty borrowed string for `None` and
/// borrows the input whenever it already is canonical, including the `-*` wildcard.
///
/// # Panics
///
/// Panics if the subtag is shorter than two bytes or does not start with `-`.
fn canon_script<'a>(o: Option<&'a str>) -> Cow<'a, str> {
    assert!(o.map_or(true, |s| s.len() >= 2 && s.starts_with('-')));
    let s = match o {
        None => return Cow::Borrowed(""),
        Some(s) => s,
    };
    // Skipping the one-byte `-` keeps the slice on a character boundary; everything after that
    // is handled per character, so no further byte slicing is needed.
    let mut subtag = s[1..].chars();
    let initial_ok = subtag.next().map_or(true, |c| !c.is_ascii_lowercase());
    let rest_ok = subtag.all(|c| !c.is_ascii_uppercase());
    if initial_ok && rest_ok {
        return Cow::Borrowed(s);
    }
    let mut canonical = String::with_capacity(s.len());
    canonical.push('-');
    let mut subtag = s[1..].chars();
    if let Some(initial) = subtag.next() {
        canonical.push(initial.to_ascii_uppercase());
    }
    canonical.extend(subtag.map(|c| c.to_ascii_lowercase()));
    Cow::Owned(canonical)
}
214
/// Canonicalizes an optional region subtag to ASCII uppercase (e.g. `-US`).
///
/// The subtag is expected *with* its leading `-`. Returns an empty borrowed string for `None`
/// and borrows the input when it contains no ASCII lowercase letter (which covers numeric
/// regions and the `-*` wildcard).
///
/// # Panics
///
/// Panics if the subtag is shorter than two bytes or does not start with `-`.
fn canon_upper<'a>(o: Option<&'a str>) -> Cow<'a, str> {
    assert!(o.map_or(true, |s| s.len() > 1 && s.starts_with('-')));
    match o {
        None => Cow::Borrowed(""),
        // ASCII-only check to agree with the ASCII-only conversion below.
        Some(s) if s.bytes().any(|b| b.is_ascii_lowercase()) => {
            Cow::Owned(s.to_ascii_uppercase())
        }
        Some(s) => Cow::Borrowed(s),
    }
}
227
228	impl<'a> LanguageRange<'a> {
229	/// Construct LanguageRange from string, with normalization.
230	///
231	/// LanguageRange must follow the [RFC4647] syntax.
232	/// It will be case-normalized as recommended in [RFC5646] §2.1.1, namely:
233	///
234	/// `language`, if recognized, is written in lowercase,*
235	/// `script`, if recognized, is written with first capital,*
236	/// `country`, if recognized, is written in uppercase and*
237	/// all other subtags are written in lowercase.*
238	///
239	/// [RFC5646]: https://www.rfc-editor.org/rfc/rfc5646.txt
240	/// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt
241	pub fn new(lt: &'a str) -> Result<LanguageRange> {
242	if lt == "" {
243	return Ok(LanguageRange {
244	language: Cow::Borrowed(lt),
245	});
246	} else if let Some(caps) = REGULAR_LANGUAGE_RANGE_REGEX.captures(lt) {
247	let language = canon_lower(caps.name("language").map(\|m\| m.as_str()));
248	let script = canon_script(caps.name("script").map(\|m\| m.as_str()));
249	let region = canon_upper(caps.name("region").map(\|m\| m.as_str()));
250	let rest = canon_lower(caps.name("rest").map(\|m\| m.as_str()));
251	if is_owned(&language) \|\|
252	is_owned(&script) \|\|
253	is_owned(&region) \|\|
254	is_owned(&rest)
255	{
256	return Ok(LanguageRange {
257	language: Cow::Owned(
258	language.into_owned() +
259	script.borrow() +
260	region.borrow() +
261	rest.borrow()),
262	});
263	} else {
264	return Ok(LanguageRange {
265	language: Cow::Borrowed(lt),
266	});
267	}
268	} else if LANGUAGE_RANGE_REGEX.is_match(lt) {
269	return Ok(LanguageRange {
270	language: canon_lower(Some(lt)),
271	});
272	} else {
273	return Err(Error::NotWellFormed);
274	}
275	}
276
277	/// Return LanguageRange for the invariant locale.
278	///
279	/// Invariant language is identified simply by empty string.
280	pub fn invariant() -> LanguageRange<'static> {
281	LanguageRange { language: Cow::Borrowed("") }
282	}
283
284	/// Clone the internal data to extend lifetime.
285	pub fn into_static(self) -> LanguageRange<'static> {
286	LanguageRange {
287	language: Cow::Owned(self.language.into_owned())
288	}
289	}
290
291	/// Create new instance sharing the internal data.
292	pub fn to_shared(&'a self) -> Self {
293	LanguageRange {
294	language: Cow::Borrowed(self.language.borrow())
295	}
296	}
297
298	/// Create language tag from Unix/Linux/GNU locale tag.
299	///
300	/// Unix locale tags have the form
301	///
302	/// > language* [ `_` region ] [ `.` encoding ] [ `@` variant ]*
303	///
304	/// The language* and region have the same format as RFC5646. Encoding is not relevant*
305	/// here, since Rust always uses Utf-8. That leaves variant, which is unfortunately rather
306	/// free-form. So this function will translate known variants to corresponding RFC5646 subtags
307	/// and represent anything else with Unicode POSIX variant (`-u-va-`) extension.
308	///
309	/// Note: This function is public here for benefit of applications that may come across this
310	/// kind of tags from other sources than system configuration.
311	pub fn from_unix(s: &str) -> Result<LanguageRange<'static>> {
312	if let Some(caps) = UNIX_TAG_REGEX.captures(s) {
313	let src_variant = caps.name("variant").map(\|m\| m.as_str()).unwrap_or("").to_ascii_lowercase();
314	let mut res = caps.name("language").map(\|m\| m.as_str()).unwrap().to_ascii_lowercase();
315	let region = caps.name("region").map(\|m\| m.as_str()).unwrap_or("");
316	let mut script = "";
317	let mut variant = "";
318	let mut uvariant = "";
319	match src_variant.as_ref() {
320	// Variants seen in the wild in GNU LibC (via http://lh.2xlibre.net/) or in Debian
321	// GNU/Linux Stretch system. Treatment of things not found in RFC5646 subtag registry
322	// (http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry)
323	// or CLDR according to notes at https://wiki.openoffice.org/wiki/LocaleMapping.
324	// Dialects:
325	// aa_ER@saaho - NOTE: Can't be found under that name in RFC5646 subtag registry,
326	// but there is language Saho with code ssy, which is likely that thing.
327	"saaho" if res == "aa" => res = String::from("ssy"),
328	// Scripts:
329	// @arabic
330	"arabic" => script = "Arab",
331	// @cyrillic
332	"cyrl" => script = "Cyrl",
333	"cyrillic" => script = "Cyrl",
334	// @devanagari
335	"devanagari" => script = "Deva",
336	// @hebrew
337	"hebrew" => script = "Hebr",
338	// tt@iqtelif
339	// Neither RFC5646 subtag registry nor CLDR knows anything about this, but as best
340	// as I can tell it is Tatar name for Latin (default is Cyrillic).
341	"iqtelif" => script = "Latn",
342	// @Latn
343	"latn" => script = "Latn",
344	// @latin
345	"latin" => script = "Latn",
346	// en@shaw
347	"shaw" => script = "Shaw",
348	// Variants:
349	// sr@ijekavianlatin
350	"ijekavianlatin" => {
351	script = "Latn";
352	variant = "ijekavsk";
353	},
354	// sr@ije
355	"ije" => variant = "ijekavsk",
356	// sr@ijekavian
357	"ijekavian" => variant = "ijekavsk",
358	// ca@valencia
359	"valencia" => variant = "valencia",
360	// Currencies:
361	// @euro - NOTE: We follow suite of Java and Openoffice and ignore it, because it
362	// is default for all locales where it sometimes appears now, and because we use
363	// explicit currency in monetary formatting anyway.
364	"euro" => {},
365	// Collation:
366	// gez@abegede - NOTE: This is collation, but CLDR does not have any code for it,
367	// so we for the moment leave it fall through as -u-va- instead of -u-co-.
368	// Anything else:
369	// en@boldquot, en@quot, en@piglatin - just randomish stuff
370	// @cjknarrow - beware, it's gonna end up as -u-va-cjknarro due to lenght limit
371	s if s.len() <= `8` => uvariant = &*s,
372	s => uvariant = &s[`0`..`8`], // the subtags are limited to 8 chars, but some are longer
373	};
374	if script != "" {
375	res.push('-');
376	res.push_str(script);
377	}
378	if region != "" {
379	res.push('-');
380	res.push_str(&*region.to_ascii_uppercase());
381	}
382	if variant != "" {
383	res.push('-');
384	res.push_str(variant);
385	}
386	if uvariant != "" {
387	res.push_str("-u-va-");
388	res.push_str(uvariant);
389	}
390	return Ok(LanguageRange {
391	language: Cow::Owned(res)
392	});
393	} else if UNIX_INVARIANT_REGEX.is_match(s) {
394	return Ok(LanguageRange::invariant())
395	} else {
396	return Err(Error::NotWellFormed);
397	}
398	}
399	}
400
401	impl<'a> AsRef<str> for LanguageRange<'a> {
402	fn as_ref(&self) -> &str {
403	self.language.as_ref()
404	}
405	}
406
407	impl<'a> fmt::Display for LanguageRange<'a> {
408	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
409	self.language.fmt(f)
410	}
411	}
412
413	// -------------------------------- LOCALE -------------------------------------
414
/// Locale configuration.
///
/// Users may accept several languages in some order of preference and may want to use rules from
/// different culture for some particular aspect of the program behaviour, and operating systems
/// allow them to specify this (to various extent).
///
/// The `Locale` objects represent the user configuration. They contain:
///
///  - The primary `LanguageRange`.
///  - Optional category-specific overrides.
///  - Optional fallbacks in case data (usually translations) for the primary language are not
///    available.
///
/// The set of categories is open-ended. The `locale` crate uses five well-known categories
/// `messages`, `numeric`, `time`, `collate` and `monetary`, but some systems define additional
/// ones (GNU/Linux has additionally `paper`, `name`, `address`, `telephone` and `measurement`)
/// and these are provided in the user default `Locale` and other libraries can use them.
///
/// `Locale` is represented by a `,`-separated sequence of tags in `LanguageRange` syntax, where
/// all except the first one may be preceded by category name and `=` sign.
///
/// The first tag indicates the default locale, the tags prefixed by category names indicate
/// _overrides_ for those categories and the remaining tags indicate fallbacks.
///
/// Note that a syntactically valid value of HTTP `Accept-Language` header is a valid `Locale`. Not
/// the other way around though due to the presence of category selectors.
// TODO: Interning
#[derive(Clone,Debug,Eq,Hash,PartialEq)]
pub struct Locale {
    // TODO: Intern the string for performance reasons
    // XXX: Store pre-split to LanguageTags?
    // The complete `,`-separated representation described above; the first element is always
    // the primary tag.
    inner: String,
}
448
449	lazy_static! {
450	static ref LOCALE_ELEMENT_REGEX: Regex = Regex::new(r"(?ix) ^
451	(?: (?P<category> [[:alpha:]]{1,20} ) = )?
452	(?P<tag> (?: [[:alnum:]] \| - \| \* )+ )
453	$ ").unwrap();
454	}
455
456	impl Locale {
457	/// Obtain the user default locale.
458	///
459	/// This is the locale indicated by operating environment.
460	pub fn user_default() -> Locale {
461	USER_LOCALE.clone()
462	}
463
464	/// Obtain the global default locale.
465	///
466	/// The global default for `current()` locale. Defaults to `user_default()`.
467	pub fn global_default() -> Locale {
468	GLOBAL_LOCALE.lock().unwrap().clone()
469	}
470
471	/// Change the global default locale.
472	///
473	/// Setting this overrides the default for new threads and threads that didn't do any
474	/// locale-aware operation yet.
475	pub fn set_global_default(lb: Locale) {
476	*GLOBAL_LOCALE.lock().unwrap() = lb;
477	}
478
479	/// Obtain the current locale of current thread.
480	///
481	/// Defaults to `global_default()` on first use in each thread.
482	pub fn current() -> Locale {
483	CURRENT_LOCALE.with(\|l\| l.borrow().clone())
484	}
485
486	/// Change the current locale of current thread.
487	pub fn set_current(lb: Locale) {
488	CURRENT_LOCALE.with(\|l\| *l.borrow_mut() = lb);
489	}
490
491	/// Construct locale from the string representation.
492	///
493	/// `Locale` is represented by a `,`-separated sequence of tags in `LanguageRange` syntax, where
494	/// all except the first one may be preceded by category name and `=` sign.
495	///
496	/// The first tag indicates the default locale, the tags prefixed by category names indicate
497	/// _overrides_ for those categories and the remaining tags indicate fallbacks.
498	pub fn new(s: &str) -> Result<Locale> {
499	let mut i = s.split(',');
500	let mut res = Locale::from(
501	try!(LanguageRange::new(
502	i.next().unwrap()))); // NOTE: split "" is (""), not ()
503	for t in i {
504	if let Some(caps) = LOCALE_ELEMENT_REGEX.captures(t) {
505	let tag = try!(LanguageRange::new(
506	try!(caps.name("tag").map(\|m\| m.as_str()).ok_or(Error::NotWellFormed))));
507	match caps.name("category").map(\|m\| m.as_str()) {
508	Some(cat) => res.add_category(cat.to_ascii_lowercase().as_ref(), &tag),
509	None => res.add(&tag),
510	}
511	} else {
512	return Err(Error::NotWellFormed);
513	}
514	}
515	return Ok(res);
516	}
517
518	/// Construct invariant locale.
519	///
520	/// Invariant locale is represented simply with empty string.
521	pub fn invariant() -> Locale {
522	Locale::from(LanguageRange::invariant())
523	}
524
525	/// Append fallback language tag.
526	///
527	/// Adds fallback to the end of the list.
528	pub fn add(&mut self, tag: &LanguageRange) {
529	for i in self.inner.split(',') {
530	if i == tag.as_ref() {
531	return; // don't add duplicates
532	}
533	}
534	self.inner.push_str(",");
535	self.inner.push_str(tag.as_ref());
536	}
537
538	/// Append category override.
539	///
540	/// Appending new override for a category that already has one will not replace the existing
541	/// override. This might change in future.
542	pub fn add_category(&mut self, category: &str, tag: &LanguageRange) {
543	if self.inner.split(',').next().unwrap() == tag.as_ref() {
544	return; // don't add useless override equal to the primary tag
545	}
546	for i in self.inner.split(',') {
547	if i.starts_with(category) &&
548	i[category.len()..].starts_with("=") &&
549	&i[category.len() + `1`..] == tag.as_ref() {
550	return; // don't add duplicates
551	}
552	}
553	self.inner.push_str(",");
554	self.inner.push_str(category);
555	self.inner.push_str("=");
556	self.inner.push_str(tag.as_ref());
557	}
558
559	/// Iterate over `LanguageRange`s in this `Locale`.
560	///
561	/// Returns tuples of optional category (as string) and corresponding `LanguageRange`. All tags
562	/// in the list are returned, in order of preference.
563	///
564	/// The iterator is guaranteed to return at least one value.
565	pub fn tags<'a>(&'a self) -> Tags<'a> {
566	Tags { tags: self.inner.split(","), }
567	}
568
569	/// Iterate over `LanguageRange`s in this `Locale` applicable to given category.
570	///
571	/// Returns `LanguageRange`s in the `Locale` that are applicable to provided category. The tags
572	/// are returned in order of preference, which means the category-specific ones first and then
573	/// the generic ones.
574	///
575	/// The iterator is guaranteed to return at least one value.
576	pub fn tags_for<'a, 'c>(&'a self, category: &'c str) -> TagsFor<'a, 'c> {
577	let mut tags = self.inner.split(",");
578	while let Some(s) = tags.clone().next() {
579	if s.starts_with(category) && s[category.len()..].starts_with("=") {
580	return TagsFor {
581	src: self.inner.as_ref(),
582	tags: tags,
583	category: Some(category),
584	};
585	}
586	tags.next();
587	}
588	return TagsFor {
589	src: self.inner.as_ref(),
590	tags: self.inner.split(","),
591	category: None,
592	};
593	}
594	}
595
596	/// Locale is specified by a string tag. This is the way to access it.
597	// FIXME: Do we want to provide the full string representation? We would have it as single string
598	// then.
599	impl AsRef<str> for Locale {
600	fn as_ref(&self) -> &str {
601	self.inner.as_ref()
602	}
603	}
604
605	impl<'a> From<LanguageRange<'a>> for Locale {
606	fn from(t: LanguageRange<'a>) -> Locale {
607	Locale {
608	inner: t.language.into_owned(),
609	}
610	}
611	}
612
613	impl fmt::Display for Locale {
614	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
615	self.inner.fmt(f)
616	}
617	}
618
619	/// Iterator over `LanguageRange`s for all categories in a `Locale`
620	///
621	/// Returns tuples of optional category (as string) and corresponding `LanguageRange`. All tags
622	/// in the list are returned, in order of preference.
623	///
624	/// The iterator is guaranteed to return at least one value.
625	pub struct Tags<'a> {
626	tags: std::str::Split<'a, &'static str>,
627	}
628
629	impl<'a> Iterator for Tags<'a> {
630	type Item = (Option<&'a str>, LanguageRange<'a>);
631	fn next(&mut self) -> Option<Self::Item> {
632	if let Some(s: &'a str) = self.tags.next() {
633	if let Some(i: usize) = s.find('=') {
634	return Some((
635	Some(&s[..i]),
636	LanguageRange { language: Cow::Borrowed(&s[i+`1`..]), }));
637	} else {
638	return Some((
639	None,
640	LanguageRange { language: Cow::Borrowed(s), }));
641	}
642	} else {
643	return None;
644	}
645	}
646	}
647
648	/// Iterator over `LanguageRange`s for specific category in a `Locale`
649	///
650	/// Returns `LanguageRange`s in the `Locale` that are applicable to provided category. The tags
651	/// are returned in order of preference, which means the category-specific ones first and then
652	/// the generic ones.
653	///
654	/// The iterator is guaranteed to return at least one value.
655	pub struct TagsFor<'a, 'c> {
656	src: &'a str,
657	tags: std::str::Split<'a, &'static str>,
658	category: Option<&'c str>,
659	}
660
661	impl<'a, 'c> Iterator for TagsFor<'a, 'c> {
662	type Item = LanguageRange<'a>;
663	fn next(&mut self) -> Option<Self::Item> {
664	if let Some(cat: &'c str) = self.category {
665	while let Some(s: &'a str) = self.tags.next() {
666	if s.starts_with(cat) && s[cat.len()..].starts_with("=") {
667	return Some(
668	LanguageRange { language: Cow::Borrowed(&s[cat.len()+`1`..]) });
669	}
670	}
671	self.category = None;
672	self.tags = self.src.split(",");
673	}
674	while let Some(s: &'a str) = self.tags.next() {
675	if s.find('=').is_none() {
676	return Some(
677	LanguageRange{ language: Cow::Borrowed(s) });
678	}
679	}
680	return None;
681	}
682	}
683
684	// ------------------------------- INSTANCES -----------------------------------
685
// TODO: We only need this until either std::sync::StaticMutex or std::sync::Mutex becomes usable
// with normal `static`.
// FIX-THE-TODO: Do we? A mutex might be usable, but we still need to initialize the value inside
// on first access!
lazy_static! {
    // Locale detected from the operating environment by `system_locale()`; computed on first
    // access and handed out (cloned) by `Locale::user_default()`.
    static ref USER_LOCALE: Locale = system_locale();
    // Process-wide default; starts as the user default and can be replaced through
    // `Locale::set_global_default()`.
    static ref GLOBAL_LOCALE: Mutex<Locale> = Mutex::new(Locale::user_default());
}
695
// Per-thread current locale; initialised from `Locale::global_default()` and changed through
// `Locale::set_current()`.
thread_local!(
    static CURRENT_LOCALE: RefCell<Locale> = RefCell::new(Locale::global_default())
);
699
// NOTE: Cgi-style environment variable HTTP_ACCEPT_LANGUAGE is unlikely to be defined at any other
// time than when actually executing in CGI, so we can relatively safely always interpret it.
mod cgi;

// NOTE: Unix-style environment variables are actually inspected everywhere, because many users
// have them, and because some software only uses those even on Windows and other systems.
mod unix;

// NOTE: The functions used exist only from Vista on.
#[cfg(target_family = "windows")]
mod win32;

// Emscripten support
#[cfg(target_os = "emscripten")]
mod emscripten;

// macOS support
#[cfg(target_os = "macos")]
mod macos;
719
// Locale detectors in order of precedence; `system_locale()` uses the first one that returns
// a value. The platform-specific entries are only compiled in on the matching target.
static INITIALISERS: &'static [fn() -> Option<Locale>] = &[
    cgi::system_locale,
    unix::system_locale,
    #[cfg(target_family = "windows")] win32::system_locale,
    #[cfg(target_os = "emscripten")] emscripten::system_locale,
    #[cfg(target_os = "macos")] macos::system_locale,
];
727
728	fn system_locale() -> Locale {
729	for f: &'static fn() -> Option in INITIALISERS {
730	if let Some(l: Locale) = f() {
731	return l;
732	}
733	}
734	return Locale::invariant();
735	}
736
737	// --------------------------------- ERRORS ------------------------------------
738
739	/// Errors that may be returned by `locale_config`.
740	#[derive(Copy,Clone,Debug,PartialEq,Eq)]
741	pub enum Error {
742	/// Provided definition was not well formed.
743	///
744	/// This is returned when provided configuration string does not match even the rather loose
745	/// definition for language range from [RFC4647] or the composition format used by `Locale`.
746	///
747	/// [RFC4647]: https://www.rfc-editor.org/rfc/rfc4647.txt
748	NotWellFormed,
749	/// Placeholder for adding more errors in future. Do not match!.
750	__NonExhaustive,
751	}
752
753	impl ::std::fmt::Display for Error {
754	fn fmt(&self, out: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
755	use ::std::error::Error;
756	out.write_str(self.description())
757	}
758	}
759
760	impl ::std::error::Error for Error {
761	fn description(&self) -> &str {
762	match self {
763	&Error::NotWellFormed => "Language tag is not well-formed.",
764	// this is exception: here we do want exhaustive match so we don't publish version with
765	// missing descriptions by mistake.
766	&Error::__NonExhaustive => panic!("Placeholder error must not be instantiated!"),
767	}
768	}
769	}
770
771	/// Convenience Result alias.
772	type Result<T> = ::std::result::Result<T, Error>;
773
774	// --------------------------------- TESTS -------------------------------------
775
#[cfg(test)]
mod test {
    use super::LanguageRange;
    use super::Locale;
    use super::is_owned;
    use std::iter::FromIterator;

    #[test]
    fn simple_valid_lang_ranges() {
        assert_eq!("en-US", LanguageRange::new("en-US").unwrap().as_ref());
        assert_eq!("en-US", LanguageRange::new("EN-US").unwrap().as_ref());
        assert_eq!("en", LanguageRange::new("en").unwrap().as_ref());
        assert_eq!("eng-Latn-840", LanguageRange::new("eng-Latn-840").unwrap().as_ref());
        assert_eq!("english", LanguageRange::new("English").unwrap().as_ref());
    }

    #[test]
    fn wildcard_lang_ranges() {
        // The empty range is the invariant locale.
        assert_eq!("", LanguageRange::new("").unwrap().as_ref());
        assert_eq!("*", LanguageRange::new("*").unwrap().as_ref());
        assert_eq!("zh-*", LanguageRange::new("zh-*").unwrap().as_ref());
        assert_eq!("zh-*-CN", LanguageRange::new("zh-*-cn").unwrap().as_ref());
        assert_eq!("en-*-simple-*", LanguageRange::new("En-*-Simple-*").unwrap().as_ref());
        assert_eq!("zh-Hans-*", LanguageRange::new("zh-hans-*").unwrap().as_ref());
    }

    #[test]
    fn complex_valid_lang_ranges() {
        assert_eq!("de-DE-u-email-co-phonebk-x-linux",
                LanguageRange::new("de-DE-u-email-co-phonebk-x-linux").unwrap().as_ref());
        assert_eq!("vi-VN-u-fw-mon-hc-h24-ms-metric",
                LanguageRange::new("vi-vn-u-fw-mon-hc-h24-ms-metric").unwrap().as_ref());
        assert_eq!("sl-Cyrl-YU-rozaj-solba-1994-b-1234-a-foobar-x-b-1234-a-foobar",
                LanguageRange::new("sl-Cyrl-YU-rozaj-solba-1994-b-1234-a-Foobar-x-b-1234-a-Foobar").unwrap().as_ref());
    }

    #[test]
    fn invalid_lang_range_invalid_char() {
        assert!(LanguageRange::new("not a range").is_err());
    }

    #[test]
    fn invalid_lang_range_long_element() {
        assert!(LanguageRange::new("de-DE-u-email-co-phonebook-x-linux").is_err());
    }

    #[test]
    fn invalid_lang_range_leading_number() {
        assert!(LanguageRange::new("840").is_err());
    }

    #[test]
    fn invalid_lang_range_bad_asterisk() {
        assert!(LanguageRange::new("e*-US").is_err());
        assert!(LanguageRange::new("en-*s").is_err());
    }

    #[test]
    fn normal_lang_range() {
        // Check that the string is not copied if the tag is canonical
        assert!(!is_owned(&LanguageRange::new("en-US").unwrap().language));
        assert!(!is_owned(&LanguageRange::new("en").unwrap().language));
        assert!(!is_owned(&LanguageRange::new("zh-Hant-CN").unwrap().language));
        assert!(!is_owned(&LanguageRange::new("cs-CZ-x-ds-002e").unwrap().language));
        assert!(!is_owned(&LanguageRange::new("czech").unwrap().language));
    }

    #[test]
    fn locale_simple() {
        assert_eq!("en-US", Locale::new("en-US").unwrap().as_ref());
        assert_eq!("zh-Hant", Locale::new("zh-hant").unwrap().as_ref());
        assert_eq!("de-*", Locale::new("de-*").unwrap().as_ref());
        assert!(Locale::new("invalid!").is_err());
        assert!(Locale::new("hı-İN").is_err());
    }

    #[test]
    fn locale_list() {
        assert_eq!("cs-CZ,en-GB,en,*", Locale::new("cs-cz,en-gb,en,*").unwrap().as_ref());
        assert_eq!("cs-CZ,engrish", Locale::new("cs-cz,engrish").unwrap().as_ref());
        assert!(Locale::new("cs-cz,hı-İN").is_err());
    }

    #[test]
    fn locale_category() {
        assert_eq!("cs-CZ,messages=en-GB",
                Locale::new("cs-CZ,messages=en-GB").unwrap().as_ref());
        assert_eq!("zh-Hant,time=ja-JP,measurement=en-US",
                Locale::new("zh-hant,TIME=ja-jp,meaSURement=en-US").unwrap().as_ref());
        // the first item must be plain language tag
        assert!(Locale::new("messages=pl").is_err());
        // adding general alternate should not help
        assert!(Locale::new("numeric=de,fr-FR").is_err());
    }

    #[test]
    fn locale_dups() {
        assert_eq!("cs-CZ,en,de-AT", Locale::new("cs-CZ,en,de-AT,en").unwrap().as_ref());
        assert_eq!("en-US,en", Locale::new("en-us,en-US,EN,eN-Us,en").unwrap().as_ref());
    }

    #[test]
    fn locale_category_dups() {
        assert_eq!("cs-CZ",
                Locale::new("cs-CZ,messages=cs-CZ,time=cs-cz,collate=CS-cz").unwrap().as_ref());
        assert_eq!("de-AT,en-AU",
                Locale::new("de-AT,en-AU,messages=de-AT").unwrap().as_ref());
        // category overrides override, so don't drop if they are only equal to alternates
        assert_eq!("de-AT,en-AU,messages=en-AU",
                Locale::new("de-AT,en-AU,messages=en-AU").unwrap().as_ref());
        assert_eq!("hi-IN,time=en-IN",
                Locale::new("hi-IN,time=en-IN,TIME=EN-in,TiMe=En-iN").unwrap().as_ref());
    }

    #[test]
    fn unix_tags() {
        assert_eq!("cs-CZ", LanguageRange::from_unix("cs_CZ.UTF-8").unwrap().as_ref());
        assert_eq!("sr-RS-ijekavsk", LanguageRange::from_unix("sr_RS@ijekavian").unwrap().as_ref());
        assert_eq!("sr-Latn-ijekavsk", LanguageRange::from_unix("sr.UTF-8@ijekavianlatin").unwrap().as_ref());
        assert_eq!("en-Arab", LanguageRange::from_unix("en@arabic").unwrap().as_ref());
        assert_eq!("en-Arab", LanguageRange::from_unix("en.UTF-8@arabic").unwrap().as_ref());
        assert_eq!("de-DE", LanguageRange::from_unix("DE_de.UTF-8@euro").unwrap().as_ref());
        assert_eq!("ssy-ER", LanguageRange::from_unix("aa_ER@saaho").unwrap().as_ref());
        assert!(LanguageRange::from_unix("foo_BAR").is_err());
        assert!(LanguageRange::from_unix("en@arabic.UTF-8").is_err());
        assert_eq!("", LanguageRange::from_unix("C").unwrap().as_ref());
        assert_eq!("", LanguageRange::from_unix("C.UTF-8").unwrap().as_ref());
        assert_eq!("", LanguageRange::from_unix("C.ISO-8859-1").unwrap().as_ref());
        assert_eq!("", LanguageRange::from_unix("POSIX").unwrap().as_ref());
    }

    #[test]
    fn category_tag_list() {
        assert_eq!(
            Vec::from_iter(Locale::new("cs-CZ,messages=en-GB,time=de-DE,collate=en-US").unwrap().tags()),
            &[(None, LanguageRange::new("cs-CZ").unwrap()),
              (Some("messages"), LanguageRange::new("en-GB").unwrap()),
              (Some("time"), LanguageRange::new("de-DE").unwrap()),
              (Some("collate"), LanguageRange::new("en-US").unwrap()),
            ]);
    }

    #[test]
    fn tag_list_for() {
        let locale = Locale::new("cs-CZ,messages=en-GB,time=de-DE,sk-SK,pl-PL").unwrap();
        assert_eq!(
            Vec::from_iter(locale.tags_for("messages")),
            &[LanguageRange::new("en-GB").unwrap(),
              LanguageRange::new("cs-CZ").unwrap(),
              LanguageRange::new("sk-SK").unwrap(),
              LanguageRange::new("pl-PL").unwrap(),
            ]);
        assert_eq!(
            Vec::from_iter(locale.tags_for("time")),
            &[LanguageRange::new("de-DE").unwrap(),
              LanguageRange::new("cs-CZ").unwrap(),
              LanguageRange::new("sk-SK").unwrap(),
              LanguageRange::new("pl-PL").unwrap(),
            ]);
        assert_eq!(
            Vec::from_iter(locale.tags_for("measurement")),
            &[LanguageRange::new("cs-CZ").unwrap(),
              LanguageRange::new("sk-SK").unwrap(),
              LanguageRange::new("pl-PL").unwrap(),
            ]);
    }
}
942