1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Documentation on zero-copy deserialization of locale types.
6//!
7//! [`Locale`] and [`LanguageIdentifier`] are highly structured types that cannot be directly
8//! stored in a zero-copy data structure, such as those provided by the [`zerovec`] crate.
//! This page explains how to store these types indirectly in a [`zerovec`] data structure.
10//!
11//! There are two main use cases, which have different solutions:
12//!
13//! 1. **Lookup:** You need to locate a locale in a zero-copy vector, such as when querying a map.
14//! 2. **Obtain:** You have a locale stored in a zero-copy vector, and you need to obtain a proper
15//! [`Locale`] or [`LanguageIdentifier`] for use elsewhere in your program.
16//!
17//! # Lookup
18//!
19//! To perform lookup, store the stringified locale in a canonical BCP-47 form as a byte array,
20//! and then use [`Locale::strict_cmp()`] to perform an efficient, zero-allocation lookup.
21//!
22//! To produce more human-readable serialized output, you can use [`UnvalidatedStr`].
23//!
24//! ```
25//! use icu_locid::Locale;
26//! use zerovec::ule::UnvalidatedStr;
27//! use zerovec::ZeroMap;
28//!
29//! // ZeroMap from locales to integers
30//! let data: &[(&UnvalidatedStr, u32)] = &[
31//! ("de-DE-u-hc-h12".into(), 5),
32//! ("en-US-u-ca-buddhist".into(), 10),
33//! ("my-MM".into(), 15),
34//! ("sr-Cyrl-ME".into(), 20),
35//! ("zh-TW".into(), 25),
36//! ];
37//! let zm: ZeroMap<UnvalidatedStr, u32> = data.iter().copied().collect();
38//!
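//! // Get the value associated with a locale.
//! // `strict_cmp` orders `loc` relative to the stored key, whereas the lookup predicate
//! // must order the stored key relative to the target, hence the `reverse()`.
//! let loc: Locale = "en-US-u-ca-buddhist".parse().unwrap();
//! let value = zm.get_copied_by(|uvstr| loc.strict_cmp(uvstr).reverse());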
42//! assert_eq!(value, Some(10));
43//! ```
44//!
45//! # Obtain
46//!
47//! Obtaining a [`Locale`] or [`LanguageIdentifier`] is not generally a zero-copy operation, since
48//! both of these types may require memory allocation. If possible, architect your code such that
49//! you do not need to obtain a structured type.
50//!
51//! If you need the structured type, such as if you need to manipulate it in some way, there are two
52//! options: storing subtags, and storing a string for parsing.
53//!
54//! ## Storing Subtags
55//!
56//! If the data being stored only contains a limited number of subtags, you can store them as a
57//! tuple, and then construct the [`LanguageIdentifier`] externally.
58//!
59//! ```
60//! use icu_locid::subtags::{Language, Region, Script};
61//! use icu_locid::LanguageIdentifier;
62//! use icu_locid::{
63//! langid,
64//! subtags::{language, region, script},
65//! };
66//! use zerovec::ZeroMap;
67//!
68//! // ZeroMap from integer to LSR (language-script-region)
69//! let zm: ZeroMap<u32, (Language, Option<Script>, Option<Region>)> = [
70//! (5, (language!("de"), None, Some(region!("DE")))),
71//! (10, (language!("en"), None, Some(region!("US")))),
72//! (15, (language!("my"), None, Some(region!("MM")))),
73//! (
74//! 20,
75//! (language!("sr"), Some(script!("Cyrl")), Some(region!("ME"))),
76//! ),
77//! (25, (language!("zh"), None, Some(region!("TW")))),
78//! ]
79//! .into_iter()
80//! .collect();
81//!
82//! // Construct a LanguageIdentifier from a tuple entry
83//! let lid: LanguageIdentifier =
84//! zm.get_copied(&25).expect("element is present").into();
85//!
86//! assert_eq!(lid, langid!("zh-TW"));
87//! ```
88//!
89//! ## Storing Strings
90//!
91//! If it is necessary to store and obtain an arbitrary locale, it is currently recommended to
92//! store a BCP-47 string and parse it when needed.
93//!
//! Since the string is stored in an unparsed state, nothing guarantees that it is a well-formed
//! locale, so it is not safe to `unwrap` the result from `Locale::try_from_bytes()`. See
//! [icu4x#831](https://github.com/unicode-org/icu4x/issues/831) for a discussion on potential
//! data models that could ensure that the locale is valid during deserialization.
98//!
99//! As above, to produce more human-readable serialized output, you can use [`UnvalidatedStr`].
100//!
101//! ```
102//! use icu_locid::langid;
103//! use icu_locid::Locale;
104//! use zerovec::ule::UnvalidatedStr;
105//! use zerovec::ZeroMap;
106//!
107//! // ZeroMap from integer to locale string
108//! let data: &[(u32, &UnvalidatedStr)] = &[
109//! (5, "de-DE-u-hc-h12".into()),
110//! (10, "en-US-u-ca-buddhist".into()),
111//! (15, "my-MM".into()),
112//! (20, "sr-Cyrl-ME".into()),
113//! (25, "zh-TW".into()),
114//! (30, "INVALID".into()),
115//! ];
116//! let zm: ZeroMap<u32, UnvalidatedStr> = data.iter().copied().collect();
117//!
118//! // Construct a Locale by parsing the string.
119//! let value = zm.get(&25).expect("element is present");
120//! let loc = Locale::try_from_bytes(value);
121//! assert_eq!(loc, Ok(langid!("zh-TW").into()));
122//!
//! // Parsing an invalid entry returns an error
124//! let err_value = zm.get(&30).expect("element is present");
125//! let err_loc = Locale::try_from_bytes(err_value);
126//! assert!(matches!(err_loc, Err(_)));
127//! ```
128//!
129//! [`Locale`]: crate::Locale
130//! [`Locale::strict_cmp()`]: crate::Locale::strict_cmp()
131//! [`LanguageIdentifier`]: crate::LanguageIdentifier
132//! [`UnvalidatedStr`]: zerovec::ule::UnvalidatedStr
133