1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Unicode Extensions provide information about user preferences in a given locale.
6//!
7//! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and
8//! [`Attributes`].
9//!
10//!
11//! # Examples
12//!
13//! ```
14//! use icu::locid::extensions::unicode::{attribute, key, value, Unicode};
15//! use icu::locid::Locale;
16//!
17//! let loc: Locale = "en-US-u-foobar-hc-h12".parse().expect("Parsing failed.");
18//!
19//! assert_eq!(
20//! loc.extensions.unicode.keywords.get(&key!("hc")),
21//! Some(&value!("h12"))
22//! );
23//! assert!(loc
24//! .extensions
25//! .unicode
26//! .attributes
27//! .contains(&attribute!("foobar")));
28//! ```
29mod attribute;
30mod attributes;
31mod key;
32mod keywords;
33mod value;
34
35#[doc(inline)]
36pub use attribute::{attribute, Attribute};
37pub use attributes::Attributes;
38#[doc(inline)]
39pub use key::{key, Key};
40pub use keywords::Keywords;
41#[doc(inline)]
42pub use value::{value, Value};
43
44use crate::helpers::ShortSlice;
45use crate::parser::ParserError;
46use crate::parser::SubtagIterator;
47use litemap::LiteMap;
48
/// Unicode Extensions provide information about user preferences in a given locale.
///
/// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale
/// Identifier`] specification.
///
/// Unicode extensions provide subtags that specify language and/or locale-based behavior
/// or refinements to language tags, according to work done by the Unicode Consortium.
/// (See [`RFC 6067`] for details).
///
/// When written out, the extension consists of the `u` singleton followed by the
/// attributes and then the keywords, with `-` between the parts. An extension that has
/// neither attributes nor keywords is written as nothing at all.
///
/// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension
/// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{key, value};
/// use icu::locid::Locale;
///
/// let loc: Locale =
///     "de-u-hc-h12-ca-buddhist".parse().expect("Parsing failed.");
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&key!("ca")),
///     Some(&value!("buddhist"))
/// );
/// ```
// NOTE: the derived `PartialOrd`/`Ord` compare fields in declaration order (`keywords`
// first, then `attributes`), so reordering the fields would change how `Unicode` values
// compare.
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
pub struct Unicode {
    /// The key-value pairs present in this locale extension, with each extension key subtag
    /// associated to its provided value subtag.
    ///
    /// In `-u-hc-h12`, `hc` is the key and `h12` is its value.
    pub keywords: Keywords,
    /// A canonically ordered sequence of single standalone subtags for this locale extension.
    ///
    /// In `-u-foobar-hc-h12`, `foobar` is an attribute. The parser keeps attributes sorted
    /// and stores each distinct attribute only once.
    pub attributes: Attributes,
}
85
86impl Unicode {
87 /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`.
88 ///
89 /// # Examples
90 ///
91 /// ```
92 /// use icu::locid::extensions::unicode::Unicode;
93 ///
94 /// assert_eq!(Unicode::new(), Unicode::default());
95 /// ```
96 #[inline]
97 pub const fn new() -> Self {
98 Self {
99 keywords: Keywords::new(),
100 attributes: Attributes::new(),
101 }
102 }
103
104 /// Returns [`true`] if there list of keywords and attributes is empty.
105 ///
106 /// # Examples
107 ///
108 /// ```
109 /// use icu::locid::Locale;
110 ///
111 /// let loc: Locale = "en-US-u-foo".parse().expect("Parsing failed.");
112 ///
113 /// assert!(!loc.extensions.unicode.is_empty());
114 /// ```
115 pub fn is_empty(&self) -> bool {
116 self.keywords.is_empty() && self.attributes.is_empty()
117 }
118
119 /// Clears all Unicode extension keywords and attributes, effectively removing
120 /// the Unicode extension.
121 ///
122 /// # Example
123 ///
124 /// ```
125 /// use icu::locid::Locale;
126 ///
127 /// let mut loc: Locale =
128 /// "und-t-mul-u-hello-ca-buddhist-hc-h12".parse().unwrap();
129 /// loc.extensions.unicode.clear();
130 /// assert_eq!(loc, "und-t-mul".parse().unwrap());
131 /// ```
132 pub fn clear(&mut self) {
133 self.keywords.clear();
134 self.attributes.clear();
135 }
136
137 pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
138 let mut attributes = ShortSlice::new();
139
140 while let Some(subtag) = iter.peek() {
141 if let Ok(attr) = Attribute::try_from_bytes(subtag) {
142 if let Err(idx) = attributes.binary_search(&attr) {
143 attributes.insert(idx, attr);
144 }
145 } else {
146 break;
147 }
148 iter.next();
149 }
150
151 let mut keywords = LiteMap::new();
152
153 let mut current_keyword = None;
154 let mut current_value = ShortSlice::new();
155
156 while let Some(subtag) = iter.peek() {
157 let slen = subtag.len();
158 if slen == 2 {
159 if let Some(kw) = current_keyword.take() {
160 keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
161 current_value = ShortSlice::new();
162 }
163 current_keyword = Some(Key::try_from_bytes(subtag)?);
164 } else if current_keyword.is_some() {
165 match Value::parse_subtag(subtag) {
166 Ok(Some(t)) => current_value.push(t),
167 Ok(None) => {}
168 Err(_) => break,
169 }
170 } else {
171 break;
172 }
173 iter.next();
174 }
175
176 if let Some(kw) = current_keyword.take() {
177 keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
178 }
179
180 // Ensure we've defined at least one attribute or keyword
181 if attributes.is_empty() && keywords.is_empty() {
182 return Err(ParserError::InvalidExtension);
183 }
184
185 Ok(Self {
186 keywords: keywords.into(),
187 attributes: Attributes::from_short_slice_unchecked(attributes),
188 })
189 }
190
191 pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
192 where
193 F: FnMut(&str) -> Result<(), E>,
194 {
195 if self.is_empty() {
196 return Ok(());
197 }
198 f("u")?;
199 self.attributes.for_each_subtag_str(f)?;
200 self.keywords.for_each_subtag_str(f)?;
201 Ok(())
202 }
203}
204
// Going by the macro's name, this provides `Display` for `Unicode` on top of its
// `Writeable` implementation; see the `writeable` crate for the exact expansion.
writeable::impl_display_with_writeable!(Unicode);
206
207impl writeable::Writeable for Unicode {
208 fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
209 if self.is_empty() {
210 return Ok(());
211 }
212 sink.write_str("u")?;
213 if !self.attributes.is_empty() {
214 sink.write_char('-')?;
215 writeable::Writeable::write_to(&self.attributes, sink)?;
216 }
217 if !self.keywords.is_empty() {
218 sink.write_char('-')?;
219 writeable::Writeable::write_to(&self.keywords, sink)?;
220 }
221 Ok(())
222 }
223
224 fn writeable_length_hint(&self) -> writeable::LengthHint {
225 if self.is_empty() {
226 return writeable::LengthHint::exact(0);
227 }
228 let mut result = writeable::LengthHint::exact(1);
229 if !self.attributes.is_empty() {
230 result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1;
231 }
232 if !self.keywords.is_empty() {
233 result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1;
234 }
235 result
236 }
237}
238