| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | //! Unicode Extensions provide information about user preferences in a given locale. |
| 6 | //! |
| 7 | //! The main struct for this extension is [`Unicode`] which contains [`Keywords`] and |
| 8 | //! [`Attributes`]. |
| 9 | //! |
| 10 | //! |
| 11 | //! # Examples |
| 12 | //! |
| 13 | //! ``` |
| 14 | //! use icu::locid::extensions::unicode::{attribute, key, value, Unicode}; |
| 15 | //! use icu::locid::Locale; |
| 16 | //! |
| 17 | //! let loc: Locale = "en-US-u-foobar-hc-h12" .parse().expect("Parsing failed." ); |
| 18 | //! |
| 19 | //! assert_eq!( |
| 20 | //! loc.extensions.unicode.keywords.get(&key!("hc" )), |
| 21 | //! Some(&value!("h12" )) |
| 22 | //! ); |
| 23 | //! assert!(loc |
| 24 | //! .extensions |
| 25 | //! .unicode |
| 26 | //! .attributes |
| 27 | //! .contains(&attribute!("foobar" ))); |
| 28 | //! ``` |
| 29 | mod attribute; |
| 30 | mod attributes; |
| 31 | mod key; |
| 32 | mod keywords; |
| 33 | mod value; |
| 34 | |
| 35 | use core::cmp::Ordering; |
| 36 | |
| 37 | #[doc (inline)] |
| 38 | pub use attribute::{attribute, Attribute}; |
| 39 | pub use attributes::Attributes; |
| 40 | #[doc (inline)] |
| 41 | pub use key::{key, Key}; |
| 42 | pub use keywords::Keywords; |
| 43 | #[doc (inline)] |
| 44 | pub use value::{value, Value}; |
| 45 | |
| 46 | use crate::parser::ParserError; |
| 47 | use crate::parser::SubtagIterator; |
| 48 | use crate::shortvec::ShortBoxSlice; |
| 49 | use litemap::LiteMap; |
| 50 | |
| 51 | /// Unicode Extensions provide information about user preferences in a given locale. |
| 52 | /// |
| 53 | /// A list of [`Unicode BCP47 U Extensions`] as defined in [`Unicode Locale |
| 54 | /// Identifier`] specification. |
| 55 | /// |
| 56 | /// Unicode extensions provide subtags that specify language and/or locale-based behavior |
| 57 | /// or refinements to language tags, according to work done by the Unicode Consortium. |
| 58 | /// (See [`RFC 6067`] for details). |
| 59 | /// |
| 60 | /// [`Unicode BCP47 U Extensions`]: https://unicode.org/reports/tr35/#u_Extension |
| 61 | /// [`RFC 6067`]: https://www.ietf.org/rfc/rfc6067.txt |
| 62 | /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier |
| 63 | /// |
| 64 | /// # Examples |
| 65 | /// |
| 66 | /// ``` |
| 67 | /// use icu::locid::extensions::unicode::{key, value}; |
| 68 | /// use icu::locid::Locale; |
| 69 | /// |
| 70 | /// let loc: Locale = |
| 71 | /// "de-u-hc-h12-ca-buddhist" .parse().expect("Parsing failed." ); |
| 72 | /// |
| 73 | /// assert_eq!( |
| 74 | /// loc.extensions.unicode.keywords.get(&key!("ca" )), |
| 75 | /// Some(&value!("buddhist" )) |
| 76 | /// ); |
| 77 | /// ``` |
| 78 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)] |
| 79 | #[allow (clippy::exhaustive_structs)] // spec-backed stable datastructure |
| 80 | pub struct Unicode { |
| 81 | /// The key-value pairs present in this locale extension, with each extension key subtag |
| 82 | /// associated to its provided value subtag. |
| 83 | pub keywords: Keywords, |
| 84 | /// A canonically ordered sequence of single standalone subtags for this locale extension. |
| 85 | pub attributes: Attributes, |
| 86 | } |
| 87 | |
| 88 | impl Unicode { |
| 89 | /// Returns a new empty map of Unicode extensions. Same as [`default()`](Default::default()), but is `const`. |
| 90 | /// |
| 91 | /// # Examples |
| 92 | /// |
| 93 | /// ``` |
| 94 | /// use icu::locid::extensions::unicode::Unicode; |
| 95 | /// |
| 96 | /// assert_eq!(Unicode::new(), Unicode::default()); |
| 97 | /// ``` |
| 98 | #[inline ] |
| 99 | pub const fn new() -> Self { |
| 100 | Self { |
| 101 | keywords: Keywords::new(), |
| 102 | attributes: Attributes::new(), |
| 103 | } |
| 104 | } |
| 105 | |
| 106 | /// Returns [`true`] if there list of keywords and attributes is empty. |
| 107 | /// |
| 108 | /// # Examples |
| 109 | /// |
| 110 | /// ``` |
| 111 | /// use icu::locid::Locale; |
| 112 | /// |
| 113 | /// let loc: Locale = "en-US-u-foo" .parse().expect("Parsing failed." ); |
| 114 | /// |
| 115 | /// assert!(!loc.extensions.unicode.is_empty()); |
| 116 | /// ``` |
| 117 | pub fn is_empty(&self) -> bool { |
| 118 | self.keywords.is_empty() && self.attributes.is_empty() |
| 119 | } |
| 120 | |
| 121 | /// Clears all Unicode extension keywords and attributes, effectively removing |
| 122 | /// the Unicode extension. |
| 123 | /// |
| 124 | /// # Example |
| 125 | /// |
| 126 | /// ``` |
| 127 | /// use icu::locid::Locale; |
| 128 | /// |
| 129 | /// let mut loc: Locale = |
| 130 | /// "und-t-mul-u-hello-ca-buddhist-hc-h12" .parse().unwrap(); |
| 131 | /// loc.extensions.unicode.clear(); |
| 132 | /// assert_eq!(loc, "und-t-mul" .parse().unwrap()); |
| 133 | /// ``` |
| 134 | pub fn clear(&mut self) { |
| 135 | self.keywords.clear(); |
| 136 | self.attributes.clear(); |
| 137 | } |
| 138 | |
| 139 | pub(crate) fn as_tuple(&self) -> (&Attributes, &Keywords) { |
| 140 | (&self.attributes, &self.keywords) |
| 141 | } |
| 142 | |
| 143 | /// Returns an ordering suitable for use in [`BTreeSet`]. |
| 144 | /// |
| 145 | /// The ordering may or may not be equivalent to string ordering, and it |
| 146 | /// may or may not be stable across ICU4X releases. |
| 147 | /// |
| 148 | /// [`BTreeSet`]: alloc::collections::BTreeSet |
| 149 | pub fn total_cmp(&self, other: &Self) -> Ordering { |
| 150 | self.as_tuple().cmp(&other.as_tuple()) |
| 151 | } |
| 152 | |
| 153 | pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { |
| 154 | let mut attributes = ShortBoxSlice::new(); |
| 155 | |
| 156 | while let Some(subtag) = iter.peek() { |
| 157 | if let Ok(attr) = Attribute::try_from_bytes(subtag) { |
| 158 | if let Err(idx) = attributes.binary_search(&attr) { |
| 159 | attributes.insert(idx, attr); |
| 160 | } |
| 161 | } else { |
| 162 | break; |
| 163 | } |
| 164 | iter.next(); |
| 165 | } |
| 166 | |
| 167 | let mut keywords = LiteMap::new(); |
| 168 | |
| 169 | let mut current_keyword = None; |
| 170 | let mut current_value = ShortBoxSlice::new(); |
| 171 | |
| 172 | while let Some(subtag) = iter.peek() { |
| 173 | let slen = subtag.len(); |
| 174 | if slen == 2 { |
| 175 | if let Some(kw) = current_keyword.take() { |
| 176 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
| 177 | current_value = ShortBoxSlice::new(); |
| 178 | } |
| 179 | current_keyword = Some(Key::try_from_bytes(subtag)?); |
| 180 | } else if current_keyword.is_some() { |
| 181 | match Value::parse_subtag(subtag) { |
| 182 | Ok(Some(t)) => current_value.push(t), |
| 183 | Ok(None) => {} |
| 184 | Err(_) => break, |
| 185 | } |
| 186 | } else { |
| 187 | break; |
| 188 | } |
| 189 | iter.next(); |
| 190 | } |
| 191 | |
| 192 | if let Some(kw) = current_keyword.take() { |
| 193 | keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); |
| 194 | } |
| 195 | |
| 196 | // Ensure we've defined at least one attribute or keyword |
| 197 | if attributes.is_empty() && keywords.is_empty() { |
| 198 | return Err(ParserError::InvalidExtension); |
| 199 | } |
| 200 | |
| 201 | Ok(Self { |
| 202 | keywords: keywords.into(), |
| 203 | attributes: Attributes::from_short_slice_unchecked(attributes), |
| 204 | }) |
| 205 | } |
| 206 | |
| 207 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
| 208 | where |
| 209 | F: FnMut(&str) -> Result<(), E>, |
| 210 | { |
| 211 | if self.is_empty() { |
| 212 | return Ok(()); |
| 213 | } |
| 214 | f("u" )?; |
| 215 | self.attributes.for_each_subtag_str(f)?; |
| 216 | self.keywords.for_each_subtag_str(f)?; |
| 217 | Ok(()) |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | writeable::impl_display_with_writeable!(Unicode); |
| 222 | |
| 223 | impl writeable::Writeable for Unicode { |
| 224 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
| 225 | if self.is_empty() { |
| 226 | return Ok(()); |
| 227 | } |
| 228 | sink.write_str("u" )?; |
| 229 | if !self.attributes.is_empty() { |
| 230 | sink.write_char('-' )?; |
| 231 | writeable::Writeable::write_to(&self.attributes, sink)?; |
| 232 | } |
| 233 | if !self.keywords.is_empty() { |
| 234 | sink.write_char('-' )?; |
| 235 | writeable::Writeable::write_to(&self.keywords, sink)?; |
| 236 | } |
| 237 | Ok(()) |
| 238 | } |
| 239 | |
| 240 | fn writeable_length_hint(&self) -> writeable::LengthHint { |
| 241 | if self.is_empty() { |
| 242 | return writeable::LengthHint::exact(0); |
| 243 | } |
| 244 | let mut result = writeable::LengthHint::exact(1); |
| 245 | if !self.attributes.is_empty() { |
| 246 | result += writeable::Writeable::writeable_length_hint(&self.attributes) + 1; |
| 247 | } |
| 248 | if !self.keywords.is_empty() { |
| 249 | result += writeable::Writeable::writeable_length_hint(&self.keywords) + 1; |
| 250 | } |
| 251 | result |
| 252 | } |
| 253 | } |
| 254 | |