1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | //! Transform Extensions provide information on content transformations in a given locale. |
6 | //! |
7 | //! The main struct for this extension is [`Transform`] which contains [`Fields`] and an |
8 | //! optional [`LanguageIdentifier`]. |
9 | //! |
10 | //! [`LanguageIdentifier`]: super::super::LanguageIdentifier |
11 | //! |
12 | //! # Examples |
13 | //! |
14 | //! ``` |
15 | //! use icu::locid::extensions::transform::{Fields, Key, Transform, Value}; |
16 | //! use icu::locid::{LanguageIdentifier, Locale}; |
17 | //! |
18 | //! let mut loc: Locale = |
19 | //! "en-US-t-es-ar-h0-hybrid" .parse().expect("Parsing failed." ); |
20 | //! |
21 | //! let lang: LanguageIdentifier = |
22 | //! "es-AR" .parse().expect("Parsing LanguageIdentifier failed." ); |
23 | //! |
24 | //! let key: Key = "h0" .parse().expect("Parsing key failed." ); |
25 | //! let value: Value = "hybrid" .parse().expect("Parsing value failed." ); |
26 | //! |
27 | //! assert_eq!(loc.extensions.transform.lang, Some(lang)); |
28 | //! assert!(loc.extensions.transform.fields.contains_key(&key)); |
29 | //! assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); |
30 | //! |
31 | //! assert_eq!(&loc.extensions.transform.to_string(), "t-es-ar-h0-hybrid" ); |
32 | //! ``` |
33 | mod fields; |
34 | mod key; |
35 | mod value; |
36 | |
37 | use core::cmp::Ordering; |
38 | |
39 | pub use fields::Fields; |
40 | #[doc (inline)] |
41 | pub use key::{key, Key}; |
42 | pub use value::Value; |
43 | |
44 | use crate::parser::SubtagIterator; |
45 | use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode}; |
46 | use crate::shortvec::ShortBoxSlice; |
47 | use crate::subtags::{self, Language}; |
48 | use crate::LanguageIdentifier; |
49 | use litemap::LiteMap; |
50 | |
51 | /// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale |
52 | /// Identifier`] specification. |
53 | /// |
54 | /// Transform extension carries information about source language or script of |
55 | /// transformed content, including content that has been transliterated, transcribed, |
56 | /// or translated, or in some other way influenced by the source (See [`RFC 6497`] for details). |
57 | /// |
58 | /// # Examples |
59 | /// |
60 | /// ``` |
61 | /// use icu::locid::extensions::transform::{Key, Value}; |
62 | /// use icu::locid::{LanguageIdentifier, Locale}; |
63 | /// |
64 | /// let mut loc: Locale = |
65 | /// "de-t-en-us-h0-hybrid" .parse().expect("Parsing failed." ); |
66 | /// |
67 | /// let en_us: LanguageIdentifier = "en-US" .parse().expect("Parsing failed." ); |
68 | /// |
69 | /// assert_eq!(loc.extensions.transform.lang, Some(en_us)); |
70 | /// let key: Key = "h0" .parse().expect("Parsing key failed." ); |
71 | /// let value: Value = "hybrid" .parse().expect("Parsing value failed." ); |
72 | /// assert_eq!(loc.extensions.transform.fields.get(&key), Some(&value)); |
73 | /// ``` |
74 | /// [`Unicode BCP47 T Extensions`]: https://unicode.org/reports/tr35/#t_Extension |
75 | /// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt |
76 | /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier |
77 | #[derive (Clone, PartialEq, Eq, Debug, Default, Hash)] |
78 | #[allow (clippy::exhaustive_structs)] // spec-backed stable datastructure |
79 | pub struct Transform { |
80 | /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present. |
81 | pub lang: Option<LanguageIdentifier>, |
82 | /// The key-value pairs present in this locale extension, with each extension key subtag |
83 | /// associated to its provided value subtag. |
84 | pub fields: Fields, |
85 | } |
86 | |
87 | impl Transform { |
88 | /// Returns a new empty map of Transform extensions. Same as [`default()`](Default::default()), but is `const`. |
89 | /// |
90 | /// # Examples |
91 | /// |
92 | /// ``` |
93 | /// use icu::locid::extensions::transform::Transform; |
94 | /// |
95 | /// assert_eq!(Transform::new(), Transform::default()); |
96 | /// ``` |
97 | #[inline ] |
98 | pub const fn new() -> Self { |
99 | Self { |
100 | lang: None, |
101 | fields: Fields::new(), |
102 | } |
103 | } |
104 | |
105 | /// Returns `true` if there are no tfields and no tlang in the `TransformExtensionList`. |
106 | /// |
107 | /// # Examples |
108 | /// |
109 | /// ``` |
110 | /// use icu::locid::Locale; |
111 | /// |
112 | /// let mut loc: Locale = "en-US-t-es-ar" .parse().expect("Parsing failed." ); |
113 | /// |
114 | /// assert!(!loc.extensions.transform.is_empty()); |
115 | /// ``` |
116 | pub fn is_empty(&self) -> bool { |
117 | self.lang.is_none() && self.fields.is_empty() |
118 | } |
119 | |
120 | /// Clears the transform extension, effectively removing it from the locale. |
121 | /// |
122 | /// # Examples |
123 | /// |
124 | /// ``` |
125 | /// use icu::locid::Locale; |
126 | /// |
127 | /// let mut loc: Locale = "en-US-t-es-ar" .parse().unwrap(); |
128 | /// loc.extensions.transform.clear(); |
129 | /// assert_eq!(loc, "en-US" .parse().unwrap()); |
130 | /// ``` |
131 | pub fn clear(&mut self) { |
132 | self.lang = None; |
133 | self.fields.clear(); |
134 | } |
135 | |
136 | #[allow (clippy::type_complexity)] |
137 | pub(crate) fn as_tuple( |
138 | &self, |
139 | ) -> ( |
140 | Option<( |
141 | subtags::Language, |
142 | Option<subtags::Script>, |
143 | Option<subtags::Region>, |
144 | &subtags::Variants, |
145 | )>, |
146 | &Fields, |
147 | ) { |
148 | (self.lang.as_ref().map(|l| l.as_tuple()), &self.fields) |
149 | } |
150 | |
151 | /// Returns an ordering suitable for use in [`BTreeSet`]. |
152 | /// |
153 | /// The ordering may or may not be equivalent to string ordering, and it |
154 | /// may or may not be stable across ICU4X releases. |
155 | /// |
156 | /// [`BTreeSet`]: alloc::collections::BTreeSet |
157 | pub fn total_cmp(&self, other: &Self) -> Ordering { |
158 | self.as_tuple().cmp(&other.as_tuple()) |
159 | } |
160 | |
161 | pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> { |
162 | let mut tlang = None; |
163 | let mut tfields = LiteMap::new(); |
164 | |
165 | if let Some(subtag) = iter.peek() { |
166 | if Language::try_from_bytes(subtag).is_ok() { |
167 | tlang = Some(parse_language_identifier_from_iter( |
168 | iter, |
169 | ParserMode::Partial, |
170 | )?); |
171 | } |
172 | } |
173 | |
174 | let mut current_tkey = None; |
175 | let mut current_tvalue = ShortBoxSlice::new(); |
176 | let mut has_current_tvalue = false; |
177 | |
178 | while let Some(subtag) = iter.peek() { |
179 | if let Some(tkey) = current_tkey { |
180 | if let Ok(val) = Value::parse_subtag(subtag) { |
181 | has_current_tvalue = true; |
182 | if let Some(val) = val { |
183 | current_tvalue.push(val); |
184 | } |
185 | } else { |
186 | if !has_current_tvalue { |
187 | return Err(ParserError::InvalidExtension); |
188 | } |
189 | tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue)); |
190 | current_tkey = None; |
191 | current_tvalue = ShortBoxSlice::new(); |
192 | has_current_tvalue = false; |
193 | continue; |
194 | } |
195 | } else if let Ok(tkey) = Key::try_from_bytes(subtag) { |
196 | current_tkey = Some(tkey); |
197 | } else { |
198 | break; |
199 | } |
200 | |
201 | iter.next(); |
202 | } |
203 | |
204 | if let Some(tkey) = current_tkey { |
205 | if !has_current_tvalue { |
206 | return Err(ParserError::InvalidExtension); |
207 | } |
208 | tfields.try_insert(tkey, Value::from_short_slice_unchecked(current_tvalue)); |
209 | } |
210 | |
211 | Ok(Self { |
212 | lang: tlang, |
213 | fields: tfields.into(), |
214 | }) |
215 | } |
216 | |
217 | pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E> |
218 | where |
219 | F: FnMut(&str) -> Result<(), E>, |
220 | { |
221 | if self.is_empty() { |
222 | return Ok(()); |
223 | } |
224 | f("t" )?; |
225 | if let Some(lang) = &self.lang { |
226 | lang.for_each_subtag_str_lowercased(f)?; |
227 | } |
228 | self.fields.for_each_subtag_str(f) |
229 | } |
230 | } |
231 | |
232 | writeable::impl_display_with_writeable!(Transform); |
233 | |
234 | impl writeable::Writeable for Transform { |
235 | fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result { |
236 | if self.is_empty() { |
237 | return Ok(()); |
238 | } |
239 | sink.write_str("t" )?; |
240 | if let Some(lang) = &self.lang { |
241 | sink.write_char('-' )?; |
242 | lang.write_lowercased_to(sink)?; |
243 | } |
244 | if !self.fields.is_empty() { |
245 | sink.write_char('-' )?; |
246 | writeable::Writeable::write_to(&self.fields, sink)?; |
247 | } |
248 | Ok(()) |
249 | } |
250 | |
251 | fn writeable_length_hint(&self) -> writeable::LengthHint { |
252 | if self.is_empty() { |
253 | return writeable::LengthHint::exact(0); |
254 | } |
255 | let mut result = writeable::LengthHint::exact(1); |
256 | if let Some(lang) = &self.lang { |
257 | result += writeable::Writeable::writeable_length_hint(lang) + 1; |
258 | } |
259 | if !self.fields.is_empty() { |
260 | result += writeable::Writeable::writeable_length_hint(&self.fields) + 1; |
261 | } |
262 | result |
263 | } |
264 | } |
265 | |