1 | // This file is part of ICU4X. For terms of use, please see the file |
2 | // called LICENSE at the top level of the ICU4X source tree |
3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
4 | |
5 | pub use super::errors::ParserError; |
6 | use crate::extensions::unicode::{Attribute, Key, Value}; |
7 | use crate::extensions::ExtensionType; |
8 | use crate::helpers::ShortSlice; |
9 | use crate::parser::SubtagIterator; |
10 | use crate::LanguageIdentifier; |
11 | use crate::{extensions, subtags}; |
12 | use tinystr::TinyAsciiStr; |
13 | |
/// Selects how strictly the subtag parsers in this module treat their input.
#[derive(Clone, Copy, PartialEq)]
pub enum ParserMode {
    /// One-byte subtags get no special treatment, and any subtag that is not a
    /// valid script, region, or variant for its position is an error.
    LanguageIdentifier,
    /// A one-byte subtag ends the language-identifier section. The `const`
    /// parser in this module then goes on to read a Unicode extension.
    Locale,
    /// A one-byte subtag ends the language-identifier section, and so does any
    /// subtag that cannot be placed; the latter stops parsing without an error.
    Partial,
}
20 | |
/// Tracks which kinds of subtag the language-identifier parsers may still
/// accept after the language subtag.
#[derive(Clone, Copy, PartialEq)]
enum ParserPosition {
    /// Nothing after the language has been consumed yet: a script, a region,
    /// or a variant may follow.
    Script,
    /// A script has been consumed: a region or a variant may follow.
    Region,
    /// A region or a variant has been consumed: only variants may follow.
    Variant,
}
27 | |
28 | pub fn parse_language_identifier_from_iter( |
29 | iter: &mut SubtagIterator, |
30 | mode: ParserMode, |
31 | ) -> Result<LanguageIdentifier, ParserError> { |
32 | let mut script = None; |
33 | let mut region = None; |
34 | let mut variants = ShortSlice::new(); |
35 | |
36 | let language = if let Some(subtag) = iter.next() { |
37 | subtags::Language::try_from_bytes(subtag)? |
38 | } else { |
39 | return Err(ParserError::InvalidLanguage); |
40 | }; |
41 | |
42 | let mut position = ParserPosition::Script; |
43 | |
44 | while let Some(subtag) = iter.peek() { |
45 | if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 { |
46 | break; |
47 | } |
48 | |
49 | if position == ParserPosition::Script { |
50 | if let Ok(s) = subtags::Script::try_from_bytes(subtag) { |
51 | script = Some(s); |
52 | position = ParserPosition::Region; |
53 | } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) { |
54 | region = Some(s); |
55 | position = ParserPosition::Variant; |
56 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
57 | if let Err(idx) = variants.binary_search(&v) { |
58 | variants.insert(idx, v); |
59 | } |
60 | position = ParserPosition::Variant; |
61 | } else if mode == ParserMode::Partial { |
62 | break; |
63 | } else { |
64 | return Err(ParserError::InvalidSubtag); |
65 | } |
66 | } else if position == ParserPosition::Region { |
67 | if let Ok(s) = subtags::Region::try_from_bytes(subtag) { |
68 | region = Some(s); |
69 | position = ParserPosition::Variant; |
70 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
71 | if let Err(idx) = variants.binary_search(&v) { |
72 | variants.insert(idx, v); |
73 | } |
74 | position = ParserPosition::Variant; |
75 | } else if mode == ParserMode::Partial { |
76 | break; |
77 | } else { |
78 | return Err(ParserError::InvalidSubtag); |
79 | } |
80 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
81 | if let Err(idx) = variants.binary_search(&v) { |
82 | variants.insert(idx, v); |
83 | } else { |
84 | return Err(ParserError::InvalidSubtag); |
85 | } |
86 | } else if mode == ParserMode::Partial { |
87 | break; |
88 | } else { |
89 | return Err(ParserError::InvalidSubtag); |
90 | } |
91 | iter.next(); |
92 | } |
93 | |
94 | Ok(LanguageIdentifier { |
95 | language, |
96 | script, |
97 | region, |
98 | variants: subtags::Variants::from_short_slice_unchecked(variants), |
99 | }) |
100 | } |
101 | |
102 | pub fn parse_language_identifier( |
103 | t: &[u8], |
104 | mode: ParserMode, |
105 | ) -> Result<LanguageIdentifier, ParserError> { |
106 | let mut iter: SubtagIterator<'_> = SubtagIterator::new(slice:t); |
107 | parse_language_identifier_from_iter(&mut iter, mode) |
108 | } |
109 | |
110 | #[allow (clippy::type_complexity)] |
111 | pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter( |
112 | mut iter: SubtagIterator, |
113 | mode: ParserMode, |
114 | ) -> Result< |
115 | ( |
116 | subtags::Language, |
117 | Option<subtags::Script>, |
118 | Option<subtags::Region>, |
119 | Option<subtags::Variant>, |
120 | Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, |
121 | ), |
122 | ParserError, |
123 | > { |
124 | let language; |
125 | let mut script = None; |
126 | let mut region = None; |
127 | let mut variant = None; |
128 | let mut keyword = None; |
129 | |
130 | if let (i, Some((start, end))) = iter.next_manual() { |
131 | iter = i; |
132 | match subtags::Language::try_from_bytes_manual_slice(iter.slice, start, end) { |
133 | Ok(l) => language = l, |
134 | Err(e) => return Err(e), |
135 | } |
136 | } else { |
137 | return Err(ParserError::InvalidLanguage); |
138 | } |
139 | |
140 | let mut position = ParserPosition::Script; |
141 | |
142 | while let Some((start, end)) = iter.peek_manual() { |
143 | if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 { |
144 | break; |
145 | } |
146 | |
147 | if matches!(position, ParserPosition::Script) { |
148 | if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(iter.slice, start, end) { |
149 | script = Some(s); |
150 | position = ParserPosition::Region; |
151 | } else if let Ok(r) = |
152 | subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) |
153 | { |
154 | region = Some(r); |
155 | position = ParserPosition::Variant; |
156 | } else if let Ok(v) = |
157 | subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
158 | { |
159 | // We cannot handle multiple variants in a const context |
160 | debug_assert!(variant.is_none()); |
161 | variant = Some(v); |
162 | position = ParserPosition::Variant; |
163 | } else if matches!(mode, ParserMode::Partial) { |
164 | break; |
165 | } else { |
166 | return Err(ParserError::InvalidSubtag); |
167 | } |
168 | } else if matches!(position, ParserPosition::Region) { |
169 | if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) { |
170 | region = Some(s); |
171 | position = ParserPosition::Variant; |
172 | } else if let Ok(v) = |
173 | subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
174 | { |
175 | // We cannot handle multiple variants in a const context |
176 | debug_assert!(variant.is_none()); |
177 | variant = Some(v); |
178 | position = ParserPosition::Variant; |
179 | } else if matches!(mode, ParserMode::Partial) { |
180 | break; |
181 | } else { |
182 | return Err(ParserError::InvalidSubtag); |
183 | } |
184 | } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
185 | { |
186 | debug_assert!(matches!(position, ParserPosition::Variant)); |
187 | if variant.is_some() { |
188 | // We cannot handle multiple variants in a const context |
189 | return Err(ParserError::InvalidSubtag); |
190 | } |
191 | variant = Some(v); |
192 | } else if matches!(mode, ParserMode::Partial) { |
193 | break; |
194 | } else { |
195 | return Err(ParserError::InvalidSubtag); |
196 | } |
197 | |
198 | iter = iter.next_manual().0; |
199 | } |
200 | |
201 | if matches!(mode, ParserMode::Locale) { |
202 | if let Some((start, end)) = iter.peek_manual() { |
203 | match ExtensionType::try_from_bytes_manual_slice(iter.slice, start, end) { |
204 | Ok(ExtensionType::Unicode) => { |
205 | iter = iter.next_manual().0; |
206 | if let Some((start, end)) = iter.peek_manual() { |
207 | if Attribute::try_from_bytes_manual_slice(iter.slice, start, end).is_ok() { |
208 | // We cannot handle Attributes in a const context |
209 | return Err(ParserError::InvalidSubtag); |
210 | } |
211 | } |
212 | |
213 | let mut key = None; |
214 | let mut current_type = None; |
215 | |
216 | while let Some((start, end)) = iter.peek_manual() { |
217 | let slen = end - start; |
218 | if slen == 2 { |
219 | if key.is_some() { |
220 | // We cannot handle more than one Key in a const context |
221 | return Err(ParserError::InvalidSubtag); |
222 | } |
223 | match Key::try_from_bytes_manual_slice(iter.slice, start, end) { |
224 | Ok(k) => key = Some(k), |
225 | Err(e) => return Err(e), |
226 | }; |
227 | } else if key.is_some() { |
228 | match Value::parse_subtag_from_bytes_manual_slice( |
229 | iter.slice, start, end, |
230 | ) { |
231 | Ok(Some(t)) => { |
232 | if current_type.is_some() { |
233 | // We cannot handle more than one type in a const context |
234 | return Err(ParserError::InvalidSubtag); |
235 | } |
236 | current_type = Some(t); |
237 | } |
238 | Ok(None) => {} |
239 | Err(e) => return Err(e), |
240 | } |
241 | } else { |
242 | break; |
243 | } |
244 | iter = iter.next_manual().0 |
245 | } |
246 | if let Some(k) = key { |
247 | keyword = Some((k, current_type)); |
248 | } |
249 | } |
250 | // We cannot handle Transform, Private, Other extensions in a const context |
251 | Ok(_) => return Err(ParserError::InvalidSubtag), |
252 | Err(e) => return Err(e), |
253 | } |
254 | } |
255 | } |
256 | |
257 | Ok((language, script, region, variant, keyword)) |
258 | } |
259 | |
260 | #[allow (clippy::type_complexity)] |
261 | pub const fn parse_language_identifier_with_single_variant( |
262 | t: &[u8], |
263 | mode: ParserMode, |
264 | ) -> Result< |
265 | ( |
266 | subtags::Language, |
267 | Option<subtags::Script>, |
268 | Option<subtags::Region>, |
269 | Option<subtags::Variant>, |
270 | ), |
271 | ParserError, |
272 | > { |
273 | let iter: SubtagIterator<'_> = SubtagIterator::new(slice:t); |
274 | match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) { |
275 | Ok((l: Language, s: Option, r: Option, v: Option, _)) => Ok((l, s, r, v)), |
276 | Err(e: ParserError) => Err(e), |
277 | } |
278 | } |
279 | |