1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5pub use super::errors::ParserError;
6use crate::extensions::unicode::{Attribute, Key, Value};
7use crate::extensions::ExtensionType;
8use crate::helpers::ShortSlice;
9use crate::parser::SubtagIterator;
10use crate::LanguageIdentifier;
11use crate::{extensions, subtags};
12use tinystr::TinyAsciiStr;
13
/// Selects how strictly, and how far, the subtag parsers in this module read their input.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParserMode {
    /// Parse a bare language identifier: single-character subtags are not treated as the
    /// start of an extension section, and any unrecognized subtag is an error.
    LanguageIdentifier,
    /// Parse the language identifier part of a locale: a single-character subtag ends the
    /// language identifier section, and any unrecognized subtag is an error.
    Locale,
    /// Like `Locale`, but an unrecognized subtag ends parsing instead of producing an error.
    Partial,
}
20
/// Tracks which kinds of subtag may still follow while walking a language identifier.
///
/// The position only ever moves forward: `Script` -> `Region` -> `Variant`.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum ParserPosition {
    /// Only the language has been read; a script, region, or variant may follow.
    Script,
    /// A script has been read; a region or variant may follow.
    Region,
    /// A region or variant has been read; only further variants may follow.
    Variant,
}
27
28pub fn parse_language_identifier_from_iter(
29 iter: &mut SubtagIterator,
30 mode: ParserMode,
31) -> Result<LanguageIdentifier, ParserError> {
32 let mut script = None;
33 let mut region = None;
34 let mut variants = ShortSlice::new();
35
36 let language = if let Some(subtag) = iter.next() {
37 subtags::Language::try_from_bytes(subtag)?
38 } else {
39 return Err(ParserError::InvalidLanguage);
40 };
41
42 let mut position = ParserPosition::Script;
43
44 while let Some(subtag) = iter.peek() {
45 if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 {
46 break;
47 }
48
49 if position == ParserPosition::Script {
50 if let Ok(s) = subtags::Script::try_from_bytes(subtag) {
51 script = Some(s);
52 position = ParserPosition::Region;
53 } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
54 region = Some(s);
55 position = ParserPosition::Variant;
56 } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
57 if let Err(idx) = variants.binary_search(&v) {
58 variants.insert(idx, v);
59 }
60 position = ParserPosition::Variant;
61 } else if mode == ParserMode::Partial {
62 break;
63 } else {
64 return Err(ParserError::InvalidSubtag);
65 }
66 } else if position == ParserPosition::Region {
67 if let Ok(s) = subtags::Region::try_from_bytes(subtag) {
68 region = Some(s);
69 position = ParserPosition::Variant;
70 } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
71 if let Err(idx) = variants.binary_search(&v) {
72 variants.insert(idx, v);
73 }
74 position = ParserPosition::Variant;
75 } else if mode == ParserMode::Partial {
76 break;
77 } else {
78 return Err(ParserError::InvalidSubtag);
79 }
80 } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) {
81 if let Err(idx) = variants.binary_search(&v) {
82 variants.insert(idx, v);
83 } else {
84 return Err(ParserError::InvalidSubtag);
85 }
86 } else if mode == ParserMode::Partial {
87 break;
88 } else {
89 return Err(ParserError::InvalidSubtag);
90 }
91 iter.next();
92 }
93
94 Ok(LanguageIdentifier {
95 language,
96 script,
97 region,
98 variants: subtags::Variants::from_short_slice_unchecked(variants),
99 })
100}
101
102pub fn parse_language_identifier(
103 t: &[u8],
104 mode: ParserMode,
105) -> Result<LanguageIdentifier, ParserError> {
106 let mut iter: SubtagIterator<'_> = SubtagIterator::new(slice:t);
107 parse_language_identifier_from_iter(&mut iter, mode)
108}
109
110#[allow(clippy::type_complexity)]
111pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(
112 mut iter: SubtagIterator,
113 mode: ParserMode,
114) -> Result<
115 (
116 subtags::Language,
117 Option<subtags::Script>,
118 Option<subtags::Region>,
119 Option<subtags::Variant>,
120 Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
121 ),
122 ParserError,
123> {
124 let language;
125 let mut script = None;
126 let mut region = None;
127 let mut variant = None;
128 let mut keyword = None;
129
130 if let (i, Some((start, end))) = iter.next_manual() {
131 iter = i;
132 match subtags::Language::try_from_bytes_manual_slice(iter.slice, start, end) {
133 Ok(l) => language = l,
134 Err(e) => return Err(e),
135 }
136 } else {
137 return Err(ParserError::InvalidLanguage);
138 }
139
140 let mut position = ParserPosition::Script;
141
142 while let Some((start, end)) = iter.peek_manual() {
143 if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 {
144 break;
145 }
146
147 if matches!(position, ParserPosition::Script) {
148 if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(iter.slice, start, end) {
149 script = Some(s);
150 position = ParserPosition::Region;
151 } else if let Ok(r) =
152 subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end)
153 {
154 region = Some(r);
155 position = ParserPosition::Variant;
156 } else if let Ok(v) =
157 subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
158 {
159 // We cannot handle multiple variants in a const context
160 debug_assert!(variant.is_none());
161 variant = Some(v);
162 position = ParserPosition::Variant;
163 } else if matches!(mode, ParserMode::Partial) {
164 break;
165 } else {
166 return Err(ParserError::InvalidSubtag);
167 }
168 } else if matches!(position, ParserPosition::Region) {
169 if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) {
170 region = Some(s);
171 position = ParserPosition::Variant;
172 } else if let Ok(v) =
173 subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
174 {
175 // We cannot handle multiple variants in a const context
176 debug_assert!(variant.is_none());
177 variant = Some(v);
178 position = ParserPosition::Variant;
179 } else if matches!(mode, ParserMode::Partial) {
180 break;
181 } else {
182 return Err(ParserError::InvalidSubtag);
183 }
184 } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end)
185 {
186 debug_assert!(matches!(position, ParserPosition::Variant));
187 if variant.is_some() {
188 // We cannot handle multiple variants in a const context
189 return Err(ParserError::InvalidSubtag);
190 }
191 variant = Some(v);
192 } else if matches!(mode, ParserMode::Partial) {
193 break;
194 } else {
195 return Err(ParserError::InvalidSubtag);
196 }
197
198 iter = iter.next_manual().0;
199 }
200
201 if matches!(mode, ParserMode::Locale) {
202 if let Some((start, end)) = iter.peek_manual() {
203 match ExtensionType::try_from_bytes_manual_slice(iter.slice, start, end) {
204 Ok(ExtensionType::Unicode) => {
205 iter = iter.next_manual().0;
206 if let Some((start, end)) = iter.peek_manual() {
207 if Attribute::try_from_bytes_manual_slice(iter.slice, start, end).is_ok() {
208 // We cannot handle Attributes in a const context
209 return Err(ParserError::InvalidSubtag);
210 }
211 }
212
213 let mut key = None;
214 let mut current_type = None;
215
216 while let Some((start, end)) = iter.peek_manual() {
217 let slen = end - start;
218 if slen == 2 {
219 if key.is_some() {
220 // We cannot handle more than one Key in a const context
221 return Err(ParserError::InvalidSubtag);
222 }
223 match Key::try_from_bytes_manual_slice(iter.slice, start, end) {
224 Ok(k) => key = Some(k),
225 Err(e) => return Err(e),
226 };
227 } else if key.is_some() {
228 match Value::parse_subtag_from_bytes_manual_slice(
229 iter.slice, start, end,
230 ) {
231 Ok(Some(t)) => {
232 if current_type.is_some() {
233 // We cannot handle more than one type in a const context
234 return Err(ParserError::InvalidSubtag);
235 }
236 current_type = Some(t);
237 }
238 Ok(None) => {}
239 Err(e) => return Err(e),
240 }
241 } else {
242 break;
243 }
244 iter = iter.next_manual().0
245 }
246 if let Some(k) = key {
247 keyword = Some((k, current_type));
248 }
249 }
250 // We cannot handle Transform, Private, Other extensions in a const context
251 Ok(_) => return Err(ParserError::InvalidSubtag),
252 Err(e) => return Err(e),
253 }
254 }
255 }
256
257 Ok((language, script, region, variant, keyword))
258}
259
260#[allow(clippy::type_complexity)]
261pub const fn parse_language_identifier_with_single_variant(
262 t: &[u8],
263 mode: ParserMode,
264) -> Result<
265 (
266 subtags::Language,
267 Option<subtags::Script>,
268 Option<subtags::Region>,
269 Option<subtags::Variant>,
270 ),
271 ParserError,
272> {
273 let iter: SubtagIterator<'_> = SubtagIterator::new(slice:t);
274 match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) {
275 Ok((l: Language, s: Option