| 1 | // This file is part of ICU4X. For terms of use, please see the file |
| 2 | // called LICENSE at the top level of the ICU4X source tree |
| 3 | // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | |
| 5 | pub use super::errors::ParserError; |
| 6 | use crate::extensions::unicode::{Attribute, Key, Value}; |
| 7 | use crate::extensions::ExtensionType; |
| 8 | use crate::parser::SubtagIterator; |
| 9 | use crate::shortvec::ShortBoxSlice; |
| 10 | use crate::LanguageIdentifier; |
| 11 | use crate::{extensions, subtags}; |
| 12 | use tinystr::TinyAsciiStr; |
| 13 | |
/// Selects how strictly the parsers in this file treat the subtags that follow
/// the language subtag.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum ParserMode {
    /// Parse a bare language identifier: single-character subtags are not
    /// treated as a terminator, and an unrecognized subtag is an error.
    LanguageIdentifier,
    /// Parse the language-identifier portion of a locale: parsing stops at the
    /// first single-character subtag, and an unrecognized subtag before that
    /// point is an error.
    Locale,
    /// Like `Locale`, but parsing additionally stops (instead of failing) at the
    /// first subtag that is not a valid script, region or variant.
    Partial,
}
| 20 | |
/// Cursor of the subtag state machine: records which kinds of subtag may still
/// be accepted after the language subtag has been read.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum ParserPosition {
    /// Nothing after the language has been read yet: a script, a region or a
    /// variant may follow.
    Script,
    /// A script has been read: a region or a variant may follow.
    Region,
    /// A region or a variant has been read: only variants may follow.
    Variant,
}
| 27 | |
| 28 | pub fn parse_language_identifier_from_iter( |
| 29 | iter: &mut SubtagIterator, |
| 30 | mode: ParserMode, |
| 31 | ) -> Result<LanguageIdentifier, ParserError> { |
| 32 | let mut script = None; |
| 33 | let mut region = None; |
| 34 | let mut variants = ShortBoxSlice::new(); |
| 35 | |
| 36 | let language = if let Some(subtag) = iter.next() { |
| 37 | subtags::Language::try_from_bytes(subtag)? |
| 38 | } else { |
| 39 | return Err(ParserError::InvalidLanguage); |
| 40 | }; |
| 41 | |
| 42 | let mut position = ParserPosition::Script; |
| 43 | |
| 44 | while let Some(subtag) = iter.peek() { |
| 45 | if mode != ParserMode::LanguageIdentifier && subtag.len() == 1 { |
| 46 | break; |
| 47 | } |
| 48 | |
| 49 | if position == ParserPosition::Script { |
| 50 | if let Ok(s) = subtags::Script::try_from_bytes(subtag) { |
| 51 | script = Some(s); |
| 52 | position = ParserPosition::Region; |
| 53 | } else if let Ok(s) = subtags::Region::try_from_bytes(subtag) { |
| 54 | region = Some(s); |
| 55 | position = ParserPosition::Variant; |
| 56 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
| 57 | if let Err(idx) = variants.binary_search(&v) { |
| 58 | variants.insert(idx, v); |
| 59 | } |
| 60 | position = ParserPosition::Variant; |
| 61 | } else if mode == ParserMode::Partial { |
| 62 | break; |
| 63 | } else { |
| 64 | return Err(ParserError::InvalidSubtag); |
| 65 | } |
| 66 | } else if position == ParserPosition::Region { |
| 67 | if let Ok(s) = subtags::Region::try_from_bytes(subtag) { |
| 68 | region = Some(s); |
| 69 | position = ParserPosition::Variant; |
| 70 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
| 71 | if let Err(idx) = variants.binary_search(&v) { |
| 72 | variants.insert(idx, v); |
| 73 | } |
| 74 | position = ParserPosition::Variant; |
| 75 | } else if mode == ParserMode::Partial { |
| 76 | break; |
| 77 | } else { |
| 78 | return Err(ParserError::InvalidSubtag); |
| 79 | } |
| 80 | } else if let Ok(v) = subtags::Variant::try_from_bytes(subtag) { |
| 81 | if let Err(idx) = variants.binary_search(&v) { |
| 82 | variants.insert(idx, v); |
| 83 | } else { |
| 84 | return Err(ParserError::InvalidSubtag); |
| 85 | } |
| 86 | } else if mode == ParserMode::Partial { |
| 87 | break; |
| 88 | } else { |
| 89 | return Err(ParserError::InvalidSubtag); |
| 90 | } |
| 91 | iter.next(); |
| 92 | } |
| 93 | |
| 94 | Ok(LanguageIdentifier { |
| 95 | language, |
| 96 | script, |
| 97 | region, |
| 98 | variants: subtags::Variants::from_short_slice_unchecked(variants), |
| 99 | }) |
| 100 | } |
| 101 | |
| 102 | pub fn parse_language_identifier( |
| 103 | t: &[u8], |
| 104 | mode: ParserMode, |
| 105 | ) -> Result<LanguageIdentifier, ParserError> { |
| 106 | let mut iter: SubtagIterator<'_> = SubtagIterator::new(slice:t); |
| 107 | parse_language_identifier_from_iter(&mut iter, mode) |
| 108 | } |
| 109 | |
| 110 | #[allow (clippy::type_complexity)] |
| 111 | pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter( |
| 112 | mut iter: SubtagIterator, |
| 113 | mode: ParserMode, |
| 114 | ) -> Result< |
| 115 | ( |
| 116 | subtags::Language, |
| 117 | Option<subtags::Script>, |
| 118 | Option<subtags::Region>, |
| 119 | Option<subtags::Variant>, |
| 120 | Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>, |
| 121 | ), |
| 122 | ParserError, |
| 123 | > { |
| 124 | let language; |
| 125 | let mut script = None; |
| 126 | let mut region = None; |
| 127 | let mut variant = None; |
| 128 | let mut keyword = None; |
| 129 | |
| 130 | if let (i, Some((start, end))) = iter.next_manual() { |
| 131 | iter = i; |
| 132 | match subtags::Language::try_from_bytes_manual_slice(iter.slice, start, end) { |
| 133 | Ok(l) => language = l, |
| 134 | Err(e) => return Err(e), |
| 135 | } |
| 136 | } else { |
| 137 | return Err(ParserError::InvalidLanguage); |
| 138 | } |
| 139 | |
| 140 | let mut position = ParserPosition::Script; |
| 141 | |
| 142 | while let Some((start, end)) = iter.peek_manual() { |
| 143 | if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 { |
| 144 | break; |
| 145 | } |
| 146 | |
| 147 | if matches!(position, ParserPosition::Script) { |
| 148 | if let Ok(s) = subtags::Script::try_from_bytes_manual_slice(iter.slice, start, end) { |
| 149 | script = Some(s); |
| 150 | position = ParserPosition::Region; |
| 151 | } else if let Ok(r) = |
| 152 | subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) |
| 153 | { |
| 154 | region = Some(r); |
| 155 | position = ParserPosition::Variant; |
| 156 | } else if let Ok(v) = |
| 157 | subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
| 158 | { |
| 159 | // We cannot handle multiple variants in a const context |
| 160 | debug_assert!(variant.is_none()); |
| 161 | variant = Some(v); |
| 162 | position = ParserPosition::Variant; |
| 163 | } else if matches!(mode, ParserMode::Partial) { |
| 164 | break; |
| 165 | } else { |
| 166 | return Err(ParserError::InvalidSubtag); |
| 167 | } |
| 168 | } else if matches!(position, ParserPosition::Region) { |
| 169 | if let Ok(s) = subtags::Region::try_from_bytes_manual_slice(iter.slice, start, end) { |
| 170 | region = Some(s); |
| 171 | position = ParserPosition::Variant; |
| 172 | } else if let Ok(v) = |
| 173 | subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
| 174 | { |
| 175 | // We cannot handle multiple variants in a const context |
| 176 | debug_assert!(variant.is_none()); |
| 177 | variant = Some(v); |
| 178 | position = ParserPosition::Variant; |
| 179 | } else if matches!(mode, ParserMode::Partial) { |
| 180 | break; |
| 181 | } else { |
| 182 | return Err(ParserError::InvalidSubtag); |
| 183 | } |
| 184 | } else if let Ok(v) = subtags::Variant::try_from_bytes_manual_slice(iter.slice, start, end) |
| 185 | { |
| 186 | debug_assert!(matches!(position, ParserPosition::Variant)); |
| 187 | if variant.is_some() { |
| 188 | // We cannot handle multiple variants in a const context |
| 189 | return Err(ParserError::InvalidSubtag); |
| 190 | } |
| 191 | variant = Some(v); |
| 192 | } else if matches!(mode, ParserMode::Partial) { |
| 193 | break; |
| 194 | } else { |
| 195 | return Err(ParserError::InvalidSubtag); |
| 196 | } |
| 197 | |
| 198 | iter = iter.next_manual().0; |
| 199 | } |
| 200 | |
| 201 | if matches!(mode, ParserMode::Locale) { |
| 202 | if let Some((start, end)) = iter.peek_manual() { |
| 203 | match ExtensionType::try_from_bytes_manual_slice(iter.slice, start, end) { |
| 204 | Ok(ExtensionType::Unicode) => { |
| 205 | iter = iter.next_manual().0; |
| 206 | if let Some((start, end)) = iter.peek_manual() { |
| 207 | if Attribute::try_from_bytes_manual_slice(iter.slice, start, end).is_ok() { |
| 208 | // We cannot handle Attributes in a const context |
| 209 | return Err(ParserError::InvalidSubtag); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | let mut key = None; |
| 214 | let mut current_type = None; |
| 215 | |
| 216 | while let Some((start, end)) = iter.peek_manual() { |
| 217 | let slen = end - start; |
| 218 | if slen == 2 { |
| 219 | if key.is_some() { |
| 220 | // We cannot handle more than one Key in a const context |
| 221 | return Err(ParserError::InvalidSubtag); |
| 222 | } |
| 223 | match Key::try_from_bytes_manual_slice(iter.slice, start, end) { |
| 224 | Ok(k) => key = Some(k), |
| 225 | Err(e) => return Err(e), |
| 226 | }; |
| 227 | } else if key.is_some() { |
| 228 | match Value::parse_subtag_from_bytes_manual_slice( |
| 229 | iter.slice, start, end, |
| 230 | ) { |
| 231 | Ok(Some(t)) => { |
| 232 | if current_type.is_some() { |
| 233 | // We cannot handle more than one type in a const context |
| 234 | return Err(ParserError::InvalidSubtag); |
| 235 | } |
| 236 | current_type = Some(t); |
| 237 | } |
| 238 | Ok(None) => {} |
| 239 | Err(e) => return Err(e), |
| 240 | } |
| 241 | } else { |
| 242 | break; |
| 243 | } |
| 244 | iter = iter.next_manual().0 |
| 245 | } |
| 246 | if let Some(k) = key { |
| 247 | keyword = Some((k, current_type)); |
| 248 | } |
| 249 | } |
| 250 | // We cannot handle Transform, Private, Other extensions in a const context |
| 251 | Ok(_) => return Err(ParserError::InvalidSubtag), |
| 252 | Err(e) => return Err(e), |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | Ok((language, script, region, variant, keyword)) |
| 258 | } |
| 259 | |
| 260 | #[allow (clippy::type_complexity)] |
| 261 | pub const fn parse_language_identifier_with_single_variant( |
| 262 | t: &[u8], |
| 263 | mode: ParserMode, |
| 264 | ) -> Result< |
| 265 | ( |
| 266 | subtags::Language, |
| 267 | Option<subtags::Script>, |
| 268 | Option<subtags::Region>, |
| 269 | Option<subtags::Variant>, |
| 270 | ), |
| 271 | ParserError, |
| 272 | > { |
| 273 | let iter: SubtagIterator<'_> = SubtagIterator::new(slice:t); |
| 274 | match parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter(iter, mode) { |
| 275 | Ok((l: Language, s: Option, r: Option, v: Option, _)) => Ok((l, s, r, v)), |
| 276 | Err(e: ParserError) => Err(e), |
| 277 | } |
| 278 | } |
| 279 | |