1 | pub mod errors; |
2 | |
3 | use std::iter::Peekable; |
4 | |
5 | pub use self::errors::ParserError; |
6 | use crate::subtags; |
7 | use crate::LanguageIdentifier; |
8 | |
9 | pub fn parse_language_identifier_from_iter<'a>( |
10 | iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>, |
11 | allow_extension: bool, |
12 | ) -> Result<LanguageIdentifier, ParserError> { |
13 | let language = if let Some(subtag) = iter.next() { |
14 | subtags::Language::from_bytes(subtag)? |
15 | } else { |
16 | subtags::Language::default() |
17 | }; |
18 | |
19 | let mut script = None; |
20 | let mut region = None; |
21 | let mut variants = vec![]; |
22 | |
23 | let mut position = 1; |
24 | |
25 | while let Some(subtag) = iter.peek() { |
26 | if position == 1 { |
27 | if let Ok(s) = subtags::Script::from_bytes(subtag) { |
28 | script = Some(s); |
29 | position = 2; |
30 | } else if let Ok(s) = subtags::Region::from_bytes(subtag) { |
31 | region = Some(s); |
32 | position = 3; |
33 | } else if let Ok(v) = subtags::Variant::from_bytes(subtag) { |
34 | variants.push(v); |
35 | position = 3; |
36 | } else { |
37 | break; |
38 | } |
39 | } else if position == 2 { |
40 | if let Ok(s) = subtags::Region::from_bytes(subtag) { |
41 | region = Some(s); |
42 | position = 3; |
43 | } else if let Ok(v) = subtags::Variant::from_bytes(subtag) { |
44 | variants.push(v); |
45 | position = 3; |
46 | } else { |
47 | break; |
48 | } |
49 | } else { |
50 | // Variants |
51 | if let Ok(v) = subtags::Variant::from_bytes(subtag) { |
52 | variants.push(v); |
53 | } else { |
54 | break; |
55 | } |
56 | } |
57 | iter.next(); |
58 | } |
59 | |
60 | if !allow_extension && iter.peek().is_some() { |
61 | return Err(ParserError::InvalidSubtag); |
62 | } |
63 | |
64 | let variants = if variants.is_empty() { |
65 | None |
66 | } else { |
67 | variants.sort_unstable(); |
68 | variants.dedup(); |
69 | Some(variants.into_boxed_slice()) |
70 | }; |
71 | |
72 | Ok(LanguageIdentifier { |
73 | language, |
74 | script, |
75 | region, |
76 | variants, |
77 | }) |
78 | } |
79 | |
80 | pub fn parse_language_identifier(t: &[u8]) -> Result<LanguageIdentifier, ParserError> { |
81 | let mut iter: impl Iterator = t.split(|c: &u8| *c == b'-' || *c == b'_' ).peekable(); |
82 | parse_language_identifier_from_iter(&mut iter, allow_extension:false) |
83 | } |
84 | |