1 | use crate::Tag; |
2 | |
3 | pub struct TextParser<'a> { |
4 | pos: usize, |
5 | text: &'a str, |
6 | } |
7 | |
8 | impl<'a> TextParser<'a> { |
9 | #[inline ] |
10 | pub fn new(text: &'a str) -> Self { |
11 | TextParser { pos: 0, text } |
12 | } |
13 | |
14 | #[inline ] |
15 | pub fn at_end(&self) -> bool { |
16 | self.pos >= self.text.len() |
17 | } |
18 | |
19 | #[inline ] |
20 | pub fn curr_byte(&self) -> Option<u8> { |
21 | if !self.at_end() { |
22 | Some(self.curr_byte_unchecked()) |
23 | } else { |
24 | None |
25 | } |
26 | } |
27 | |
28 | #[inline ] |
29 | fn curr_byte_unchecked(&self) -> u8 { |
30 | self.text.as_bytes()[self.pos] |
31 | } |
32 | |
33 | #[inline ] |
34 | pub fn advance(&mut self, n: usize) { |
35 | debug_assert!(self.pos + n <= self.text.len()); |
36 | self.pos += n; |
37 | } |
38 | |
39 | pub fn consume_byte(&mut self, c: u8) -> Option<()> { |
40 | let curr = self.curr_byte()?; |
41 | if curr != c { |
42 | return None; |
43 | } |
44 | |
45 | self.advance(1); |
46 | Some(()) |
47 | } |
48 | |
49 | #[inline ] |
50 | pub fn skip_spaces(&mut self) { |
51 | // Unlike harfbuzz::ISSPACE, is_ascii_whitespace doesn't includes `\v`, but whatever. |
52 | while !self.at_end() && self.curr_byte_unchecked().is_ascii_whitespace() { |
53 | self.advance(1); |
54 | } |
55 | } |
56 | |
57 | pub fn consume_quote(&mut self) -> Option<u8> { |
58 | let c = self.curr_byte()?; |
59 | if matches!(c, b' \'' | b'"' ) { |
60 | self.advance(1); |
61 | Some(c) |
62 | } else { |
63 | None |
64 | } |
65 | } |
66 | |
67 | #[inline ] |
68 | pub fn consume_bytes<F>(&mut self, f: F) -> &'a str |
69 | where |
70 | F: Fn(u8) -> bool, |
71 | { |
72 | let start = self.pos; |
73 | self.skip_bytes(f); |
74 | &self.text[start..self.pos] |
75 | } |
76 | |
77 | pub fn skip_bytes<F>(&mut self, f: F) |
78 | where |
79 | F: Fn(u8) -> bool, |
80 | { |
81 | while !self.at_end() && f(self.curr_byte_unchecked()) { |
82 | self.advance(1); |
83 | } |
84 | } |
85 | |
86 | pub fn consume_tag(&mut self) -> Option<Tag> { |
87 | let tag = self.consume_bytes(|c| c.is_ascii_alphanumeric() || c == b'_' ); |
88 | if tag.len() > 4 { |
89 | return None; |
90 | } |
91 | |
92 | Some(Tag::from_bytes_lossy(tag.as_bytes())) |
93 | } |
94 | |
95 | pub fn consume_i32(&mut self) -> Option<i32> { |
96 | let start = self.pos; |
97 | |
98 | if matches!(self.curr_byte(), Some(b'-' ) | Some(b'+' )) { |
99 | self.advance(1); |
100 | } |
101 | |
102 | self.skip_bytes(|c| c.is_ascii_digit()); |
103 | self.text[start..self.pos].parse::<i32>().ok() |
104 | } |
105 | |
106 | pub fn consume_f32(&mut self) -> Option<f32> { |
107 | let start = self.pos; |
108 | |
109 | // TODO: does number like 1-e2 required? |
110 | |
111 | if matches!(self.curr_byte(), Some(b'-' ) | Some(b'+' )) { |
112 | self.advance(1); |
113 | } |
114 | |
115 | self.skip_bytes(|c| c.is_ascii_digit()); |
116 | |
117 | if self.consume_byte(b'.' ).is_some() { |
118 | self.skip_bytes(|c| c.is_ascii_digit()); |
119 | } |
120 | |
121 | self.text[start..self.pos].parse::<f32>().ok() |
122 | } |
123 | |
124 | pub fn consume_bool(&mut self) -> Option<bool> { |
125 | self.skip_spaces(); |
126 | |
127 | let value = self.consume_bytes(|c| c.is_ascii_alphabetic()).as_bytes(); |
128 | if value.len() == 2 { |
129 | if value[0].to_ascii_lowercase() == b'o' && value[1].to_ascii_lowercase() == b'n' { |
130 | return Some(true); |
131 | } |
132 | } else if value.len() == 3 { |
133 | if value[0].to_ascii_lowercase() == b'o' |
134 | && value[1].to_ascii_lowercase() == b'f' |
135 | && value[2].to_ascii_lowercase() == b'f' |
136 | { |
137 | return Some(false); |
138 | } |
139 | } |
140 | |
141 | None |
142 | } |
143 | } |
144 | |