1use std::result::Result;
2
3use super::path_reader::{PathReader, ReaderError};
4
// Characters with special meaning to the tokenizer. `Tokenizer::next_token` dispatches on
// these, and the unquoted-key scanners stop when they meet most of them.

/// `$` — a lone `$` becomes `Token::Absolute`; followed by key characters it starts a key.
const CH_DOLLA: char = '$';
/// `.` — emitted as `Token::Dot`.
const CH_DOT: char = '.';
/// `*` — emitted as `Token::Asterisk`.
const CH_ASTERISK: char = '*';
/// `[` — emitted as `Token::OpenArray`.
const CH_LARRAY: char = '[';
/// `]` — emitted as `Token::CloseArray`.
const CH_RARRAY: char = ']';
/// `(` — emitted as `Token::OpenParenthesis`.
const CH_LPAREN: char = '(';
/// `)` — emitted as `Token::CloseParenthesis`.
const CH_RPAREN: char = ')';
/// `@` — emitted as `Token::At`.
const CH_AT: char = '@';
/// `?` — emitted as `Token::Question`.
const CH_QUESTION: char = '?';
/// `,` — emitted as `Token::Comma`.
const CH_COMMA: char = ',';
/// `:` — emitted as `Token::Split`. NOTE: despite the name, this is a colon, not a semicolon.
const CH_SEMICOLON: char = ':';
/// `=` — only valid doubled (`==`), which yields `Token::Equal`.
const CH_EQUAL: char = '=';
/// `&` — only valid doubled (`&&`), which yields `Token::And`.
const CH_AMPERSAND: char = '&';
/// `|` — only valid doubled (`||`), which yields `Token::Or`.
const CH_PIPE: char = '|';
/// `<` — yields `Token::Little`, or `Token::LittleOrEqual` when followed by `=`.
const CH_LITTLE: char = '<';
/// `>` — yields `Token::Greater`, or `Token::GreaterOrEqual` when followed by `=`.
const CH_GREATER: char = '>';
/// `!` — only valid as `!=`, which yields `Token::NotEqual`.
const CH_EXCLAMATION: char = '!';
/// `'` — opens a single-quoted string (`Token::SingleQuoted`).
const CH_SINGLE_QUOTE: char = '\'';
/// `"` — opens a double-quoted string (`Token::DoubleQuoted`).
const CH_DOUBLE_QUOTE: char = '"';
24
/// Reasons the tokenizer can fail to produce a token.
///
/// Both variants carry only `Eq` data, so the type derives `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenError {
    /// The underlying reader reported end of input.
    Eof,
    /// An invalid token was found; the payload is the position of the character that
    /// started it (for example a lone `=`, `!`, `&` or `|`).
    Position(usize),
}
30
/// Converts an error from the character reader into the tokenizer's own error type.
///
/// The match is deliberately written without a wildcard arm, so adding a variant to
/// `ReaderError` forces this mapping to be revisited at compile time.
fn to_token_error(read_err: ReaderError) -> TokenError {
    match read_err {
        ReaderError::Eof => TokenError::Eof,
    }
}
36
/// A lexical token of a path expression.
///
/// The first `usize` of every variant is the reader position of the character that started
/// the token.
#[derive(Debug, PartialEq)]
pub enum Token {
    /// A `$` that is not followed by key characters.
    Absolute(usize),
    /// `.`
    Dot(usize),
    /// `@`
    At(usize),
    /// `[`
    OpenArray(usize),
    /// `]`
    CloseArray(usize),
    /// `*`
    Asterisk(usize),
    /// `?`
    Question(usize),
    /// `,`
    Comma(usize),
    /// `:`
    Split(usize),
    /// `(`
    OpenParenthesis(usize),
    /// `)`
    CloseParenthesis(usize),
    /// An unquoted run of characters; the `String` holds the full text, including its
    /// first character.
    Key(usize, String),
    /// Text enclosed in double quotes; the `String` excludes the surrounding quotes.
    DoubleQuoted(usize, String),
    /// Text enclosed in single quotes; the `String` excludes the surrounding quotes.
    SingleQuoted(usize, String),
    /// `==`
    Equal(usize),
    /// `>=`
    GreaterOrEqual(usize),
    /// `>`
    Greater(usize),
    /// `<`
    Little(usize),
    /// `<=`
    LittleOrEqual(usize),
    /// `!=`
    NotEqual(usize),
    /// `&&`
    And(usize),
    /// `||`
    Or(usize),
    /// A run of whitespace. The second field is the byte length of the whitespace that
    /// follows the first whitespace character (so a single space yields `0`).
    Whitespace(usize, usize),
}

impl Token {
    /// Returns `true` when `self` and `other` are the same variant.
    ///
    /// Payloads (positions and strings) are ignored; only the kind of token is compared.
    pub fn is_match_token_type(&self, other: Token) -> bool {
        // Comparing discriminants is exactly "same variant, any payload", and it stays
        // correct when variants are added without a per-variant arm to maintain.
        std::mem::discriminant(self) == std::mem::discriminant(&other)
    }
}
93
94pub struct Tokenizer<'a> {
95 input: PathReader<'a>,
96}
97
98impl<'a> Tokenizer<'a> {
99 pub fn new(input: &'a str) -> Self {
100 trace!("input: {}", input);
101 Tokenizer {
102 input: PathReader::new(input),
103 }
104 }
105
106 fn dolla(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
107 let fun = |c: &char| match c {
108 &CH_DOT
109 | &CH_ASTERISK
110 | &CH_LARRAY
111 | &CH_RARRAY
112 | &CH_LPAREN
113 | &CH_RPAREN
114 | &CH_AT
115 | &CH_QUESTION
116 | &CH_COMMA
117 | &CH_SEMICOLON
118 | &CH_LITTLE
119 | &CH_GREATER
120 | &CH_EQUAL
121 | &CH_AMPERSAND
122 | &CH_PIPE
123 | &CH_EXCLAMATION
124 => false,
125 _ => !c.is_whitespace(),
126 };
127 let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
128 vec.insert(0, ch);
129
130 if vec.len() == 1 {
131 Ok(Token::Absolute(pos))
132 } else {
133 Ok(Token::Key(pos, vec))
134 }
135 }
136
137 fn quote(&mut self, ch: char) -> Result<String, TokenError> {
138 let (_, mut val) = self
139 .input
140 .take_while(|c| *c != ch)
141 .map_err(to_token_error)?;
142
143 if let Some('\\') = val.chars().last() {
144 self.input.next_char().map_err(to_token_error)?;
145 let _ = val.pop();
146 let (_, val_remain) = self
147 .input
148 .take_while(|c| *c != ch)
149 .map_err(to_token_error)?;
150 self.input.next_char().map_err(to_token_error)?;
151 val.push(ch);
152 val.push_str(val_remain.as_str());
153 } else {
154 self.input.next_char().map_err(to_token_error)?;
155 }
156
157 Ok(val)
158 }
159
160 fn single_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
161 let val = self.quote(ch)?;
162 Ok(Token::SingleQuoted(pos, val))
163 }
164
165 fn double_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
166 let val = self.quote(ch)?;
167 Ok(Token::DoubleQuoted(pos, val))
168 }
169
170 fn equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
171 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
172 match ch {
173 CH_EQUAL => {
174 self.input.next_char().map_err(to_token_error)?;
175 Ok(Token::Equal(pos))
176 }
177 _ => Err(TokenError::Position(pos)),
178 }
179 }
180
181 fn not_equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
182 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
183 match ch {
184 CH_EQUAL => {
185 self.input.next_char().map_err(to_token_error)?;
186 Ok(Token::NotEqual(pos))
187 }
188 _ => Err(TokenError::Position(pos)),
189 }
190 }
191
192 fn little(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
193 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
194 match ch {
195 CH_EQUAL => {
196 self.input.next_char().map_err(to_token_error)?;
197 Ok(Token::LittleOrEqual(pos))
198 }
199 _ => Ok(Token::Little(pos)),
200 }
201 }
202
203 fn greater(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
204 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
205 match ch {
206 CH_EQUAL => {
207 self.input.next_char().map_err(to_token_error)?;
208 Ok(Token::GreaterOrEqual(pos))
209 }
210 _ => Ok(Token::Greater(pos)),
211 }
212 }
213
214 fn and(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
215 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
216 match ch {
217 CH_AMPERSAND => {
218 let _ = self.input.next_char().map_err(to_token_error);
219 Ok(Token::And(pos))
220 }
221 _ => Err(TokenError::Position(pos)),
222 }
223 }
224
225 fn or(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
226 let (_, ch) = self.input.peek_char().map_err(to_token_error)?;
227 match ch {
228 CH_PIPE => {
229 self.input.next_char().map_err(to_token_error)?;
230 Ok(Token::Or(pos))
231 }
232 _ => Err(TokenError::Position(pos)),
233 }
234 }
235
236 fn whitespace(&mut self, pos: usize, _: char) -> Result<Token, TokenError> {
237 let (_, vec) = self
238 .input
239 .take_while(|c| c.is_whitespace())
240 .map_err(to_token_error)?;
241 Ok(Token::Whitespace(pos, vec.len()))
242 }
243
244 fn other(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> {
245 let fun = |c: &char| match c {
246 &CH_DOLLA
247 | &CH_DOT
248 | &CH_ASTERISK
249 | &CH_LARRAY
250 | &CH_RARRAY
251 | &CH_LPAREN
252 | &CH_RPAREN
253 | &CH_AT
254 | &CH_QUESTION
255 | &CH_COMMA
256 | &CH_SEMICOLON
257 | &CH_LITTLE
258 | &CH_GREATER
259 | &CH_EQUAL
260 | &CH_AMPERSAND
261 | &CH_PIPE
262 | &CH_EXCLAMATION
263 => false,
264 _ => !c.is_whitespace(),
265 };
266 let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?;
267 vec.insert(0, ch);
268 Ok(Token::Key(pos, vec))
269 }
270
271 pub fn next_token(&mut self) -> Result<Token, TokenError> {
272 let (pos, ch) = self.input.next_char().map_err(to_token_error)?;
273 match ch {
274 CH_DOLLA => self.dolla(pos, ch),
275 CH_DOT => Ok(Token::Dot(pos)),
276 CH_ASTERISK => Ok(Token::Asterisk(pos)),
277 CH_LARRAY => Ok(Token::OpenArray(pos)),
278 CH_RARRAY => Ok(Token::CloseArray(pos)),
279 CH_LPAREN => Ok(Token::OpenParenthesis(pos)),
280 CH_RPAREN => Ok(Token::CloseParenthesis(pos)),
281 CH_AT => Ok(Token::At(pos)),
282 CH_QUESTION => Ok(Token::Question(pos)),
283 CH_COMMA => Ok(Token::Comma(pos)),
284 CH_SEMICOLON => Ok(Token::Split(pos)),
285 CH_SINGLE_QUOTE => self.single_quote(pos, ch),
286 CH_DOUBLE_QUOTE => self.double_quote(pos, ch),
287 CH_EQUAL => self.equal(pos, ch),
288 CH_GREATER => self.greater(pos, ch),
289 CH_LITTLE => self.little(pos, ch),
290 CH_AMPERSAND => self.and(pos, ch),
291 CH_PIPE => self.or(pos, ch),
292 CH_EXCLAMATION => self.not_equal(pos, ch),
293 _ if ch.is_whitespace() => self.whitespace(pos, ch),
294 _ => self.other(pos, ch),
295 }
296 }
297
298 fn current_pos(&self) -> usize {
299 self.input.current_pos()
300 }
301}
302
303pub struct TokenReader<'a> {
304 origin_input: &'a str,
305 err: TokenError,
306 err_pos: usize,
307 tokens: Vec<(usize, Token)>,
308 curr_pos: Option<usize>,
309}
310
311impl<'a> TokenReader<'a> {
312 pub fn new(input: &'a str) -> Self {
313 let mut tokenizer = Tokenizer::new(input);
314 let mut tokens = vec![];
315 loop {
316 match tokenizer.next_token() {
317 Ok(t) => {
318 tokens.insert(0, (tokenizer.current_pos(), t));
319 }
320 Err(e) => {
321 return TokenReader {
322 origin_input: input,
323 err: e,
324 err_pos: tokenizer.current_pos(),
325 tokens,
326 curr_pos: None,
327 };
328 }
329 }
330 }
331 }
332
333 pub fn peek_token(&self) -> Result<&Token, TokenError> {
334 match self.tokens.last() {
335 Some((_, t)) => {
336 trace!("%{:?}", t);
337 Ok(t)
338 }
339 _ => {
340 trace!("%{:?}", self.err);
341 Err(self.err.clone())
342 }
343 }
344 }
345
346 pub fn next_token(&mut self) -> Result<Token, TokenError> {
347 match self.tokens.pop() {
348 Some((pos, t)) => {
349 self.curr_pos = Some(pos);
350 trace!("@{:?}", t);
351 Ok(t)
352 }
353 _ => {
354 trace!("@{:?}", self.err);
355 Err(self.err.clone())
356 }
357 }
358 }
359
360 pub fn err_msg_with_pos(&self, pos: usize) -> String {
361 format!("{}\n{}", self.origin_input, "^".repeat(pos))
362 }
363
364 pub fn err_msg(&self) -> String {
365 match self.curr_pos {
366 Some(pos) => self.err_msg_with_pos(pos),
367 _ => self.err_msg_with_pos(self.err_pos),
368 }
369 }
370}
371