1 | use std::result::Result; |
2 | |
3 | use super::path_reader::{PathReader, ReaderError}; |
4 | |
// Characters that have a dedicated meaning for the tokenizer. Anything that is
// neither listed here nor whitespace is treated as part of a key.
const CH_DOLLA: char = '$';
const CH_DOT: char = '.';
const CH_ASTERISK: char = '*';
const CH_LARRAY: char = '[';
const CH_RARRAY: char = ']';
const CH_LPAREN: char = '(';
const CH_RPAREN: char = ')';
const CH_AT: char = '@';
const CH_QUESTION: char = '?';
const CH_COMMA: char = ',';
// NOTE: despite the name this is the colon (`:`); the name is kept because the
// rest of the file refers to it.
const CH_SEMICOLON: char = ':';
const CH_EQUAL: char = '=';
const CH_AMPERSAND: char = '&';
const CH_PIPE: char = '|';
const CH_LITTLE: char = '<';
const CH_GREATER: char = '>';
const CH_EXCLAMATION: char = '!';
// A char literal must hold exactly one character: the escaped quote, no padding.
const CH_SINGLE_QUOTE: char = '\'';
const CH_DOUBLE_QUOTE: char = '"';
24 | |
/// Errors produced while tokenizing a path expression.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenError {
    /// The underlying reader reported `ReaderError::Eof`.
    Eof,
    /// An unexpected character sequence; carries the position of the token
    /// that could not be completed.
    Position(usize),
}
30 | |
31 | fn to_token_error(read_err: ReaderError) -> TokenError { |
32 | match read_err { |
33 | ReaderError::Eof => TokenError::Eof, |
34 | } |
35 | } |
36 | |
/// A lexical token of a path expression.
///
/// The leading `usize` of every variant is the position the reader reported
/// for the token's first character.
#[derive(Debug, PartialEq)]
pub enum Token {
    /// A lone `$`.
    Absolute(usize),
    /// `.`
    Dot(usize),
    /// `@`
    At(usize),
    /// `[`
    OpenArray(usize),
    /// `]`
    CloseArray(usize),
    /// `*`
    Asterisk(usize),
    /// `?`
    Question(usize),
    /// `,`
    Comma(usize),
    /// `:`
    Split(usize),
    /// `(`
    OpenParenthesis(usize),
    /// `)`
    CloseParenthesis(usize),
    /// A run of characters that are neither reserved nor whitespace.
    Key(usize, String),
    /// Text that was enclosed in double quotes.
    DoubleQuoted(usize, String),
    /// Text that was enclosed in single quotes.
    SingleQuoted(usize, String),
    /// `==`
    Equal(usize),
    /// `>=`
    GreaterOrEqual(usize),
    /// `>`
    Greater(usize),
    /// `<`
    Little(usize),
    /// `<=`
    LittleOrEqual(usize),
    /// `!=`
    NotEqual(usize),
    /// `&&`
    And(usize),
    /// `||`
    Or(usize),
    /// Whitespace; the second field is the length of the whitespace run that
    /// follows the first whitespace character.
    Whitespace(usize, usize),
}

impl Token {
    /// Returns `true` when `self` and `other` are the same variant,
    /// regardless of the position or text they carry.
    pub fn is_match_token_type(&self, other: Token) -> bool {
        std::mem::discriminant(self) == std::mem::discriminant(&other)
    }
}
93 | |
94 | pub struct Tokenizer<'a> { |
95 | input: PathReader<'a>, |
96 | } |
97 | |
98 | impl<'a> Tokenizer<'a> { |
99 | pub fn new(input: &'a str) -> Self { |
100 | trace!("input: {}" , input); |
101 | Tokenizer { |
102 | input: PathReader::new(input), |
103 | } |
104 | } |
105 | |
106 | fn dolla(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { |
107 | let fun = |c: &char| match c { |
108 | &CH_DOT |
109 | | &CH_ASTERISK |
110 | | &CH_LARRAY |
111 | | &CH_RARRAY |
112 | | &CH_LPAREN |
113 | | &CH_RPAREN |
114 | | &CH_AT |
115 | | &CH_QUESTION |
116 | | &CH_COMMA |
117 | | &CH_SEMICOLON |
118 | | &CH_LITTLE |
119 | | &CH_GREATER |
120 | | &CH_EQUAL |
121 | | &CH_AMPERSAND |
122 | | &CH_PIPE |
123 | | &CH_EXCLAMATION |
124 | => false, |
125 | _ => !c.is_whitespace(), |
126 | }; |
127 | let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?; |
128 | vec.insert(0, ch); |
129 | |
130 | if vec.len() == 1 { |
131 | Ok(Token::Absolute(pos)) |
132 | } else { |
133 | Ok(Token::Key(pos, vec)) |
134 | } |
135 | } |
136 | |
137 | fn quote(&mut self, ch: char) -> Result<String, TokenError> { |
138 | let (_, mut val) = self |
139 | .input |
140 | .take_while(|c| *c != ch) |
141 | .map_err(to_token_error)?; |
142 | |
143 | if let Some(' \\' ) = val.chars().last() { |
144 | self.input.next_char().map_err(to_token_error)?; |
145 | let _ = val.pop(); |
146 | let (_, val_remain) = self |
147 | .input |
148 | .take_while(|c| *c != ch) |
149 | .map_err(to_token_error)?; |
150 | self.input.next_char().map_err(to_token_error)?; |
151 | val.push(ch); |
152 | val.push_str(val_remain.as_str()); |
153 | } else { |
154 | self.input.next_char().map_err(to_token_error)?; |
155 | } |
156 | |
157 | Ok(val) |
158 | } |
159 | |
160 | fn single_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { |
161 | let val = self.quote(ch)?; |
162 | Ok(Token::SingleQuoted(pos, val)) |
163 | } |
164 | |
165 | fn double_quote(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { |
166 | let val = self.quote(ch)?; |
167 | Ok(Token::DoubleQuoted(pos, val)) |
168 | } |
169 | |
170 | fn equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
171 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
172 | match ch { |
173 | CH_EQUAL => { |
174 | self.input.next_char().map_err(to_token_error)?; |
175 | Ok(Token::Equal(pos)) |
176 | } |
177 | _ => Err(TokenError::Position(pos)), |
178 | } |
179 | } |
180 | |
181 | fn not_equal(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
182 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
183 | match ch { |
184 | CH_EQUAL => { |
185 | self.input.next_char().map_err(to_token_error)?; |
186 | Ok(Token::NotEqual(pos)) |
187 | } |
188 | _ => Err(TokenError::Position(pos)), |
189 | } |
190 | } |
191 | |
192 | fn little(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
193 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
194 | match ch { |
195 | CH_EQUAL => { |
196 | self.input.next_char().map_err(to_token_error)?; |
197 | Ok(Token::LittleOrEqual(pos)) |
198 | } |
199 | _ => Ok(Token::Little(pos)), |
200 | } |
201 | } |
202 | |
203 | fn greater(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
204 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
205 | match ch { |
206 | CH_EQUAL => { |
207 | self.input.next_char().map_err(to_token_error)?; |
208 | Ok(Token::GreaterOrEqual(pos)) |
209 | } |
210 | _ => Ok(Token::Greater(pos)), |
211 | } |
212 | } |
213 | |
214 | fn and(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
215 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
216 | match ch { |
217 | CH_AMPERSAND => { |
218 | let _ = self.input.next_char().map_err(to_token_error); |
219 | Ok(Token::And(pos)) |
220 | } |
221 | _ => Err(TokenError::Position(pos)), |
222 | } |
223 | } |
224 | |
225 | fn or(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
226 | let (_, ch) = self.input.peek_char().map_err(to_token_error)?; |
227 | match ch { |
228 | CH_PIPE => { |
229 | self.input.next_char().map_err(to_token_error)?; |
230 | Ok(Token::Or(pos)) |
231 | } |
232 | _ => Err(TokenError::Position(pos)), |
233 | } |
234 | } |
235 | |
236 | fn whitespace(&mut self, pos: usize, _: char) -> Result<Token, TokenError> { |
237 | let (_, vec) = self |
238 | .input |
239 | .take_while(|c| c.is_whitespace()) |
240 | .map_err(to_token_error)?; |
241 | Ok(Token::Whitespace(pos, vec.len())) |
242 | } |
243 | |
244 | fn other(&mut self, pos: usize, ch: char) -> Result<Token, TokenError> { |
245 | let fun = |c: &char| match c { |
246 | &CH_DOLLA |
247 | | &CH_DOT |
248 | | &CH_ASTERISK |
249 | | &CH_LARRAY |
250 | | &CH_RARRAY |
251 | | &CH_LPAREN |
252 | | &CH_RPAREN |
253 | | &CH_AT |
254 | | &CH_QUESTION |
255 | | &CH_COMMA |
256 | | &CH_SEMICOLON |
257 | | &CH_LITTLE |
258 | | &CH_GREATER |
259 | | &CH_EQUAL |
260 | | &CH_AMPERSAND |
261 | | &CH_PIPE |
262 | | &CH_EXCLAMATION |
263 | => false, |
264 | _ => !c.is_whitespace(), |
265 | }; |
266 | let (_, mut vec) = self.input.take_while(fun).map_err(to_token_error)?; |
267 | vec.insert(0, ch); |
268 | Ok(Token::Key(pos, vec)) |
269 | } |
270 | |
271 | pub fn next_token(&mut self) -> Result<Token, TokenError> { |
272 | let (pos, ch) = self.input.next_char().map_err(to_token_error)?; |
273 | match ch { |
274 | CH_DOLLA => self.dolla(pos, ch), |
275 | CH_DOT => Ok(Token::Dot(pos)), |
276 | CH_ASTERISK => Ok(Token::Asterisk(pos)), |
277 | CH_LARRAY => Ok(Token::OpenArray(pos)), |
278 | CH_RARRAY => Ok(Token::CloseArray(pos)), |
279 | CH_LPAREN => Ok(Token::OpenParenthesis(pos)), |
280 | CH_RPAREN => Ok(Token::CloseParenthesis(pos)), |
281 | CH_AT => Ok(Token::At(pos)), |
282 | CH_QUESTION => Ok(Token::Question(pos)), |
283 | CH_COMMA => Ok(Token::Comma(pos)), |
284 | CH_SEMICOLON => Ok(Token::Split(pos)), |
285 | CH_SINGLE_QUOTE => self.single_quote(pos, ch), |
286 | CH_DOUBLE_QUOTE => self.double_quote(pos, ch), |
287 | CH_EQUAL => self.equal(pos, ch), |
288 | CH_GREATER => self.greater(pos, ch), |
289 | CH_LITTLE => self.little(pos, ch), |
290 | CH_AMPERSAND => self.and(pos, ch), |
291 | CH_PIPE => self.or(pos, ch), |
292 | CH_EXCLAMATION => self.not_equal(pos, ch), |
293 | _ if ch.is_whitespace() => self.whitespace(pos, ch), |
294 | _ => self.other(pos, ch), |
295 | } |
296 | } |
297 | |
298 | fn current_pos(&self) -> usize { |
299 | self.input.current_pos() |
300 | } |
301 | } |
302 | |
303 | pub struct TokenReader<'a> { |
304 | origin_input: &'a str, |
305 | err: TokenError, |
306 | err_pos: usize, |
307 | tokens: Vec<(usize, Token)>, |
308 | curr_pos: Option<usize>, |
309 | } |
310 | |
311 | impl<'a> TokenReader<'a> { |
312 | pub fn new(input: &'a str) -> Self { |
313 | let mut tokenizer = Tokenizer::new(input); |
314 | let mut tokens = vec![]; |
315 | loop { |
316 | match tokenizer.next_token() { |
317 | Ok(t) => { |
318 | tokens.insert(0, (tokenizer.current_pos(), t)); |
319 | } |
320 | Err(e) => { |
321 | return TokenReader { |
322 | origin_input: input, |
323 | err: e, |
324 | err_pos: tokenizer.current_pos(), |
325 | tokens, |
326 | curr_pos: None, |
327 | }; |
328 | } |
329 | } |
330 | } |
331 | } |
332 | |
333 | pub fn peek_token(&self) -> Result<&Token, TokenError> { |
334 | match self.tokens.last() { |
335 | Some((_, t)) => { |
336 | trace!("% {:?}" , t); |
337 | Ok(t) |
338 | } |
339 | _ => { |
340 | trace!("% {:?}" , self.err); |
341 | Err(self.err.clone()) |
342 | } |
343 | } |
344 | } |
345 | |
346 | pub fn next_token(&mut self) -> Result<Token, TokenError> { |
347 | match self.tokens.pop() { |
348 | Some((pos, t)) => { |
349 | self.curr_pos = Some(pos); |
350 | trace!("@ {:?}" , t); |
351 | Ok(t) |
352 | } |
353 | _ => { |
354 | trace!("@ {:?}" , self.err); |
355 | Err(self.err.clone()) |
356 | } |
357 | } |
358 | } |
359 | |
360 | pub fn err_msg_with_pos(&self, pos: usize) -> String { |
361 | format!(" {}\n{}" , self.origin_input, "^" .repeat(pos)) |
362 | } |
363 | |
364 | pub fn err_msg(&self) -> String { |
365 | match self.curr_pos { |
366 | Some(pos) => self.err_msg_with_pos(pos), |
367 | _ => self.err_msg_with_pos(self.err_pos), |
368 | } |
369 | } |
370 | } |
371 | |