1use std::str::FromStr;
2
3use crate::Error;
4
/// Byte classification helpers used by the stream parser.
///
/// All predicates work on single ASCII bytes.
pub(crate) trait ByteExt {
    /// Returns `true` if the byte is a numeric sign.
    ///
    /// `[+-]`
    fn is_sign(&self) -> bool;

    /// Returns `true` if the byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` if the byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` if the byte is a whitespace byte.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` if the byte is a single or a double quote.
    ///
    /// `['"]`
    fn is_quote(&self) -> bool;

    /// Returns `true` if the byte is an ASCII letter.
    ///
    /// `[A-Za-z]`
    fn is_letter(&self) -> bool;

    /// Returns `true` if the byte may appear in an ASCII identifier.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately narrower than `u8::is_ascii_whitespace`, which also accepts form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_quote(&self) -> bool {
        *self == b'\'' || *self == b'"'
    }

    #[inline]
    fn is_letter(&self) -> bool {
        self.is_ascii_alphabetic()
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
72
/// Character classification helpers for CSS identifier parsing.
trait CharExt {
    /// Returns `true` if the char may start a name: `_`, an ASCII letter,
    /// a non-ASCII char or an escape.
    fn is_name_start(&self) -> bool;

    /// Returns `true` if the char may continue a name: anything accepted by
    /// `is_name_start`, plus ASCII digits and `-`.
    fn is_name_char(&self) -> bool;

    /// Returns `true` if the char is a non-ASCII code point (U+0080 or greater).
    fn is_non_ascii(&self) -> bool;

    /// Returns `true` if the char is an escape. Not implemented yet: always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        matches!(*self, '_' | 'a'..='z' | 'A'..='Z') || self.is_non_ascii() || self.is_escape()
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        matches!(*self, '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-')
            || self.is_non_ascii()
            || self.is_escape()
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // CSS Syntax Level 3 defines a non-ASCII code point as U+0080 or greater.
        // The previous check (`> 237`, a decimal reading of the octal `\237` from
        // the CSS 2.1 grammar) rejected chars such as U+00E9.
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: escapes are not supported yet.
        false
    }
}
108
/// A streaming text parsing interface.
///
/// Wraps a borrowed string together with a byte cursor into it.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub struct Stream<'a> {
    /// The complete input text.
    text: &'a str,
    /// The current byte offset into `text`.
    pos: usize,
}
115
116impl<'a> From<&'a str> for Stream<'a> {
117 #[inline]
118 fn from(text: &'a str) -> Self {
119 Stream { text, pos: 0 }
120 }
121}
122
123impl<'a> Stream<'a> {
124 /// Returns the current position in bytes.
125 #[inline]
126 pub fn pos(&self) -> usize {
127 self.pos
128 }
129
130 /// Calculates the current position in chars.
131 pub fn calc_char_pos(&self) -> usize {
132 self.calc_char_pos_at(self.pos)
133 }
134
135 /// Calculates the current position in chars.
136 pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
137 let mut pos = 1;
138 for (idx, _) in self.text.char_indices() {
139 if idx >= byte_pos {
140 break;
141 }
142
143 pos += 1;
144 }
145
146 pos
147 }
148
149 /// Sets current position equal to the end.
150 ///
151 /// Used to indicate end of parsing on error.
152 #[inline]
153 pub fn jump_to_end(&mut self) {
154 self.pos = self.text.len();
155 }
156
157 /// Checks if the stream is reached the end.
158 ///
159 /// Any [`pos()`] value larger than original text length indicates stream end.
160 ///
161 /// Accessing stream after reaching end via safe methods will produce
162 /// an `UnexpectedEndOfStream` error.
163 ///
164 /// Accessing stream after reaching end via *_unchecked methods will produce
165 /// a Rust's bound checking error.
166 ///
167 /// [`pos()`]: #method.pos
168 #[inline]
169 pub fn at_end(&self) -> bool {
170 self.pos >= self.text.len()
171 }
172
173 /// Returns a byte from a current stream position.
174 ///
175 /// # Errors
176 ///
177 /// - `UnexpectedEndOfStream`
178 #[inline]
179 pub fn curr_byte(&self) -> Result<u8, Error> {
180 if self.at_end() {
181 return Err(Error::UnexpectedEndOfStream);
182 }
183
184 Ok(self.curr_byte_unchecked())
185 }
186
187 #[inline]
188 pub fn chars(&self) -> std::str::Chars<'a> {
189 self.text[self.pos..].chars()
190 }
191
192 /// Returns a byte from a current stream position.
193 ///
194 /// # Panics
195 ///
196 /// - if the current position is after the end of the data
197 #[inline]
198 pub fn curr_byte_unchecked(&self) -> u8 {
199 self.text.as_bytes()[self.pos]
200 }
201
202 /// Checks that current byte is equal to provided.
203 ///
204 /// Returns `false` if no bytes left.
205 #[inline]
206 pub fn is_curr_byte_eq(&self, c: u8) -> bool {
207 if !self.at_end() {
208 self.curr_byte_unchecked() == c
209 } else {
210 false
211 }
212 }
213
214 /// Returns a next byte from a current stream position.
215 ///
216 /// # Errors
217 ///
218 /// - `UnexpectedEndOfStream`
219 #[inline]
220 pub fn next_byte(&self) -> Result<u8, Error> {
221 if self.pos + 1 >= self.text.len() {
222 return Err(Error::UnexpectedEndOfStream);
223 }
224
225 Ok(self.text.as_bytes()[self.pos + 1])
226 }
227
228 /// Advances by `n` bytes.
229 #[inline]
230 pub fn advance(&mut self, n: usize) {
231 debug_assert!(self.pos + n <= self.text.len());
232 self.pos += n;
233 }
234
235 /// Skips whitespaces.
236 ///
237 /// Accepted values: `' ' \n \r \t`.
238 pub fn skip_spaces(&mut self) {
239 while !self.at_end() && self.curr_byte_unchecked().is_space() {
240 self.advance(1);
241 }
242 }
243
244 /// Checks that the stream starts with a selected text.
245 ///
246 /// We are using `&[u8]` instead of `&str` for performance reasons.
247 #[inline]
248 pub fn starts_with(&self, text: &[u8]) -> bool {
249 self.text.as_bytes()[self.pos..].starts_with(text)
250 }
251
252 /// Consumes current byte if it's equal to the provided byte.
253 ///
254 /// # Errors
255 ///
256 /// - `InvalidChar`
257 /// - `UnexpectedEndOfStream`
258 pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
259 if self.curr_byte()? != c {
260 return Err(Error::InvalidChar(
261 vec![self.curr_byte_unchecked(), c],
262 self.calc_char_pos(),
263 ));
264 }
265
266 self.advance(1);
267 Ok(())
268 }
269
270 /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
271 ///
272 /// # Errors
273 ///
274 /// - `InvalidIdent`
275 pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
276 let start = self.pos();
277
278 if self.curr_byte() == Ok(b'-') {
279 self.advance(1);
280 }
281
282 let mut iter = self.chars();
283 if let Some(c) = iter.next() {
284 if c.is_name_start() {
285 self.advance(c.len_utf8());
286 } else {
287 return Err(Error::InvalidIdent);
288 }
289 }
290
291 for c in iter {
292 if c.is_name_char() {
293 self.advance(c.len_utf8());
294 } else {
295 break;
296 }
297 }
298
299 if start == self.pos() {
300 return Err(Error::InvalidIdent);
301 }
302
303 let name = self.slice_back(start);
304 Ok(name)
305 }
306
307 /// Consumes a single ident consisting of ASCII characters, if available.
308 pub fn consume_ascii_ident(&mut self) -> &'a str {
309 let start = self.pos;
310 self.skip_bytes(|_, c| c.is_ascii_ident());
311 self.slice_back(start)
312 }
313
314 /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
315 ///
316 /// # Errors
317 ///
318 /// - `UnexpectedEndOfStream`
319 /// - `InvalidValue`
320 pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
321 // Check for opening quote.
322 let quote = self.curr_byte()?;
323
324 if quote != b'\'' && quote != b'"' {
325 return Err(Error::InvalidValue);
326 }
327
328 let mut prev = quote;
329 self.advance(1);
330
331 let start = self.pos();
332
333 while !self.at_end() {
334 let curr = self.curr_byte_unchecked();
335
336 // Advance until the closing quote.
337 if curr == quote {
338 // Check for escaped quote.
339 if prev != b'\\' {
340 break;
341 }
342 }
343
344 prev = curr;
345 self.advance(1);
346 }
347
348 let value = self.slice_back(start);
349
350 // Check for closing quote.
351 self.consume_byte(quote)?;
352
353 Ok(value)
354 }
355
356 /// Consumes selected string.
357 ///
358 /// # Errors
359 ///
360 /// - `InvalidChar`
361 /// - `UnexpectedEndOfStream`
362 pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
363 if self.at_end() {
364 return Err(Error::UnexpectedEndOfStream);
365 }
366
367 if !self.starts_with(text) {
368 let len = std::cmp::min(text.len(), self.text.len() - self.pos);
369 // Collect chars and do not slice a string,
370 // because the `len` can be on the char boundary.
371 // Which lead to a panic.
372 let actual = self.text[self.pos..].chars().take(len).collect();
373
374 // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
375 let expected = std::str::from_utf8(text).unwrap().to_owned();
376
377 return Err(Error::InvalidString(
378 vec![actual, expected],
379 self.calc_char_pos(),
380 ));
381 }
382
383 self.advance(text.len());
384 Ok(())
385 }
386
387 /// Consumes bytes by the predicate and returns them.
388 ///
389 /// The result can be empty.
390 pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
391 where
392 F: Fn(&Stream, u8) -> bool,
393 {
394 let start = self.pos();
395 self.skip_bytes(f);
396 self.slice_back(start)
397 }
398
399 /// Consumes bytes by the predicate.
400 pub fn skip_bytes<F>(&mut self, f: F)
401 where
402 F: Fn(&Stream, u8) -> bool,
403 {
404 while !self.at_end() {
405 let c = self.curr_byte_unchecked();
406 if f(self, c) {
407 self.advance(1);
408 } else {
409 break;
410 }
411 }
412 }
413
414 /// Slices data from `pos` to the current position.
415 #[inline]
416 pub fn slice_back(&self, pos: usize) -> &'a str {
417 &self.text[pos..self.pos]
418 }
419
420 /// Slices data from the current position to the end.
421 #[inline]
422 pub fn slice_tail(&self) -> &'a str {
423 &self.text[self.pos..]
424 }
425
426 /// Parses integer number from the stream.
427 ///
428 /// Same as [`parse_number()`], but only for integer. Does not refer to any SVG type.
429 ///
430 /// [`parse_number()`]: #method.parse_number
431 pub fn parse_integer(&mut self) -> Result<i32, Error> {
432 self.skip_spaces();
433
434 if self.at_end() {
435 return Err(Error::InvalidNumber(self.calc_char_pos()));
436 }
437
438 let start = self.pos();
439
440 // Consume sign.
441 if self.curr_byte()?.is_sign() {
442 self.advance(1);
443 }
444
445 // The current char must be a digit.
446 if !self.curr_byte()?.is_digit() {
447 return Err(Error::InvalidNumber(self.calc_char_pos_at(start)));
448 }
449
450 self.skip_digits();
451
452 // Use the default i32 parser now.
453 let s = self.slice_back(start);
454 match i32::from_str(s) {
455 Ok(n) => Ok(n),
456 Err(_) => Err(Error::InvalidNumber(self.calc_char_pos_at(start))),
457 }
458 }
459
460 /// Parses integer from a list of numbers.
461 pub fn parse_list_integer(&mut self) -> Result<i32, Error> {
462 if self.at_end() {
463 return Err(Error::UnexpectedEndOfStream);
464 }
465
466 let n = self.parse_integer()?;
467 self.skip_spaces();
468 self.parse_list_separator();
469 Ok(n)
470 }
471
472 /// Parses number or percent from the stream.
473 ///
474 /// Percent value will be normalized.
475 pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
476 self.skip_spaces();
477
478 let n = self.parse_number()?;
479 if self.starts_with(b"%") {
480 self.advance(1);
481 Ok(n / 100.0)
482 } else {
483 Ok(n)
484 }
485 }
486
487 /// Parses number or percent from a list of numbers and/or percents.
488 pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
489 if self.at_end() {
490 return Err(Error::UnexpectedEndOfStream);
491 }
492
493 let l = self.parse_number_or_percent()?;
494 self.skip_spaces();
495 self.parse_list_separator();
496 Ok(l)
497 }
498
499 /// Skips digits.
500 pub fn skip_digits(&mut self) {
501 self.skip_bytes(|_, c| c.is_digit());
502 }
503
504 #[inline]
505 pub(crate) fn parse_list_separator(&mut self) {
506 if self.is_curr_byte_eq(b',') {
507 self.advance(1);
508 }
509 }
510}
511
#[rustfmt::skip]
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain decimal literal parses to its value.
    #[test]
    fn parse_integer_1() {
        let parsed = Stream::from("10").parse_integer();
        assert_eq!(parsed.unwrap(), 10);
    }

    /// A literal that does not fit into `i32` is rejected, and the error
    /// points at the char position where the number starts.
    #[test]
    fn parse_err_integer_1() {
        let err = Stream::from("10000000000000").parse_integer().unwrap_err();
        assert_eq!(err.to_string(), "invalid number at position 1");
    }
}
531