1// Copyright 2018 the SVG Types Authors
2// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4use crate::Error;
5
/// Byte-level classification helpers used by the parsers in this crate.
///
/// All predicates operate on a single ASCII byte; any byte outside the
/// listed sets yields `false`.
pub(crate) trait ByteExt {
    /// Returns `true` if the byte is a numeric sign: `+` or `-`.
    fn is_sign(&self) -> bool;

    /// Returns `true` if the byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` if the byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` if the byte is a whitespace.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` if the byte may appear in an ASCII ident.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately not `is_ascii_whitespace`: that one also accepts form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
56
/// Character classification helpers for CSS ident parsing.
trait CharExt {
    /// Returns `true` if the char may start a name: `[_a-zA-Z]`, a non-ASCII
    /// char or an escape.
    fn is_name_start(&self) -> bool;

    /// Returns `true` if the char may continue a name: `[_a-zA-Z0-9-]`,
    /// a non-ASCII char or an escape.
    fn is_name_char(&self) -> bool;

    /// Returns `true` if the char is a non-ASCII code point (`>= U+0080`).
    fn is_non_ascii(&self) -> bool;

    /// Returns `true` if the char is an escape.
    ///
    /// Escapes are not implemented yet, so this is always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // CSS Syntax defines a non-ASCII code point as any code point
        // greater than or equal to U+0080. The previous decimal `> 237`
        // threshold rejected chars such as `é` (U+00E9).
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: recognize escapes.
        false
    }
}
92
/// A cursor over a string slice used for streaming text parsing.
///
/// Holds the borrowed input together with the current byte offset into it.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    /// The whole input being parsed.
    text: &'a str,
    /// Current offset into `text`, in bytes.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream positioned at the start of `text`.
    #[inline]
    fn from(text: &'a str) -> Self {
        Self { pos: 0, text }
    }
}
106
107impl<'a> Stream<'a> {
108 /// Returns the current position in bytes.
109 #[inline]
110 pub fn pos(&self) -> usize {
111 self.pos
112 }
113
114 /// Calculates the current position in chars.
115 pub fn calc_char_pos(&self) -> usize {
116 self.calc_char_pos_at(self.pos)
117 }
118
119 /// Calculates the current position in chars.
120 pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
121 let mut pos = 1;
122 for (idx, _) in self.text.char_indices() {
123 if idx >= byte_pos {
124 break;
125 }
126
127 pos += 1;
128 }
129
130 pos
131 }
132
133 /// Sets current position equal to the end.
134 ///
135 /// Used to indicate end of parsing on error.
136 #[inline]
137 pub fn jump_to_end(&mut self) {
138 self.pos = self.text.len();
139 }
140
141 /// Checks if the stream is reached the end.
142 ///
143 /// Any [`pos()`] value larger than original text length indicates stream end.
144 ///
145 /// Accessing stream after reaching end via safe methods will produce
146 /// an `UnexpectedEndOfStream` error.
147 ///
148 /// Accessing stream after reaching end via *_unchecked methods will produce
149 /// a Rust's bound checking error.
150 ///
151 /// [`pos()`]: #method.pos
152 #[inline]
153 pub fn at_end(&self) -> bool {
154 self.pos >= self.text.len()
155 }
156
157 /// Returns a byte from a current stream position.
158 ///
159 /// # Errors
160 ///
161 /// - `UnexpectedEndOfStream`
162 #[inline]
163 pub fn curr_byte(&self) -> Result<u8, Error> {
164 if self.at_end() {
165 return Err(Error::UnexpectedEndOfStream);
166 }
167
168 Ok(self.curr_byte_unchecked())
169 }
170
171 #[inline]
172 pub fn chars(&self) -> std::str::Chars<'a> {
173 self.text[self.pos..].chars()
174 }
175
176 /// Returns a byte from a current stream position.
177 ///
178 /// # Panics
179 ///
180 /// - if the current position is after the end of the data
181 #[inline]
182 pub fn curr_byte_unchecked(&self) -> u8 {
183 self.text.as_bytes()[self.pos]
184 }
185
186 /// Checks that current byte is equal to provided.
187 ///
188 /// Returns `false` if no bytes left.
189 #[inline]
190 pub fn is_curr_byte_eq(&self, c: u8) -> bool {
191 if !self.at_end() {
192 self.curr_byte_unchecked() == c
193 } else {
194 false
195 }
196 }
197
198 /// Returns a next byte from a current stream position.
199 ///
200 /// # Errors
201 ///
202 /// - `UnexpectedEndOfStream`
203 #[inline]
204 pub fn next_byte(&self) -> Result<u8, Error> {
205 if self.pos + 1 >= self.text.len() {
206 return Err(Error::UnexpectedEndOfStream);
207 }
208
209 Ok(self.text.as_bytes()[self.pos + 1])
210 }
211
212 /// Advances by `n` bytes.
213 #[inline]
214 pub fn advance(&mut self, n: usize) {
215 debug_assert!(self.pos + n <= self.text.len());
216 self.pos += n;
217 }
218
219 /// Skips whitespaces.
220 ///
221 /// Accepted values: `' ' \n \r \t`.
222 pub fn skip_spaces(&mut self) {
223 while !self.at_end() && self.curr_byte_unchecked().is_space() {
224 self.advance(1);
225 }
226 }
227
228 /// Checks that the stream starts with a selected text.
229 ///
230 /// We are using `&[u8]` instead of `&str` for performance reasons.
231 #[inline]
232 pub fn starts_with(&self, text: &[u8]) -> bool {
233 self.text.as_bytes()[self.pos..].starts_with(text)
234 }
235
236 /// Consumes current byte if it's equal to the provided byte.
237 ///
238 /// # Errors
239 ///
240 /// - `InvalidChar`
241 /// - `UnexpectedEndOfStream`
242 pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
243 if self.curr_byte()? != c {
244 return Err(Error::InvalidChar(
245 vec![self.curr_byte_unchecked(), c],
246 self.calc_char_pos(),
247 ));
248 }
249
250 self.advance(1);
251 Ok(())
252 }
253
254 /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
255 ///
256 /// # Errors
257 ///
258 /// - `InvalidIdent`
259 pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
260 let start = self.pos();
261
262 if self.curr_byte() == Ok(b'-') {
263 self.advance(1);
264 }
265
266 let mut iter = self.chars();
267 if let Some(c) = iter.next() {
268 if c.is_name_start() {
269 self.advance(c.len_utf8());
270 } else {
271 return Err(Error::InvalidIdent);
272 }
273 }
274
275 for c in iter {
276 if c.is_name_char() {
277 self.advance(c.len_utf8());
278 } else {
279 break;
280 }
281 }
282
283 if start == self.pos() {
284 return Err(Error::InvalidIdent);
285 }
286
287 let name = self.slice_back(start);
288 Ok(name)
289 }
290
291 /// Consumes a single ident consisting of ASCII characters, if available.
292 pub fn consume_ascii_ident(&mut self) -> &'a str {
293 let start = self.pos;
294 self.skip_bytes(|_, c| c.is_ascii_ident());
295 self.slice_back(start)
296 }
297
298 /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
299 ///
300 /// # Errors
301 ///
302 /// - `UnexpectedEndOfStream`
303 /// - `InvalidValue`
304 pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
305 // Check for opening quote.
306 let quote = self.curr_byte()?;
307
308 if quote != b'\'' && quote != b'"' {
309 return Err(Error::InvalidValue);
310 }
311
312 let mut prev = quote;
313 self.advance(1);
314
315 let start = self.pos();
316
317 while !self.at_end() {
318 let curr = self.curr_byte_unchecked();
319
320 // Advance until the closing quote.
321 if curr == quote {
322 // Check for escaped quote.
323 if prev != b'\\' {
324 break;
325 }
326 }
327
328 prev = curr;
329 self.advance(1);
330 }
331
332 let value = self.slice_back(start);
333
334 // Check for closing quote.
335 self.consume_byte(quote)?;
336
337 Ok(value)
338 }
339
340 /// Consumes selected string.
341 ///
342 /// # Errors
343 ///
344 /// - `InvalidChar`
345 /// - `UnexpectedEndOfStream`
346 pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
347 if self.at_end() {
348 return Err(Error::UnexpectedEndOfStream);
349 }
350
351 if !self.starts_with(text) {
352 let len = std::cmp::min(text.len(), self.text.len() - self.pos);
353 // Collect chars and do not slice a string,
354 // because the `len` can be on the char boundary.
355 // Which lead to a panic.
356 let actual = self.text[self.pos..].chars().take(len).collect();
357
358 // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
359 let expected = std::str::from_utf8(text).unwrap().to_owned();
360
361 return Err(Error::InvalidString(
362 vec![actual, expected],
363 self.calc_char_pos(),
364 ));
365 }
366
367 self.advance(text.len());
368 Ok(())
369 }
370
371 /// Consumes bytes by the predicate and returns them.
372 ///
373 /// The result can be empty.
374 pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
375 where
376 F: Fn(&Stream<'_>, u8) -> bool,
377 {
378 let start = self.pos();
379 self.skip_bytes(f);
380 self.slice_back(start)
381 }
382
383 /// Consumes bytes by the predicate.
384 pub fn skip_bytes<F>(&mut self, f: F)
385 where
386 F: Fn(&Stream<'_>, u8) -> bool,
387 {
388 while !self.at_end() {
389 let c = self.curr_byte_unchecked();
390 if f(self, c) {
391 self.advance(1);
392 } else {
393 break;
394 }
395 }
396 }
397
398 /// Slices data from `pos` to the current position.
399 #[inline]
400 pub fn slice_back(&self, pos: usize) -> &'a str {
401 &self.text[pos..self.pos]
402 }
403
404 /// Slices data from the current position to the end.
405 #[inline]
406 pub fn slice_tail(&self) -> &'a str {
407 &self.text[self.pos..]
408 }
409
410 /// Parses number or percent from the stream.
411 ///
412 /// Percent value will be normalized.
413 pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
414 self.skip_spaces();
415
416 let n = self.parse_number()?;
417 if self.starts_with(b"%") {
418 self.advance(1);
419 Ok(n / 100.0)
420 } else {
421 Ok(n)
422 }
423 }
424
425 /// Parses number or percent from a list of numbers and/or percents.
426 pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
427 if self.at_end() {
428 return Err(Error::UnexpectedEndOfStream);
429 }
430
431 let l = self.parse_number_or_percent()?;
432 self.skip_spaces();
433 self.parse_list_separator();
434 Ok(l)
435 }
436
437 /// Skips digits.
438 pub fn skip_digits(&mut self) {
439 self.skip_bytes(|_, c| c.is_digit());
440 }
441
442 #[inline]
443 pub(crate) fn parse_list_separator(&mut self) {
444 if self.is_curr_byte_eq(b',') {
445 self.advance(1);
446 }
447 }
448}
449