1 | // Copyright 2018 the SVG Types Authors |
2 | // SPDX-License-Identifier: Apache-2.0 OR MIT |
3 | |
4 | use crate::Error; |
5 | |
/// Byte classification helpers used while scanning text.
pub(crate) trait ByteExt {
    /// Returns `true` if the byte is a numeric sign: `+` or `-`.
    fn is_sign(&self) -> bool;

    /// Returns `true` if the byte is a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` if the byte is a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` if the byte is a whitespace.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` if the byte may appear in an ASCII ident.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately not `is_ascii_whitespace`: that one also accepts form feed.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
56 | |
/// Character classes used when scanning CSS-style identifiers.
trait CharExt {
    /// Checks if a char may start a name: `_`, an ASCII letter,
    /// a non-ASCII char or an escape.
    fn is_name_start(&self) -> bool;

    /// Checks if a char may continue a name: everything accepted by
    /// `is_name_start` plus ASCII digits and `-`.
    fn is_name_char(&self) -> bool;

    /// Checks if a char is outside of the ASCII range (U+0080 and above).
    fn is_non_ascii(&self) -> bool;

    /// Checks if a char is an escape. Currently always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // The `\237` bound in the CSS grammar is an octal escape, not decimal 237.
        // CSS Syntax Level 3 defines a non-ASCII code point as U+0080 or above,
        // so chars like U+00E9 must be accepted here.
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: escape sequences are not supported yet.
        false
    }
}
92 | |
/// A cursor over a borrowed string, used for streaming text parsing.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    // The whole input text.
    text: &'a str,
    // Current cursor offset into `text`, in bytes.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream positioned at the start of `text`.
    #[inline]
    fn from(text: &'a str) -> Self {
        Self { pos: 0, text }
    }
}
106 | |
107 | impl<'a> Stream<'a> { |
108 | /// Returns the current position in bytes. |
109 | #[inline ] |
110 | pub fn pos(&self) -> usize { |
111 | self.pos |
112 | } |
113 | |
114 | /// Calculates the current position in chars. |
115 | pub fn calc_char_pos(&self) -> usize { |
116 | self.calc_char_pos_at(self.pos) |
117 | } |
118 | |
119 | /// Calculates the current position in chars. |
120 | pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize { |
121 | let mut pos = 1; |
122 | for (idx, _) in self.text.char_indices() { |
123 | if idx >= byte_pos { |
124 | break; |
125 | } |
126 | |
127 | pos += 1; |
128 | } |
129 | |
130 | pos |
131 | } |
132 | |
133 | /// Sets current position equal to the end. |
134 | /// |
135 | /// Used to indicate end of parsing on error. |
136 | #[inline ] |
137 | pub fn jump_to_end(&mut self) { |
138 | self.pos = self.text.len(); |
139 | } |
140 | |
141 | /// Checks if the stream is reached the end. |
142 | /// |
143 | /// Any [`pos()`] value larger than original text length indicates stream end. |
144 | /// |
145 | /// Accessing stream after reaching end via safe methods will produce |
146 | /// an `UnexpectedEndOfStream` error. |
147 | /// |
148 | /// Accessing stream after reaching end via *_unchecked methods will produce |
149 | /// a Rust's bound checking error. |
150 | /// |
151 | /// [`pos()`]: #method.pos |
152 | #[inline ] |
153 | pub fn at_end(&self) -> bool { |
154 | self.pos >= self.text.len() |
155 | } |
156 | |
157 | /// Returns a byte from a current stream position. |
158 | /// |
159 | /// # Errors |
160 | /// |
161 | /// - `UnexpectedEndOfStream` |
162 | #[inline ] |
163 | pub fn curr_byte(&self) -> Result<u8, Error> { |
164 | if self.at_end() { |
165 | return Err(Error::UnexpectedEndOfStream); |
166 | } |
167 | |
168 | Ok(self.curr_byte_unchecked()) |
169 | } |
170 | |
171 | #[inline ] |
172 | pub fn chars(&self) -> std::str::Chars<'a> { |
173 | self.text[self.pos..].chars() |
174 | } |
175 | |
176 | /// Returns a byte from a current stream position. |
177 | /// |
178 | /// # Panics |
179 | /// |
180 | /// - if the current position is after the end of the data |
181 | #[inline ] |
182 | pub fn curr_byte_unchecked(&self) -> u8 { |
183 | self.text.as_bytes()[self.pos] |
184 | } |
185 | |
186 | /// Checks that current byte is equal to provided. |
187 | /// |
188 | /// Returns `false` if no bytes left. |
189 | #[inline ] |
190 | pub fn is_curr_byte_eq(&self, c: u8) -> bool { |
191 | if !self.at_end() { |
192 | self.curr_byte_unchecked() == c |
193 | } else { |
194 | false |
195 | } |
196 | } |
197 | |
198 | /// Returns a next byte from a current stream position. |
199 | /// |
200 | /// # Errors |
201 | /// |
202 | /// - `UnexpectedEndOfStream` |
203 | #[inline ] |
204 | pub fn next_byte(&self) -> Result<u8, Error> { |
205 | if self.pos + 1 >= self.text.len() { |
206 | return Err(Error::UnexpectedEndOfStream); |
207 | } |
208 | |
209 | Ok(self.text.as_bytes()[self.pos + 1]) |
210 | } |
211 | |
212 | /// Advances by `n` bytes. |
213 | #[inline ] |
214 | pub fn advance(&mut self, n: usize) { |
215 | debug_assert!(self.pos + n <= self.text.len()); |
216 | self.pos += n; |
217 | } |
218 | |
219 | /// Skips whitespaces. |
220 | /// |
221 | /// Accepted values: `' ' \n \r \t`. |
222 | pub fn skip_spaces(&mut self) { |
223 | while !self.at_end() && self.curr_byte_unchecked().is_space() { |
224 | self.advance(1); |
225 | } |
226 | } |
227 | |
228 | /// Checks that the stream starts with a selected text. |
229 | /// |
230 | /// We are using `&[u8]` instead of `&str` for performance reasons. |
231 | #[inline ] |
232 | pub fn starts_with(&self, text: &[u8]) -> bool { |
233 | self.text.as_bytes()[self.pos..].starts_with(text) |
234 | } |
235 | |
236 | /// Consumes current byte if it's equal to the provided byte. |
237 | /// |
238 | /// # Errors |
239 | /// |
240 | /// - `InvalidChar` |
241 | /// - `UnexpectedEndOfStream` |
242 | pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> { |
243 | if self.curr_byte()? != c { |
244 | return Err(Error::InvalidChar( |
245 | vec![self.curr_byte_unchecked(), c], |
246 | self.calc_char_pos(), |
247 | )); |
248 | } |
249 | |
250 | self.advance(1); |
251 | Ok(()) |
252 | } |
253 | |
254 | /// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token). |
255 | /// |
256 | /// # Errors |
257 | /// |
258 | /// - `InvalidIdent` |
259 | pub fn parse_ident(&mut self) -> Result<&'a str, Error> { |
260 | let start = self.pos(); |
261 | |
262 | if self.curr_byte() == Ok(b'-' ) { |
263 | self.advance(1); |
264 | } |
265 | |
266 | let mut iter = self.chars(); |
267 | if let Some(c) = iter.next() { |
268 | if c.is_name_start() { |
269 | self.advance(c.len_utf8()); |
270 | } else { |
271 | return Err(Error::InvalidIdent); |
272 | } |
273 | } |
274 | |
275 | for c in iter { |
276 | if c.is_name_char() { |
277 | self.advance(c.len_utf8()); |
278 | } else { |
279 | break; |
280 | } |
281 | } |
282 | |
283 | if start == self.pos() { |
284 | return Err(Error::InvalidIdent); |
285 | } |
286 | |
287 | let name = self.slice_back(start); |
288 | Ok(name) |
289 | } |
290 | |
291 | /// Consumes a single ident consisting of ASCII characters, if available. |
292 | pub fn consume_ascii_ident(&mut self) -> &'a str { |
293 | let start = self.pos; |
294 | self.skip_bytes(|_, c| c.is_ascii_ident()); |
295 | self.slice_back(start) |
296 | } |
297 | |
298 | /// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token) |
299 | /// |
300 | /// # Errors |
301 | /// |
302 | /// - `UnexpectedEndOfStream` |
303 | /// - `InvalidValue` |
304 | pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> { |
305 | // Check for opening quote. |
306 | let quote = self.curr_byte()?; |
307 | |
308 | if quote != b' \'' && quote != b'"' { |
309 | return Err(Error::InvalidValue); |
310 | } |
311 | |
312 | let mut prev = quote; |
313 | self.advance(1); |
314 | |
315 | let start = self.pos(); |
316 | |
317 | while !self.at_end() { |
318 | let curr = self.curr_byte_unchecked(); |
319 | |
320 | // Advance until the closing quote. |
321 | if curr == quote { |
322 | // Check for escaped quote. |
323 | if prev != b' \\' { |
324 | break; |
325 | } |
326 | } |
327 | |
328 | prev = curr; |
329 | self.advance(1); |
330 | } |
331 | |
332 | let value = self.slice_back(start); |
333 | |
334 | // Check for closing quote. |
335 | self.consume_byte(quote)?; |
336 | |
337 | Ok(value) |
338 | } |
339 | |
340 | /// Consumes selected string. |
341 | /// |
342 | /// # Errors |
343 | /// |
344 | /// - `InvalidChar` |
345 | /// - `UnexpectedEndOfStream` |
346 | pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> { |
347 | if self.at_end() { |
348 | return Err(Error::UnexpectedEndOfStream); |
349 | } |
350 | |
351 | if !self.starts_with(text) { |
352 | let len = std::cmp::min(text.len(), self.text.len() - self.pos); |
353 | // Collect chars and do not slice a string, |
354 | // because the `len` can be on the char boundary. |
355 | // Which lead to a panic. |
356 | let actual = self.text[self.pos..].chars().take(len).collect(); |
357 | |
358 | // Assume that all input `text` are valid UTF-8 strings, so unwrap is safe. |
359 | let expected = std::str::from_utf8(text).unwrap().to_owned(); |
360 | |
361 | return Err(Error::InvalidString( |
362 | vec![actual, expected], |
363 | self.calc_char_pos(), |
364 | )); |
365 | } |
366 | |
367 | self.advance(text.len()); |
368 | Ok(()) |
369 | } |
370 | |
371 | /// Consumes bytes by the predicate and returns them. |
372 | /// |
373 | /// The result can be empty. |
374 | pub fn consume_bytes<F>(&mut self, f: F) -> &'a str |
375 | where |
376 | F: Fn(&Stream<'_>, u8) -> bool, |
377 | { |
378 | let start = self.pos(); |
379 | self.skip_bytes(f); |
380 | self.slice_back(start) |
381 | } |
382 | |
383 | /// Consumes bytes by the predicate. |
384 | pub fn skip_bytes<F>(&mut self, f: F) |
385 | where |
386 | F: Fn(&Stream<'_>, u8) -> bool, |
387 | { |
388 | while !self.at_end() { |
389 | let c = self.curr_byte_unchecked(); |
390 | if f(self, c) { |
391 | self.advance(1); |
392 | } else { |
393 | break; |
394 | } |
395 | } |
396 | } |
397 | |
398 | /// Slices data from `pos` to the current position. |
399 | #[inline ] |
400 | pub fn slice_back(&self, pos: usize) -> &'a str { |
401 | &self.text[pos..self.pos] |
402 | } |
403 | |
404 | /// Slices data from the current position to the end. |
405 | #[inline ] |
406 | pub fn slice_tail(&self) -> &'a str { |
407 | &self.text[self.pos..] |
408 | } |
409 | |
410 | /// Parses number or percent from the stream. |
411 | /// |
412 | /// Percent value will be normalized. |
413 | pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> { |
414 | self.skip_spaces(); |
415 | |
416 | let n = self.parse_number()?; |
417 | if self.starts_with(b"%" ) { |
418 | self.advance(1); |
419 | Ok(n / 100.0) |
420 | } else { |
421 | Ok(n) |
422 | } |
423 | } |
424 | |
425 | /// Parses number or percent from a list of numbers and/or percents. |
426 | pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> { |
427 | if self.at_end() { |
428 | return Err(Error::UnexpectedEndOfStream); |
429 | } |
430 | |
431 | let l = self.parse_number_or_percent()?; |
432 | self.skip_spaces(); |
433 | self.parse_list_separator(); |
434 | Ok(l) |
435 | } |
436 | |
437 | /// Skips digits. |
438 | pub fn skip_digits(&mut self) { |
439 | self.skip_bytes(|_, c| c.is_digit()); |
440 | } |
441 | |
442 | #[inline ] |
443 | pub(crate) fn parse_list_separator(&mut self) { |
444 | if self.is_curr_byte_eq(b',' ) { |
445 | self.advance(1); |
446 | } |
447 | } |
448 | } |
449 | |