stream.rs source code [crates/svgtypes/src/stream.rs]

1	// Copyright 2018 the SVG Types Authors
2	// SPDX-License-Identifier: Apache-2.0 OR MIT
3
4	use crate::Error;
5
/// Byte classification helpers limited to the ASCII subset used by the parsers.
pub(crate) trait ByteExt {
    /// Returns `true` for a numeric sign: `+` or `-`.
    fn is_sign(&self) -> bool;

    /// Returns `true` for a decimal digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Returns `true` for a hexadecimal digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Returns `true` for a whitespace byte.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Returns `true` for a byte allowed in an ASCII ident.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        *self == b'+' || *self == b'-'
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Form feed and vertical tab are deliberately not part of this set.
        [b' ', b'\t', b'\n', b'\r'].contains(self)
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || *self == b'-' || *self == b'_'
    }
}
56
/// Character classification helpers used when parsing CSS-like identifiers.
trait CharExt {
    /// Returns `true` if the char may begin an identifier name.
    fn is_name_start(&self) -> bool;
    /// Returns `true` if the char may appear inside an identifier name.
    fn is_name_char(&self) -> bool;
    /// Returns `true` if the char lies outside the ASCII range.
    fn is_non_ascii(&self) -> bool;
    /// Returns `true` if the char is part of an escape sequence.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // Every code point from U+0080 upwards counts as non-ASCII.
        // The previous `> 237` threshold wrongly rejected chars such as
        // U+00E9 (e with acute), making identifiers containing them invalid.
        !self.is_ascii()
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: escape sequences are not supported yet.
        false
    }
}
92
/// A cursor over a borrowed string that parsers advance byte by byte.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    // The complete input; it is never modified or re-sliced in place.
    text: &'a str,
    // Byte offset of the cursor inside `text`.
    pos: usize,
}
99
100	impl<'a> From<&'a str> for Stream<'a> {
101	#[inline]
102	fn from(text: &'a str) -> Self {
103	Stream { text, pos: `0` }
104	}
105	}
106
107	impl<'a> Stream<'a> {
108	/// Returns the current position in bytes.
109	#[inline]
110	pub fn pos(&self) -> usize {
111	self.pos
112	}
113
114	/// Calculates the current position in chars.
115	pub fn calc_char_pos(&self) -> usize {
116	self.calc_char_pos_at(self.pos)
117	}
118
119	/// Calculates the current position in chars.
120	pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
121	let mut pos = `1`;
122	for (idx, _) in self.text.char_indices() {
123	if idx >= byte_pos {
124	break;
125	}
126
127	pos += `1`;
128	}
129
130	pos
131	}
132
133	/// Sets current position equal to the end.
134	///
135	/// Used to indicate end of parsing on error.
136	#[inline]
137	pub fn jump_to_end(&mut self) {
138	self.pos = self.text.len();
139	}
140
141	/// Checks if the stream is reached the end.
142	///
143	/// Any [`pos()`] value larger than original text length indicates stream end.
144	///
145	/// Accessing stream after reaching end via safe methods will produce
146	/// an `UnexpectedEndOfStream` error.
147	///
148	/// Accessing stream after reaching end via _unchecked methods will produce*
149	/// a Rust's bound checking error.
150	///
151	/// [`pos()`]: #method.pos
152	#[inline]
153	pub fn at_end(&self) -> bool {
154	self.pos >= self.text.len()
155	}
156
157	/// Returns a byte from a current stream position.
158	///
159	/// # Errors
160	///
161	/// - `UnexpectedEndOfStream`
162	#[inline]
163	pub fn curr_byte(&self) -> Result<u8, Error> {
164	if self.at_end() {
165	return Err(Error::UnexpectedEndOfStream);
166	}
167
168	Ok(self.curr_byte_unchecked())
169	}
170
171	#[inline]
172	pub fn chars(&self) -> std::str::Chars<'a> {
173	self.text[self.pos..].chars()
174	}
175
176	/// Returns a byte from a current stream position.
177	///
178	/// # Panics
179	///
180	/// - if the current position is after the end of the data
181	#[inline]
182	pub fn curr_byte_unchecked(&self) -> u8 {
183	self.text.as_bytes()[self.pos]
184	}
185
186	/// Checks that current byte is equal to provided.
187	///
188	/// Returns `false` if no bytes left.
189	#[inline]
190	pub fn is_curr_byte_eq(&self, c: u8) -> bool {
191	if !self.at_end() {
192	self.curr_byte_unchecked() == c
193	} else {
194	`false`
195	}
196	}
197
198	/// Returns a next byte from a current stream position.
199	///
200	/// # Errors
201	///
202	/// - `UnexpectedEndOfStream`
203	#[inline]
204	pub fn next_byte(&self) -> Result<u8, Error> {
205	if self.pos + `1` >= self.text.len() {
206	return Err(Error::UnexpectedEndOfStream);
207	}
208
209	Ok(self.text.as_bytes()[self.pos + `1`])
210	}
211
212	/// Advances by `n` bytes.
213	#[inline]
214	pub fn advance(&mut self, n: usize) {
215	debug_assert!(self.pos + n <= self.text.len());
216	self.pos += n;
217	}
218
219	/// Skips whitespaces.
220	///
221	/// Accepted values: `' ' \n \r \t`.
222	pub fn skip_spaces(&mut self) {
223	while !self.at_end() && self.curr_byte_unchecked().is_space() {
224	self.advance(`1`);
225	}
226	}
227
228	/// Checks that the stream starts with a selected text.
229	///
230	/// We are using `&[u8]` instead of `&str` for performance reasons.
231	#[inline]
232	pub fn starts_with(&self, text: &[u8]) -> bool {
233	self.text.as_bytes()[self.pos..].starts_with(text)
234	}
235
236	/// Consumes current byte if it's equal to the provided byte.
237	///
238	/// # Errors
239	///
240	/// - `InvalidChar`
241	/// - `UnexpectedEndOfStream`
242	pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
243	if self.curr_byte()? != c {
244	return Err(Error::InvalidChar(
245	vec![self.curr_byte_unchecked(), c],
246	self.calc_char_pos(),
247	));
248	}
249
250	self.advance(`1`);
251	Ok(())
252	}
253
254	/// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
255	///
256	/// # Errors
257	///
258	/// - `InvalidIdent`
259	pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
260	let start = self.pos();
261
262	if self.curr_byte() == Ok(b'-') {
263	self.advance(`1`);
264	}
265
266	let mut iter = self.chars();
267	if let Some(c) = iter.next() {
268	if c.is_name_start() {
269	self.advance(c.len_utf8());
270	} else {
271	return Err(Error::InvalidIdent);
272	}
273	}
274
275	for c in iter {
276	if c.is_name_char() {
277	self.advance(c.len_utf8());
278	} else {
279	break;
280	}
281	}
282
283	if start == self.pos() {
284	return Err(Error::InvalidIdent);
285	}
286
287	let name = self.slice_back(start);
288	Ok(name)
289	}
290
291	/// Consumes a single ident consisting of ASCII characters, if available.
292	pub fn consume_ascii_ident(&mut self) -> &'a str {
293	let start = self.pos;
294	self.skip_bytes(\|_, c\| c.is_ascii_ident());
295	self.slice_back(start)
296	}
297
298	/// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
299	///
300	/// # Errors
301	///
302	/// - `UnexpectedEndOfStream`
303	/// - `InvalidValue`
304	pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
305	// Check for opening quote.
306	let quote = self.curr_byte()?;
307
308	if quote != b'`\'`' && quote != b'"' {
309	return Err(Error::InvalidValue);
310	}
311
312	let mut prev = quote;
313	self.advance(`1`);
314
315	let start = self.pos();
316
317	while !self.at_end() {
318	let curr = self.curr_byte_unchecked();
319
320	// Advance until the closing quote.
321	if curr == quote {
322	// Check for escaped quote.
323	if prev != b'`\\`' {
324	break;
325	}
326	}
327
328	prev = curr;
329	self.advance(`1`);
330	}
331
332	let value = self.slice_back(start);
333
334	// Check for closing quote.
335	self.consume_byte(quote)?;
336
337	Ok(value)
338	}
339
340	/// Consumes selected string.
341	///
342	/// # Errors
343	///
344	/// - `InvalidChar`
345	/// - `UnexpectedEndOfStream`
346	pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
347	if self.at_end() {
348	return Err(Error::UnexpectedEndOfStream);
349	}
350
351	if !self.starts_with(text) {
352	let len = std::cmp::min(text.len(), self.text.len() - self.pos);
353	// Collect chars and do not slice a string,
354	// because the `len` can be on the char boundary.
355	// Which lead to a panic.
356	let actual = self.text[self.pos..].chars().take(len).collect();
357
358	// Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
359	let expected = std::str::from_utf8(text).unwrap().to_owned();
360
361	return Err(Error::InvalidString(
362	vec![actual, expected],
363	self.calc_char_pos(),
364	));
365	}
366
367	self.advance(text.len());
368	Ok(())
369	}
370
371	/// Consumes bytes by the predicate and returns them.
372	///
373	/// The result can be empty.
374	pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
375	where
376	F: Fn(&Stream<'_>, u8) -> bool,
377	{
378	let start = self.pos();
379	self.skip_bytes(f);
380	self.slice_back(start)
381	}
382
383	/// Consumes bytes by the predicate.
384	pub fn skip_bytes<F>(&mut self, f: F)
385	where
386	F: Fn(&Stream<'_>, u8) -> bool,
387	{
388	while !self.at_end() {
389	let c = self.curr_byte_unchecked();
390	if f(self, c) {
391	self.advance(`1`);
392	} else {
393	break;
394	}
395	}
396	}
397
398	/// Slices data from `pos` to the current position.
399	#[inline]
400	pub fn slice_back(&self, pos: usize) -> &'a str {
401	&self.text[pos..self.pos]
402	}
403
404	/// Slices data from the current position to the end.
405	#[inline]
406	pub fn slice_tail(&self) -> &'a str {
407	&self.text[self.pos..]
408	}
409
410	/// Parses number or percent from the stream.
411	///
412	/// Percent value will be normalized.
413	pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
414	self.skip_spaces();
415
416	let n = self.parse_number()?;
417	if self.starts_with(b"%") {
418	self.advance(`1`);
419	Ok(n / `100.0`)
420	} else {
421	Ok(n)
422	}
423	}
424
425	/// Parses number or percent from a list of numbers and/or percents.
426	pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
427	if self.at_end() {
428	return Err(Error::UnexpectedEndOfStream);
429	}
430
431	let l = self.parse_number_or_percent()?;
432	self.skip_spaces();
433	self.parse_list_separator();
434	Ok(l)
435	}
436
437	/// Skips digits.
438	pub fn skip_digits(&mut self) {
439	self.skip_bytes(\|_, c\| c.is_digit());
440	}
441
442	#[inline]
443	pub(crate) fn parse_list_separator(&mut self) {
444	if self.is_curr_byte_eq(b',') {
445	self.advance(`1`);
446	}
447	}
448	}
449