stream.rs source code [crates/svgtypes-0.14.0/src/stream.rs]

1	use std::str::FromStr;
2
3	use crate::Error;
4
/// Extension methods for XML-subset only operations.
///
/// Every predicate classifies a single byte against a fixed ASCII set,
/// so any byte outside the ASCII range yields `false`.
pub(crate) trait ByteExt {
    /// Checks if a byte is a numeric sign.
    ///
    /// `[+-]`
    fn is_sign(&self) -> bool;

    /// Checks if a byte is a digit.
    ///
    /// `[0-9]`
    fn is_digit(&self) -> bool;

    /// Checks if a byte is a hex digit.
    ///
    /// `[0-9A-Fa-f]`
    fn is_hex_digit(&self) -> bool;

    /// Checks if a byte is a space.
    ///
    /// `[ \r\n\t]`
    fn is_space(&self) -> bool;

    /// Checks if a byte is a single or a double quote.
    ///
    /// `['"]`
    fn is_quote(&self) -> bool;

    /// Checks if a byte is an ASCII letter.
    ///
    /// `[A-Za-z]`
    fn is_letter(&self) -> bool;

    /// Checks if a byte is an ASCII ident char.
    ///
    /// `[0-9A-Za-z_-]`
    fn is_ascii_ident(&self) -> bool;
}

impl ByteExt for u8 {
    #[inline]
    fn is_sign(&self) -> bool {
        matches!(*self, b'+' | b'-')
    }

    #[inline]
    fn is_digit(&self) -> bool {
        self.is_ascii_digit()
    }

    #[inline]
    fn is_hex_digit(&self) -> bool {
        self.is_ascii_hexdigit()
    }

    #[inline]
    fn is_space(&self) -> bool {
        // Deliberately narrower than `u8::is_ascii_whitespace`, which also accepts form feed.
        matches!(*self, b' ' | b'\t' | b'\n' | b'\r')
    }

    #[inline]
    fn is_quote(&self) -> bool {
        matches!(*self, b'\'' | b'"')
    }

    #[inline]
    fn is_letter(&self) -> bool {
        self.is_ascii_alphabetic()
    }

    #[inline]
    fn is_ascii_ident(&self) -> bool {
        self.is_ascii_alphanumeric() || matches!(*self, b'-' | b'_')
    }
}
72
/// Character classification used while parsing CSS identifiers.
trait CharExt {
    /// Checks if a char can start a name: `_`, an ASCII letter,
    /// a non-ASCII char or an escape.
    fn is_name_start(&self) -> bool;

    /// Checks if a char can continue a name: everything accepted by
    /// `is_name_start` plus ASCII digits and `-`.
    fn is_name_char(&self) -> bool;

    /// Checks if a char belongs to the CSS `nonascii` class.
    ///
    /// `[^\0-\237]` (octal), i.e. any code point above U+009F.
    fn is_non_ascii(&self) -> bool;

    /// Checks if a char is an escape. Escapes are not supported yet,
    /// so this is always `false`.
    fn is_escape(&self) -> bool;
}

impl CharExt for char {
    #[inline]
    fn is_name_start(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_name_char(&self) -> bool {
        match *self {
            '_' | 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => true,
            _ => self.is_non_ascii() || self.is_escape(),
        }
    }

    #[inline]
    fn is_non_ascii(&self) -> bool {
        // The CSS grammar writes the upper bound of the excluded range as
        // `\237`, which is octal (159). Comparing against decimal 237
        // rejects valid characters such as U+00E9.
        *self as u32 > 0o237
    }

    #[inline]
    fn is_escape(&self) -> bool {
        // TODO: this
        false
    }
}
108
/// A streaming text parsing interface.
///
/// A `Stream` is a borrowed string plus a byte offset into it. It is `Copy`,
/// so a parser can snapshot the stream and restore it to backtrack.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct Stream<'a> {
    /// The whole text being parsed.
    text: &'a str,
    /// Current byte offset into `text`.
    pos: usize,
}

impl<'a> From<&'a str> for Stream<'a> {
    /// Creates a stream positioned at the first byte of `text`.
    #[inline]
    fn from(text: &'a str) -> Self {
        Stream { text, pos: 0 }
    }
}
122
123	impl<'a> Stream<'a> {
124	/// Returns the current position in bytes.
125	#[inline]
126	pub fn pos(&self) -> usize {
127	self.pos
128	}
129
130	/// Calculates the current position in chars.
131	pub fn calc_char_pos(&self) -> usize {
132	self.calc_char_pos_at(self.pos)
133	}
134
135	/// Calculates the current position in chars.
136	pub fn calc_char_pos_at(&self, byte_pos: usize) -> usize {
137	let mut pos = `1`;
138	for (idx, _) in self.text.char_indices() {
139	if idx >= byte_pos {
140	break;
141	}
142
143	pos += `1`;
144	}
145
146	pos
147	}
148
149	/// Sets current position equal to the end.
150	///
151	/// Used to indicate end of parsing on error.
152	#[inline]
153	pub fn jump_to_end(&mut self) {
154	self.pos = self.text.len();
155	}
156
157	/// Checks if the stream is reached the end.
158	///
159	/// Any [`pos()`] value larger than original text length indicates stream end.
160	///
161	/// Accessing stream after reaching end via safe methods will produce
162	/// an `UnexpectedEndOfStream` error.
163	///
164	/// Accessing stream after reaching end via _unchecked methods will produce*
165	/// a Rust's bound checking error.
166	///
167	/// [`pos()`]: #method.pos
168	#[inline]
169	pub fn at_end(&self) -> bool {
170	self.pos >= self.text.len()
171	}
172
173	/// Returns a byte from a current stream position.
174	///
175	/// # Errors
176	///
177	/// - `UnexpectedEndOfStream`
178	#[inline]
179	pub fn curr_byte(&self) -> Result<u8, Error> {
180	if self.at_end() {
181	return Err(Error::UnexpectedEndOfStream);
182	}
183
184	Ok(self.curr_byte_unchecked())
185	}
186
187	#[inline]
188	pub fn chars(&self) -> std::str::Chars<'a> {
189	self.text[self.pos..].chars()
190	}
191
192	/// Returns a byte from a current stream position.
193	///
194	/// # Panics
195	///
196	/// - if the current position is after the end of the data
197	#[inline]
198	pub fn curr_byte_unchecked(&self) -> u8 {
199	self.text.as_bytes()[self.pos]
200	}
201
202	/// Checks that current byte is equal to provided.
203	///
204	/// Returns `false` if no bytes left.
205	#[inline]
206	pub fn is_curr_byte_eq(&self, c: u8) -> bool {
207	if !self.at_end() {
208	self.curr_byte_unchecked() == c
209	} else {
210	`false`
211	}
212	}
213
214	/// Returns a next byte from a current stream position.
215	///
216	/// # Errors
217	///
218	/// - `UnexpectedEndOfStream`
219	#[inline]
220	pub fn next_byte(&self) -> Result<u8, Error> {
221	if self.pos + `1` >= self.text.len() {
222	return Err(Error::UnexpectedEndOfStream);
223	}
224
225	Ok(self.text.as_bytes()[self.pos + `1`])
226	}
227
228	/// Advances by `n` bytes.
229	#[inline]
230	pub fn advance(&mut self, n: usize) {
231	debug_assert!(self.pos + n <= self.text.len());
232	self.pos += n;
233	}
234
235	/// Skips whitespaces.
236	///
237	/// Accepted values: `' ' \n \r \t`.
238	pub fn skip_spaces(&mut self) {
239	while !self.at_end() && self.curr_byte_unchecked().is_space() {
240	self.advance(`1`);
241	}
242	}
243
244	/// Checks that the stream starts with a selected text.
245	///
246	/// We are using `&[u8]` instead of `&str` for performance reasons.
247	#[inline]
248	pub fn starts_with(&self, text: &[u8]) -> bool {
249	self.text.as_bytes()[self.pos..].starts_with(text)
250	}
251
252	/// Consumes current byte if it's equal to the provided byte.
253	///
254	/// # Errors
255	///
256	/// - `InvalidChar`
257	/// - `UnexpectedEndOfStream`
258	pub fn consume_byte(&mut self, c: u8) -> Result<(), Error> {
259	if self.curr_byte()? != c {
260	return Err(Error::InvalidChar(
261	vec![self.curr_byte_unchecked(), c],
262	self.calc_char_pos(),
263	));
264	}
265
266	self.advance(`1`);
267	Ok(())
268	}
269
270	/// Parses a single [ident](https://drafts.csswg.org/css-syntax-3/#typedef-ident-token).
271	///
272	/// # Errors
273	///
274	/// - `InvalidIdent`
275	pub fn parse_ident(&mut self) -> Result<&'a str, Error> {
276	let start = self.pos();
277
278	if self.curr_byte() == Ok(b'-') {
279	self.advance(`1`);
280	}
281
282	let mut iter = self.chars();
283	if let Some(c) = iter.next() {
284	if c.is_name_start() {
285	self.advance(c.len_utf8());
286	} else {
287	return Err(Error::InvalidIdent);
288	}
289	}
290
291	for c in iter {
292	if c.is_name_char() {
293	self.advance(c.len_utf8());
294	} else {
295	break;
296	}
297	}
298
299	if start == self.pos() {
300	return Err(Error::InvalidIdent);
301	}
302
303	let name = self.slice_back(start);
304	Ok(name)
305	}
306
307	/// Consumes a single ident consisting of ASCII characters, if available.
308	pub fn consume_ascii_ident(&mut self) -> &'a str {
309	let start = self.pos;
310	self.skip_bytes(\|_, c\| c.is_ascii_ident());
311	self.slice_back(start)
312	}
313
314	/// Parses a single [quoted string](https://drafts.csswg.org/css-syntax-3/#typedef-string-token)
315	///
316	/// # Errors
317	///
318	/// - `UnexpectedEndOfStream`
319	/// - `InvalidValue`
320	pub fn parse_quoted_string(&mut self) -> Result<&'a str, Error> {
321	// Check for opening quote.
322	let quote = self.curr_byte()?;
323
324	if quote != b'`\'`' && quote != b'"' {
325	return Err(Error::InvalidValue);
326	}
327
328	let mut prev = quote;
329	self.advance(`1`);
330
331	let start = self.pos();
332
333	while !self.at_end() {
334	let curr = self.curr_byte_unchecked();
335
336	// Advance until the closing quote.
337	if curr == quote {
338	// Check for escaped quote.
339	if prev != b'`\\`' {
340	break;
341	}
342	}
343
344	prev = curr;
345	self.advance(`1`);
346	}
347
348	let value = self.slice_back(start);
349
350	// Check for closing quote.
351	self.consume_byte(quote)?;
352
353	Ok(value)
354	}
355
356	/// Consumes selected string.
357	///
358	/// # Errors
359	///
360	/// - `InvalidChar`
361	/// - `UnexpectedEndOfStream`
362	pub fn consume_string(&mut self, text: &[u8]) -> Result<(), Error> {
363	if self.at_end() {
364	return Err(Error::UnexpectedEndOfStream);
365	}
366
367	if !self.starts_with(text) {
368	let len = std::cmp::min(text.len(), self.text.len() - self.pos);
369	// Collect chars and do not slice a string,
370	// because the `len` can be on the char boundary.
371	// Which lead to a panic.
372	let actual = self.text[self.pos..].chars().take(len).collect();
373
374	// Assume that all input `text` are valid UTF-8 strings, so unwrap is safe.
375	let expected = std::str::from_utf8(text).unwrap().to_owned();
376
377	return Err(Error::InvalidString(
378	vec![actual, expected],
379	self.calc_char_pos(),
380	));
381	}
382
383	self.advance(text.len());
384	Ok(())
385	}
386
387	/// Consumes bytes by the predicate and returns them.
388	///
389	/// The result can be empty.
390	pub fn consume_bytes<F>(&mut self, f: F) -> &'a str
391	where
392	F: Fn(&Stream, u8) -> bool,
393	{
394	let start = self.pos();
395	self.skip_bytes(f);
396	self.slice_back(start)
397	}
398
399	/// Consumes bytes by the predicate.
400	pub fn skip_bytes<F>(&mut self, f: F)
401	where
402	F: Fn(&Stream, u8) -> bool,
403	{
404	while !self.at_end() {
405	let c = self.curr_byte_unchecked();
406	if f(self, c) {
407	self.advance(`1`);
408	} else {
409	break;
410	}
411	}
412	}
413
414	/// Slices data from `pos` to the current position.
415	#[inline]
416	pub fn slice_back(&self, pos: usize) -> &'a str {
417	&self.text[pos..self.pos]
418	}
419
420	/// Slices data from the current position to the end.
421	#[inline]
422	pub fn slice_tail(&self) -> &'a str {
423	&self.text[self.pos..]
424	}
425
426	/// Parses integer number from the stream.
427	///
428	/// Same as [`parse_number()`], but only for integer. Does not refer to any SVG type.
429	///
430	/// [`parse_number()`]: #method.parse_number
431	pub fn parse_integer(&mut self) -> Result<i32, Error> {
432	self.skip_spaces();
433
434	if self.at_end() {
435	return Err(Error::InvalidNumber(self.calc_char_pos()));
436	}
437
438	let start = self.pos();
439
440	// Consume sign.
441	if self.curr_byte()?.is_sign() {
442	self.advance(`1`);
443	}
444
445	// The current char must be a digit.
446	if !self.curr_byte()?.is_digit() {
447	return Err(Error::InvalidNumber(self.calc_char_pos_at(start)));
448	}
449
450	self.skip_digits();
451
452	// Use the default i32 parser now.
453	let s = self.slice_back(start);
454	match i32::from_str(s) {
455	Ok(n) => Ok(n),
456	Err(_) => Err(Error::InvalidNumber(self.calc_char_pos_at(start))),
457	}
458	}
459
460	/// Parses integer from a list of numbers.
461	pub fn parse_list_integer(&mut self) -> Result<i32, Error> {
462	if self.at_end() {
463	return Err(Error::UnexpectedEndOfStream);
464	}
465
466	let n = self.parse_integer()?;
467	self.skip_spaces();
468	self.parse_list_separator();
469	Ok(n)
470	}
471
472	/// Parses number or percent from the stream.
473	///
474	/// Percent value will be normalized.
475	pub fn parse_number_or_percent(&mut self) -> Result<f64, Error> {
476	self.skip_spaces();
477
478	let n = self.parse_number()?;
479	if self.starts_with(b"%") {
480	self.advance(`1`);
481	Ok(n / `100.0`)
482	} else {
483	Ok(n)
484	}
485	}
486
487	/// Parses number or percent from a list of numbers and/or percents.
488	pub fn parse_list_number_or_percent(&mut self) -> Result<f64, Error> {
489	if self.at_end() {
490	return Err(Error::UnexpectedEndOfStream);
491	}
492
493	let l = self.parse_number_or_percent()?;
494	self.skip_spaces();
495	self.parse_list_separator();
496	Ok(l)
497	}
498
499	/// Skips digits.
500	pub fn skip_digits(&mut self) {
501	self.skip_bytes(\|_, c\| c.is_digit());
502	}
503
504	#[inline]
505	pub(crate) fn parse_list_separator(&mut self) {
506	if self.is_curr_byte_eq(b',') {
507	self.advance(`1`);
508	}
509	}
510	}
511
#[rustfmt::skip]
#[cfg(test)]
mod tests {
    use super::*;

    // A plain integer is parsed as is.
    #[test]
    fn parse_integer_1() {
        let mut s = Stream::from("10");
        assert_eq!(s.parse_integer().unwrap(), 10);
    }

    // Leading whitespaces are skipped and a sign is accepted.
    #[test]
    fn parse_integer_2() {
        let mut s = Stream::from("  -10");
        assert_eq!(s.parse_integer().unwrap(), -10);
    }

    #[test]
    fn parse_err_integer_1() {
        // error because of overflow
        let mut s = Stream::from("10000000000000");
        assert_eq!(s.parse_integer().unwrap_err().to_string(),
                   "invalid number at position 1");
    }
}
531