seg.rs - Codebrowser

1	use super::*;
2
3	use ciborium_io::Read;
4
5	use core::marker::PhantomData;
6
/// A parser for incoming segments
///
/// A parser turns successive chunks of raw segment bytes into items. It may
/// hold back a few trailing bytes of one chunk and prepend them to the next.
pub trait Parser: Default {
    /// The type of item that is parsed
    type Item: ?Sized;

    /// The parsing error that may occur
    type Error;

    /// The main parsing function
    ///
    /// This function processes the incoming bytes and returns the item.
    ///
    /// One important detail that **MUST NOT** be overlooked is that the
    /// parser may save data from a previous parsing attempt. The number of
    /// bytes saved is indicated by the `Parser::saved()` function. The saved
    /// bytes will be copied into the beginning of the `bytes` array before
    /// processing. Therefore, two requirements should be met.
    ///
    /// First, the incoming byte slice should be larger than the saved bytes.
    ///
    /// Second, the incoming byte slice should contain new bytes only after
    /// the saved byte prefix.
    ///
    /// If both criteria are met, this allows the parser to prepend its saved
    /// bytes without any additional allocation.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the bytes cannot be parsed into an item.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Indicates the number of saved bytes in the parser
    ///
    /// The default implementation reports `0`, which suits parsers that never
    /// hold bytes back between calls.
    fn saved(&self) -> usize {
        0
    }
}
39
40	/// A bytes parser
41	///
42	/// No actual processing is performed and the input bytes are directly
43	/// returned. This implies that this parser never saves any bytes internally.
44	#[derive(Default)]
45	pub struct Bytes(());
46
47	impl Parser for Bytes {
48	type Item = [u8];
49	type Error = core::convert::Infallible;
50
51	fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52	Ok(bytes)
53	}
54	}
55
56	/// A text parser
57	///
58	/// This parser converts the input bytes to a `str`. This parser preserves
59	/// trailing invalid UTF-8 sequences in the case that chunking fell in the
60	/// middle of a valid UTF-8 character.
61	#[derive(Default)]
62	pub struct Text {
63	stored: usize,
64	buffer: [u8; `3`],
65	}
66
67	impl Parser for Text {
68	type Item = str;
69	type Error = core::str::Utf8Error;
70
71	fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72	// If we cannot advance, return nothing.
73	if bytes.len() <= self.stored {
74	return Ok("");
75	}
76
77	// Copy previously invalid data into place.
78	bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79
80	Ok(match core::str::from_utf8(bytes) {
81	Ok(s) => {
82	self.stored = `0`;
83	s
84	}
85	Err(e) => {
86	let valid_len = e.valid_up_to();
87	let invalid_len = bytes.len() - valid_len;
88
89	// If the size of the invalid UTF-8 is large enough to hold
90	// all valid UTF-8 characters, we have a syntax error.
91	if invalid_len > self.buffer.len() {
92	return Err(e);
93	}
94
95	// Otherwise, store the invalid bytes for the next read cycle.
96	self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
97	self.stored = invalid_len;
98
99	// Decode the valid part of the string.
100	core::str::from_utf8(&bytes[..valid_len]).unwrap()
101	}
102	})
103	}
104
105	fn saved(&self) -> usize {
106	self.stored
107	}
108	}
109
110	/// A CBOR segment
111	///
112	/// This type represents a single bytes or text segment on the wire. It can be
113	/// read out in parsed chunks based on the size of the input scratch buffer.
114	pub struct Segment<'r, R: Read, P: Parser> {
115	reader: &'r mut Decoder<R>,
116	unread: usize,
117	offset: usize,
118	parser: P,
119	}
120
121	impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
122	/// Gets the number of unprocessed bytes
123	#[inline]
124	pub fn left(&self) -> usize {
125	self.unread + self.parser.saved()
126	}
127
128	/// Gets the next parsed chunk within the segment
129	///
130	/// Returns `Ok(None)` when all chunks have been read.
131	#[inline]
132	pub fn pull<'a>(
133	&mut self,
134	buffer: &'a mut [u8],
135	) -> Result<Option<&'a P::Item>, Error<R::Error>> {
136	use core::cmp::min;
137
138	let prev = self.parser.saved();
139	match self.unread {
140	`0` if prev == `0` => return Ok(None),
141	`0` => return Err(Error::Syntax(self.offset)),
142	_ => (),
143	}
144
145	// Determine how many bytes to read.
146	let size = min(buffer.len(), prev + self.unread);
147	let full = &mut buffer[..size];
148	let next = &mut full[min(size, prev)..];
149
150	// Read additional bytes.
151	self.reader.read_exact(next)?;
152	self.unread -= next.len();
153
154	self.parser
155	.parse(full)
156	.or(Err(Error::Syntax(self.offset)))
157	.map(Some)
158	}
159	}
160
161	/// A sequence of CBOR segments
162	///
163	/// CBOR allows for bytes or text items to be segmented. This type represents
164	/// the state of that segmented input stream.
165	pub struct Segments<'r, R: Read, P: Parser> {
166	reader: &'r mut Decoder<R>,
167	finish: bool,
168	nested: usize,
169	parser: PhantomData<P>,
170	unwrap: fn(Header) -> Result<Option<usize>, ()>,
171	}
172
173	impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
174	#[inline]
175	pub(crate) fn new(
176	decoder: &'r mut Decoder<R>,
177	unwrap: fn(Header) -> Result<Option<usize>, ()>,
178	) -> Self {
179	Self {
180	reader: decoder,
181	finish: `false`,
182	nested: `0`,
183	parser: PhantomData,
184	unwrap,
185	}
186	}
187
188	/// Gets the next segment in the stream
189	///
190	/// Returns `Ok(None)` at the conclusion of the stream.
191	#[inline]
192	pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
193	while !self.finish {
194	let offset = self.reader.offset();
195	match self.reader.pull()? {
196	Header::Break if self.nested == `1` => return Ok(None),
197	Header::Break if self.nested > `1` => self.nested -= `1`,
198	header => match (self.unwrap)(header) {
199	Err(..) => return Err(Error::Syntax(offset)),
200	Ok(None) => self.nested += `1`,
201	Ok(Some(len)) => {
202	self.finish = self.nested == `0`;
203	return Ok(Some(Segment {
204	reader: self.reader,
205	unread: len,
206	offset,
207	parser: P::default(),
208	}));
209	}
210	},
211	}
212	}
213
214	Ok(None)
215	}
216	}
217

Provided by KDAB

Definitions