1use super::*;
2
3use ciborium_io::Read;
4
5use core::marker::PhantomData;
6
/// A parser that turns the raw bytes of a segment into items
pub trait Parser: Default {
    /// The item type produced by a successful parse
    type Item: ?Sized;

    /// The error type produced by a failed parse
    type Error;

    /// Parses one chunk of a segment and returns the decoded item.
    ///
    /// A parser may hold back bytes from one call to the next, for example
    /// when a chunk ends part-way through a unit it cannot yet decode. The
    /// number of bytes currently held back is reported by
    /// `Parser::saved()`. On the next call the parser writes those bytes
    /// over the start of `bytes` before it decodes anything, which lets it
    /// prepend them without any additional allocation.
    ///
    /// Callers **MUST** therefore prepare `bytes` as follows:
    ///
    /// 1. `bytes` is longer than `Parser::saved()` bytes.
    /// 2. New input starts at index `Parser::saved()`; everything before
    ///    that index is scratch space that the parser will overwrite.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Returns the number of bytes held back by the parser.
    ///
    /// The default implementation returns zero, which is correct for any
    /// parser that never holds back input.
    fn saved(&self) -> usize {
        0
    }
}
39
/// A pass-through parser for byte strings
///
/// The input bytes are handed back exactly as they were received, so there
/// is never anything to carry over between calls and the parser holds no
/// state.
#[derive(Default)]
pub struct Bytes(());
46
47impl Parser for Bytes {
48 type Item = [u8];
49 type Error = core::convert::Infallible;
50
51 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
52 Ok(bytes)
53 }
54}
55
/// A parser for UTF-8 text
///
/// The input bytes are decoded into a `str`. Chunk boundaries are free to
/// fall in the middle of a multi-byte character: trailing bytes that could
/// not yet be decoded are kept inside the parser and placed in front of the
/// input of the next call.
#[derive(Default)]
pub struct Text {
    /// How many leading bytes of `buffer` are currently held back
    stored: usize,
    /// Holding area for the undecoded tail of the previous chunk
    buffer: [u8; 3],
}
66
67impl Parser for Text {
68 type Item = str;
69 type Error = core::str::Utf8Error;
70
71 fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
72 // If we cannot advance, return nothing.
73 if bytes.len() <= self.stored {
74 return Ok("");
75 }
76
77 // Copy previously invalid data into place.
78 bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]);
79
80 Ok(match core::str::from_utf8(bytes) {
81 Ok(s) => {
82 self.stored = 0;
83 s
84 }
85 Err(e) => {
86 let valid_len = e.valid_up_to();
87 let invalid_len = bytes.len() - valid_len;
88
89 // If the size of the invalid UTF-8 is large enough to hold
90 // all valid UTF-8 characters, we have a syntax error.
91 if invalid_len > self.buffer.len() {
92 return Err(e);
93 }
94
95 // Otherwise, store the invalid bytes for the next read cycle.
96 self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]);
97 self.stored = invalid_len;
98
99 // Decode the valid part of the string.
100 core::str::from_utf8(&bytes[..valid_len]).unwrap()
101 }
102 })
103 }
104
105 fn saved(&self) -> usize {
106 self.stored
107 }
108}
109
110/// A CBOR segment
111///
112/// This type represents a single bytes or text segment on the wire. It can be
113/// read out in parsed chunks based on the size of the input scratch buffer.
114pub struct Segment<'r, R: Read, P: Parser> {
115 reader: &'r mut Decoder<R>,
116 unread: usize,
117 offset: usize,
118 parser: P,
119}
120
121impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
122 /// Gets the number of unprocessed bytes
123 #[inline]
124 pub fn left(&self) -> usize {
125 self.unread + self.parser.saved()
126 }
127
128 /// Gets the next parsed chunk within the segment
129 ///
130 /// Returns `Ok(None)` when all chunks have been read.
131 #[inline]
132 pub fn pull<'a>(
133 &mut self,
134 buffer: &'a mut [u8],
135 ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
136 use core::cmp::min;
137
138 let prev = self.parser.saved();
139 match self.unread {
140 0 if prev == 0 => return Ok(None),
141 0 => return Err(Error::Syntax(self.offset)),
142 _ => (),
143 }
144
145 // Determine how many bytes to read.
146 let size = min(buffer.len(), prev + self.unread);
147 let full = &mut buffer[..size];
148 let next = &mut full[min(size, prev)..];
149
150 // Read additional bytes.
151 self.reader.read_exact(next)?;
152 self.unread -= next.len();
153
154 self.parser
155 .parse(full)
156 .or(Err(Error::Syntax(self.offset)))
157 .map(Some)
158 }
159}
160
161/// A sequence of CBOR segments
162///
163/// CBOR allows for bytes or text items to be segmented. This type represents
164/// the state of that segmented input stream.
165pub struct Segments<'r, R: Read, P: Parser> {
166 reader: &'r mut Decoder<R>,
167 finish: bool,
168 nested: usize,
169 parser: PhantomData<P>,
170 unwrap: fn(Header) -> Result<Option<usize>, ()>,
171}
172
173impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
174 #[inline]
175 pub(crate) fn new(
176 decoder: &'r mut Decoder<R>,
177 unwrap: fn(Header) -> Result<Option<usize>, ()>,
178 ) -> Self {
179 Self {
180 reader: decoder,
181 finish: false,
182 nested: 0,
183 parser: PhantomData,
184 unwrap,
185 }
186 }
187
188 /// Gets the next segment in the stream
189 ///
190 /// Returns `Ok(None)` at the conclusion of the stream.
191 #[inline]
192 pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
193 while !self.finish {
194 let offset = self.reader.offset();
195 match self.reader.pull()? {
196 Header::Break if self.nested == 1 => return Ok(None),
197 Header::Break if self.nested > 1 => self.nested -= 1,
198 header => match (self.unwrap)(header) {
199 Err(..) => return Err(Error::Syntax(offset)),
200 Ok(None) => self.nested += 1,
201 Ok(Some(len)) => {
202 self.finish = self.nested == 0;
203 return Ok(Some(Segment {
204 reader: self.reader,
205 unread: len,
206 offset,
207 parser: P::default(),
208 }));
209 }
210 },
211 }
212 }
213
214 Ok(None)
215 }
216}
217