1 | use super::*; |
2 | |
3 | use ciborium_io::Read; |
4 | |
5 | use core::marker::PhantomData; |
6 | |
/// A parser for incoming segments
///
/// An implementor turns the raw bytes of a segment into `Self::Item`, one
/// chunk at a time, and is allowed to hold a few bytes back between calls.
pub trait Parser: Default {
    /// The type of item that is parsed
    type Item: ?Sized;

    /// The parsing error that may occur
    type Error;

    /// Parses one chunk of bytes and returns the resulting item.
    ///
    /// It is essential to keep in mind that a parser may have held back
    /// bytes from an earlier call. `Parser::saved()` reports how many. Before
    /// any processing happens, those bytes are written over the start of
    /// `bytes`, so the caller has two obligations:
    ///
    /// 1. `bytes` should be longer than the number of saved bytes.
    /// 2. New input should be placed in `bytes` only *after* a prefix of
    ///    `Parser::saved()` bytes; the prefix itself is overwritten.
    ///
    /// When both hold, the parser can prepend what it saved without needing
    /// any additional allocation.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Reports how many bytes the parser is currently holding back.
    ///
    /// The provided implementation returns zero, which suits parsers that
    /// never save anything between calls.
    fn saved(&self) -> usize {
        0
    }
}
39 | |
40 | /// A bytes parser |
41 | /// |
42 | /// No actual processing is performed and the input bytes are directly |
43 | /// returned. This implies that this parser never saves any bytes internally. |
44 | #[derive(Default)] |
45 | pub struct Bytes(()); |
46 | |
47 | impl Parser for Bytes { |
48 | type Item = [u8]; |
49 | type Error = core::convert::Infallible; |
50 | |
51 | fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> { |
52 | Ok(bytes) |
53 | } |
54 | } |
55 | |
56 | /// A text parser |
57 | /// |
58 | /// This parser converts the input bytes to a `str`. This parser preserves |
59 | /// trailing invalid UTF-8 sequences in the case that chunking fell in the |
60 | /// middle of a valid UTF-8 character. |
61 | #[derive(Default)] |
62 | pub struct Text { |
63 | stored: usize, |
64 | buffer: [u8; 3], |
65 | } |
66 | |
67 | impl Parser for Text { |
68 | type Item = str; |
69 | type Error = core::str::Utf8Error; |
70 | |
71 | fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> { |
72 | // If we cannot advance, return nothing. |
73 | if bytes.len() <= self.stored { |
74 | return Ok("" ); |
75 | } |
76 | |
77 | // Copy previously invalid data into place. |
78 | bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]); |
79 | |
80 | Ok(match core::str::from_utf8(bytes) { |
81 | Ok(s) => { |
82 | self.stored = 0; |
83 | s |
84 | } |
85 | Err(e) => { |
86 | let valid_len = e.valid_up_to(); |
87 | let invalid_len = bytes.len() - valid_len; |
88 | |
89 | // If the size of the invalid UTF-8 is large enough to hold |
90 | // all valid UTF-8 characters, we have a syntax error. |
91 | if invalid_len > self.buffer.len() { |
92 | return Err(e); |
93 | } |
94 | |
95 | // Otherwise, store the invalid bytes for the next read cycle. |
96 | self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]); |
97 | self.stored = invalid_len; |
98 | |
99 | // Decode the valid part of the string. |
100 | core::str::from_utf8(&bytes[..valid_len]).unwrap() |
101 | } |
102 | }) |
103 | } |
104 | |
105 | fn saved(&self) -> usize { |
106 | self.stored |
107 | } |
108 | } |
109 | |
110 | /// A CBOR segment |
111 | /// |
112 | /// This type represents a single bytes or text segment on the wire. It can be |
113 | /// read out in parsed chunks based on the size of the input scratch buffer. |
114 | pub struct Segment<'r, R: Read, P: Parser> { |
115 | reader: &'r mut Decoder<R>, |
116 | unread: usize, |
117 | offset: usize, |
118 | parser: P, |
119 | } |
120 | |
121 | impl<'r, R: Read, P: Parser> Segment<'r, R, P> { |
122 | /// Gets the number of unprocessed bytes |
123 | #[inline ] |
124 | pub fn left(&self) -> usize { |
125 | self.unread + self.parser.saved() |
126 | } |
127 | |
128 | /// Gets the next parsed chunk within the segment |
129 | /// |
130 | /// Returns `Ok(None)` when all chunks have been read. |
131 | #[inline ] |
132 | pub fn pull<'a>( |
133 | &mut self, |
134 | buffer: &'a mut [u8], |
135 | ) -> Result<Option<&'a P::Item>, Error<R::Error>> { |
136 | use core::cmp::min; |
137 | |
138 | let prev = self.parser.saved(); |
139 | match self.unread { |
140 | 0 if prev == 0 => return Ok(None), |
141 | 0 => return Err(Error::Syntax(self.offset)), |
142 | _ => (), |
143 | } |
144 | |
145 | // Determine how many bytes to read. |
146 | let size = min(buffer.len(), prev + self.unread); |
147 | let full = &mut buffer[..size]; |
148 | let next = &mut full[min(size, prev)..]; |
149 | |
150 | // Read additional bytes. |
151 | self.reader.read_exact(next)?; |
152 | self.unread -= next.len(); |
153 | |
154 | self.parser |
155 | .parse(full) |
156 | .or(Err(Error::Syntax(self.offset))) |
157 | .map(Some) |
158 | } |
159 | } |
160 | |
161 | /// A sequence of CBOR segments |
162 | /// |
163 | /// CBOR allows for bytes or text items to be segmented. This type represents |
164 | /// the state of that segmented input stream. |
165 | pub struct Segments<'r, R: Read, P: Parser> { |
166 | reader: &'r mut Decoder<R>, |
167 | finish: bool, |
168 | nested: usize, |
169 | parser: PhantomData<P>, |
170 | unwrap: fn(Header) -> Result<Option<usize>, ()>, |
171 | } |
172 | |
173 | impl<'r, R: Read, P: Parser> Segments<'r, R, P> { |
174 | #[inline ] |
175 | pub(crate) fn new( |
176 | decoder: &'r mut Decoder<R>, |
177 | unwrap: fn(Header) -> Result<Option<usize>, ()>, |
178 | ) -> Self { |
179 | Self { |
180 | reader: decoder, |
181 | finish: false, |
182 | nested: 0, |
183 | parser: PhantomData, |
184 | unwrap, |
185 | } |
186 | } |
187 | |
188 | /// Gets the next segment in the stream |
189 | /// |
190 | /// Returns `Ok(None)` at the conclusion of the stream. |
191 | #[inline ] |
192 | pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> { |
193 | while !self.finish { |
194 | let offset = self.reader.offset(); |
195 | match self.reader.pull()? { |
196 | Header::Break if self.nested == 1 => return Ok(None), |
197 | Header::Break if self.nested > 1 => self.nested -= 1, |
198 | header => match (self.unwrap)(header) { |
199 | Err(..) => return Err(Error::Syntax(offset)), |
200 | Ok(None) => self.nested += 1, |
201 | Ok(Some(len)) => { |
202 | self.finish = self.nested == 0; |
203 | return Ok(Some(Segment { |
204 | reader: self.reader, |
205 | unread: len, |
206 | offset, |
207 | parser: P::default(), |
208 | })); |
209 | } |
210 | }, |
211 | } |
212 | } |
213 | |
214 | Ok(None) |
215 | } |
216 | } |
217 | |