| 1 | use super::*; |
| 2 | |
| 3 | use ciborium_io::Read; |
| 4 | |
| 5 | use core::marker::PhantomData; |
| 6 | |
/// Incremental decoder for the payload bytes of a segment
///
/// An implementor turns successive chunks of raw bytes into borrowed items.
/// It may hold back a few bytes between calls (see [`Parser::saved`]).
pub trait Parser: Default {
    /// The (possibly unsized) item produced by a successful parse
    type Item: ?Sized;

    /// The error reported when the bytes cannot be parsed
    type Error;

    /// Parses one chunk of bytes and returns the resulting item
    ///
    /// A parser is allowed to hold back bytes from an earlier call; how many
    /// is reported by [`Parser::saved`]. Those held-back bytes are written
    /// over the start of `bytes` before parsing takes place, so callers
    /// **MUST** honor two rules:
    ///
    /// 1. `bytes` has to be longer than the number of saved bytes.
    /// 2. New input has to be placed only *after* the first `saved()` bytes
    ///    of `bytes`; that prefix is reserved for the parser.
    ///
    /// When both rules are followed, the parser can splice its saved bytes
    /// in front of the new data without allocating.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;

    /// Reports how many bytes the parser is currently holding back
    ///
    /// The provided implementation always reports zero, which suits parsers
    /// that keep no state between calls.
    fn saved(&self) -> usize {
        0
    }
}
| 39 | |
/// A bytes parser
///
/// No actual processing is performed and the input bytes are directly
/// returned. This implies that this parser never saves any bytes internally.
///
/// The private unit field keeps the type constructible only through
/// [`Default`].
#[derive(Debug, Default)]
pub struct Bytes(());
| 46 | |
| 47 | impl Parser for Bytes { |
| 48 | type Item = [u8]; |
| 49 | type Error = core::convert::Infallible; |
| 50 | |
| 51 | fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> { |
| 52 | Ok(bytes) |
| 53 | } |
| 54 | } |
| 55 | |
/// A text parser
///
/// This parser converts the input bytes to a `str`. When a chunk ends in the
/// middle of a multi-byte UTF-8 character, the trailing bytes are kept
/// and prepended to the next chunk.
#[derive(Debug, Default)]
pub struct Text {
    /// Number of bytes in `buffer` that are currently held back
    stored: usize,
    /// Held-back trailing bytes; three bytes is the longest proper prefix of
    /// a UTF-8 encoded character (which is at most four bytes long)
    buffer: [u8; 3],
}
| 66 | |
| 67 | impl Parser for Text { |
| 68 | type Item = str; |
| 69 | type Error = core::str::Utf8Error; |
| 70 | |
| 71 | fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> { |
| 72 | // If we cannot advance, return nothing. |
| 73 | if bytes.len() <= self.stored { |
| 74 | return Ok("" ); |
| 75 | } |
| 76 | |
| 77 | // Copy previously invalid data into place. |
| 78 | bytes[..self.stored].clone_from_slice(&self.buffer[..self.stored]); |
| 79 | |
| 80 | Ok(match core::str::from_utf8(bytes) { |
| 81 | Ok(s) => { |
| 82 | self.stored = 0; |
| 83 | s |
| 84 | } |
| 85 | Err(e) => { |
| 86 | let valid_len = e.valid_up_to(); |
| 87 | let invalid_len = bytes.len() - valid_len; |
| 88 | |
| 89 | // If the size of the invalid UTF-8 is large enough to hold |
| 90 | // all valid UTF-8 characters, we have a syntax error. |
| 91 | if invalid_len > self.buffer.len() { |
| 92 | return Err(e); |
| 93 | } |
| 94 | |
| 95 | // Otherwise, store the invalid bytes for the next read cycle. |
| 96 | self.buffer[..invalid_len].clone_from_slice(&bytes[valid_len..]); |
| 97 | self.stored = invalid_len; |
| 98 | |
| 99 | // Decode the valid part of the string. |
| 100 | core::str::from_utf8(&bytes[..valid_len]).unwrap() |
| 101 | } |
| 102 | }) |
| 103 | } |
| 104 | |
| 105 | fn saved(&self) -> usize { |
| 106 | self.stored |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /// A CBOR segment |
| 111 | /// |
| 112 | /// This type represents a single bytes or text segment on the wire. It can be |
| 113 | /// read out in parsed chunks based on the size of the input scratch buffer. |
| 114 | pub struct Segment<'r, R: Read, P: Parser> { |
| 115 | reader: &'r mut Decoder<R>, |
| 116 | unread: usize, |
| 117 | offset: usize, |
| 118 | parser: P, |
| 119 | } |
| 120 | |
| 121 | impl<'r, R: Read, P: Parser> Segment<'r, R, P> { |
| 122 | /// Gets the number of unprocessed bytes |
| 123 | #[inline ] |
| 124 | pub fn left(&self) -> usize { |
| 125 | self.unread + self.parser.saved() |
| 126 | } |
| 127 | |
| 128 | /// Gets the next parsed chunk within the segment |
| 129 | /// |
| 130 | /// Returns `Ok(None)` when all chunks have been read. |
| 131 | #[inline ] |
| 132 | pub fn pull<'a>( |
| 133 | &mut self, |
| 134 | buffer: &'a mut [u8], |
| 135 | ) -> Result<Option<&'a P::Item>, Error<R::Error>> { |
| 136 | use core::cmp::min; |
| 137 | |
| 138 | let prev = self.parser.saved(); |
| 139 | match self.unread { |
| 140 | 0 if prev == 0 => return Ok(None), |
| 141 | 0 => return Err(Error::Syntax(self.offset)), |
| 142 | _ => (), |
| 143 | } |
| 144 | |
| 145 | // Determine how many bytes to read. |
| 146 | let size = min(buffer.len(), prev + self.unread); |
| 147 | let full = &mut buffer[..size]; |
| 148 | let next = &mut full[min(size, prev)..]; |
| 149 | |
| 150 | // Read additional bytes. |
| 151 | self.reader.read_exact(next)?; |
| 152 | self.unread -= next.len(); |
| 153 | |
| 154 | self.parser |
| 155 | .parse(full) |
| 156 | .or(Err(Error::Syntax(self.offset))) |
| 157 | .map(Some) |
| 158 | } |
| 159 | } |
| 160 | |
| 161 | /// A sequence of CBOR segments |
| 162 | /// |
| 163 | /// CBOR allows for bytes or text items to be segmented. This type represents |
| 164 | /// the state of that segmented input stream. |
| 165 | pub struct Segments<'r, R: Read, P: Parser> { |
| 166 | reader: &'r mut Decoder<R>, |
| 167 | finish: bool, |
| 168 | nested: usize, |
| 169 | parser: PhantomData<P>, |
| 170 | unwrap: fn(Header) -> Result<Option<usize>, ()>, |
| 171 | } |
| 172 | |
| 173 | impl<'r, R: Read, P: Parser> Segments<'r, R, P> { |
| 174 | #[inline ] |
| 175 | pub(crate) fn new( |
| 176 | decoder: &'r mut Decoder<R>, |
| 177 | unwrap: fn(Header) -> Result<Option<usize>, ()>, |
| 178 | ) -> Self { |
| 179 | Self { |
| 180 | reader: decoder, |
| 181 | finish: false, |
| 182 | nested: 0, |
| 183 | parser: PhantomData, |
| 184 | unwrap, |
| 185 | } |
| 186 | } |
| 187 | |
| 188 | /// Gets the next segment in the stream |
| 189 | /// |
| 190 | /// Returns `Ok(None)` at the conclusion of the stream. |
| 191 | #[inline ] |
| 192 | pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> { |
| 193 | while !self.finish { |
| 194 | let offset = self.reader.offset(); |
| 195 | match self.reader.pull()? { |
| 196 | Header::Break if self.nested == 1 => return Ok(None), |
| 197 | Header::Break if self.nested > 1 => self.nested -= 1, |
| 198 | header => match (self.unwrap)(header) { |
| 199 | Err(..) => return Err(Error::Syntax(offset)), |
| 200 | Ok(None) => self.nested += 1, |
| 201 | Ok(Some(len)) => { |
| 202 | self.finish = self.nested == 0; |
| 203 | return Ok(Some(Segment { |
| 204 | reader: self.reader, |
| 205 | unread: len, |
| 206 | offset, |
| 207 | parser: P::default(), |
| 208 | })); |
| 209 | } |
| 210 | }, |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | Ok(None) |
| 215 | } |
| 216 | } |
| 217 | |