use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE};
use std::{cmp, fmt, io};

// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
const BASE64_CHUNK_SIZE: usize = 4;
const DECODED_CHUNK_SIZE: usize = 3;
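// For example, the 4 base64 bytes b"YXNk" decode to the 3 raw bytes b"asd".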

/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    /// Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    /// The start of the pending buffered data in `b64_buffer`.
    b64_offset: usize,
    /// The amount of buffered b64 data after `b64_offset` in `b64_buffer`.
    b64_len: usize,
    /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    /// decoded chunk into, we have to be able to hang on to a few decoded bytes.
    /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    /// into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE],
    /// Index of start of decoded data in `decoded_chunk_buffer`
    decoded_offset: usize,
    /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer`
    decoded_len: usize,
    /// Input length consumed so far.
    /// Used to provide accurate offsets in errors
    input_consumed_len: usize,
    /// offset of previously seen padding, if any
    padding_offset: Option<usize>,
}

// exclude b64_buffer as it's uselessly large
impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("DecoderReader")
            .field("b64_offset", &self.b64_offset)
            .field("b64_len", &self.b64_len)
            .field("decoded_chunk_buffer", &self.decoded_chunk_buffer)
            .field("decoded_offset", &self.decoded_offset)
            .field("decoded_len", &self.decoded_len)
            .field("input_consumed_len", &self.input_consumed_len)
            .field("padding_offset", &self.padding_offset)
            .finish()
    }
}

impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided `reader`.
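    ///
    /// # Examples
    ///
    /// A minimal construction sketch (any `Read` impl works; a `Cursor` stands in here):
    ///
    /// ```
    /// use std::io::Cursor;
    /// use base64::engine::general_purpose;
    ///
    /// let reader = Cursor::new(b"aGVsbG8=");
    /// let _decoder = base64::read::DecoderReader::new(reader, &general_purpose::STANDARD);
    /// ```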
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            input_consumed_len: 0,
            padding_offset: None,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
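    ///
    /// For example, requesting 4 b64 bytes requires `buf` to have room for up to 3 decoded
    /// bytes (fewer when the chunk ends in padding).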
    ///
    /// # Panics
    ///
    /// Panics if `buf` is too small.
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= b64_len_to_decode);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
        let decode_metadata = self
            .engine
            .internal_decode(
                b64_to_decode,
                buf,
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
            )
            .map_err(|dse| match dse {
                DecodeSliceError::DecodeError(de) => {
                    match de {
                        DecodeError::InvalidByte(offset, byte) => {
                            match (byte, self.padding_offset) {
                                // if there was padding in a previous block of decoding that happened to
                                // be correct, and we now find more padding that happens to be incorrect,
                                // to be consistent with non-reader decodes, record the error at the first
                                // padding
                                (PAD_BYTE, Some(first_pad_offset)) => {
                                    DecodeError::InvalidByte(first_pad_offset, PAD_BYTE)
                                }
                                _ => {
                                    DecodeError::InvalidByte(self.input_consumed_len + offset, byte)
                                }
                            }
                        }
                        DecodeError::InvalidLength(len) => {
                            DecodeError::InvalidLength(self.input_consumed_len + len)
                        }
                        DecodeError::InvalidLastSymbol(offset, byte) => {
                            DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte)
                        }
                        DecodeError::InvalidPadding => DecodeError::InvalidPadding,
                    }
                }
                DecodeSliceError::OutputSliceTooSmall => {
                    unreachable!("buf is sized correctly in calling code")
                }
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        if let Some(offset) = self.padding_offset {
            // we've already seen padding
            if decode_metadata.decoded_len > 0 {
                // we read more after already finding padding; report error at first padding byte
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    DecodeError::InvalidByte(offset, PAD_BYTE),
                ));
            }
        }

        self.padding_offset = self.padding_offset.or(decode_metadata
            .padding_offset
            .map(|offset| self.input_consumed_len + offset));
        self.input_consumed_len += b64_len_to_decode;
        self.b64_offset += b64_len_to_decode;
        self.b64_len -= b64_len_to_decode;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decode_metadata.decoded_len)
    }

    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
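    ///
    /// # Examples
    ///
    /// A minimal sketch of recovering the wrapped reader once decoding is done (a `Cursor`
    /// stands in for a real reader):
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let cursor = Cursor::new(b"YXNkZg==");
    /// let mut decoder = base64::read::DecoderReader::new(cursor, &general_purpose::STANDARD);
    ///
    /// let mut decoded = Vec::new();
    /// decoder.read_to_end(&mut decoded).unwrap();
    /// assert_eq!(b"asdf", &decoded[..]);
    ///
    /// // take the reader back; its state beyond the data already consumed is unspecified
    /// let _cursor = decoder.into_inner();
    /// ```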
    pub fn into_inner(self) -> R {
        self.inner
    }
}

impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
    /// Decode input from the wrapped reader.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// written in `buf`.
    ///
    /// Where possible, this function buffers base64 to minimize the number of `read()` calls to
    /// the delegate reader.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
    /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
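    ///
    /// # Examples
    ///
    /// A short sketch of the error path (the invalid input here is arbitrary):
    ///
    /// ```
    /// use std::io::{Cursor, Read};
    /// use base64::engine::general_purpose;
    ///
    /// let mut wrapped_reader = Cursor::new(b"!not base64!");
    /// let mut decoder = base64::read::DecoderReader::new(
    ///     &mut wrapped_reader,
    ///     &general_purpose::STANDARD);
    ///
    /// let mut result = Vec::new();
    /// let err = decoder.read_to_end(&mut result).unwrap_err();
    /// assert_eq!(std::io::ErrorKind::InvalidData, err.kind());
    /// ```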
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        // offset == BUF_SIZE when we copied it all last time
        debug_assert!(self.b64_offset <= BUF_SIZE);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(if self.b64_offset == BUF_SIZE {
            self.b64_len == 0
        } else {
            self.b64_len <= BUF_SIZE
        });

        debug_assert!(if self.decoded_len == 0 {
            // can be = when we were able to copy the complete chunk
            self.decoded_offset <= DECODED_CHUNK_SIZE
        } else {
            self.decoded_offset < DECODED_CHUNK_SIZE
        });

        // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
        // byte into the provided buf, so the effective length should only be 3 momentarily between
        // when we decode and when we copy into the target buffer.
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
        debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);

        if self.decoded_len > 0 {
            // we have a few leftover decoded bytes; flush that rather than pull in more b64
            self.flush_decoded_buf(buf)
        } else {
            let mut at_eof = false;
            while self.b64_len < BASE64_CHUNK_SIZE {
                // Copy any bytes we have to the start of the buffer.
                self.b64_buffer
                    .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
                self.b64_offset = 0;

                // then fill in more data
                let read = self.read_from_delegate()?;
                if read == 0 {
                    // we never read into an empty buf, so 0 => we've hit EOF
                    at_eof = true;
                    break;
                }
            }

            if self.b64_len == 0 {
                debug_assert!(at_eof);
                // we must be at EOF, and we have no data left to decode
                return Ok(0);
            }

            debug_assert!(if at_eof {
                // if we are at eof, we may not have a complete chunk
                self.b64_len > 0
            } else {
                // otherwise, we must have at least one chunk
                self.b64_len >= BASE64_CHUNK_SIZE
            });

            debug_assert_eq!(0, self.decoded_len);

            if buf.len() < DECODED_CHUNK_SIZE {
                // caller requested an annoyingly short read
                // have to write to a tmp buf first to avoid double mutable borrow
                let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
                // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
                // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
                // tokens, not 1, since 1 token can't decode to 1 byte).
                let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);

                let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
                self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);

                self.decoded_offset = 0;
                self.decoded_len = decoded;

                // can be less than 3 on last block due to padding
                debug_assert!(decoded <= 3);

                self.flush_decoded_buf(buf)
            } else {
                let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
                    .checked_mul(BASE64_CHUNK_SIZE)
                    .expect("too many chunks");
                debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);

                let b64_bytes_available_to_decode = if at_eof {
                    self.b64_len
                } else {
                    // only use complete chunks
                    self.b64_len - self.b64_len % 4
                };

                let actual_decode_len = cmp::min(
                    b64_bytes_that_can_decode_into_buf,
                    b64_bytes_available_to_decode,
                );
                self.decode_to_buf(actual_decode_len, buf)
            }
        }
    }
}