| 1 | use crate::engine::Engine; |
| 2 | use std::{ |
| 3 | cmp, fmt, io, |
| 4 | io::{ErrorKind, Result}, |
| 5 | }; |
| 6 | |
/// Size, in bytes, of the fixed buffer that encoded output is staged in before it is handed to
/// the delegate writer.
pub(crate) const BUF_SIZE: usize = 1024;
/// Smallest amount of input that encodes without padding: 3 bytes of input always become exactly
/// 4 bytes of base64 (line wrapping is not supported).
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// The largest number of input bytes whose encoding still fits in `BUF_SIZE`.
const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * MIN_ENCODE_CHUNK_SIZE;
| 12 | |
| 13 | /// A `Write` implementation that base64 encodes data before delegating to the wrapped writer. |
| 14 | /// |
| 15 | /// Because base64 has special handling for the end of the input data (padding, etc), there's a |
| 16 | /// `finish()` method on this type that encodes any leftover input bytes and adds padding if |
| 17 | /// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but |
| 18 | /// any error that occurs when invoking the underlying writer will be suppressed. If you want to |
| 19 | /// handle such errors, call `finish()` yourself. |
| 20 | /// |
| 21 | /// # Examples |
| 22 | /// |
| 23 | /// ``` |
| 24 | /// use std::io::Write; |
| 25 | /// use base64::engine::general_purpose; |
| 26 | /// |
| 27 | /// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc. |
| 28 | /// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD); |
| 29 | /// |
| 30 | /// // handle errors as you normally would |
| 31 | /// enc.write_all(b"asdf" ).unwrap(); |
| 32 | /// |
| 33 | /// // could leave this out to be called by Drop, if you don't care |
| 34 | /// // about handling errors or getting the delegate writer back |
| 35 | /// let delegate = enc.finish().unwrap(); |
| 36 | /// |
| 37 | /// // base64 was written to the writer |
| 38 | /// assert_eq!(b"YXNkZg==" , &delegate[..]); |
| 39 | /// |
| 40 | /// ``` |
| 41 | /// |
| 42 | /// # Panics |
| 43 | /// |
| 44 | /// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without |
| 45 | /// error is invalid and will panic. |
| 46 | /// |
| 47 | /// # Errors |
| 48 | /// |
| 49 | /// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be |
| 50 | /// returned as per the contract of `Write`. |
| 51 | /// |
| 52 | /// # Performance |
| 53 | /// |
| 54 | /// It has some minor performance loss compared to encoding slices (a couple percent). |
| 55 | /// It does not do any heap allocation. |
| 56 | /// |
| 57 | /// # Limitations |
| 58 | /// |
| 59 | /// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their |
| 60 | /// implications for a buffering implementation, these methods may not behave as expected. In |
| 61 | /// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`. |
| 62 | /// See the documentation of the `Write` trait implementation for further details. |
| 63 | pub struct EncoderWriter<'e, E: Engine, W: io::Write> { |
| 64 | engine: &'e E, |
| 65 | /// Where encoded data is written to. It's an Option as it's None immediately before Drop is |
| 66 | /// called so that finish() can return the underlying writer. None implies that finish() has |
| 67 | /// been called successfully. |
| 68 | delegate: Option<W>, |
| 69 | /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk |
| 70 | /// with the next `write()`, encode it, then proceed with the rest of the input normally. |
| 71 | extra_input: [u8; MIN_ENCODE_CHUNK_SIZE], |
| 72 | /// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`. |
| 73 | extra_input_occupied_len: usize, |
| 74 | /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer |
| 75 | /// did not write last time. |
| 76 | output: [u8; BUF_SIZE], |
| 77 | /// How much of `output` is occupied with encoded data that couldn't be written last time |
| 78 | output_occupied_len: usize, |
| 79 | /// panic safety: don't write again in destructor if writer panicked while we were writing to it |
| 80 | panicked: bool, |
| 81 | } |
| 82 | |
| 83 | impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> { |
| 84 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 85 | write!( |
| 86 | f, |
| 87 | "extra_input: {:?} extra_input_occupied_len: {:?} output[..5]: {:?} output_occupied_len: {:?}" , |
| 88 | self.extra_input, |
| 89 | self.extra_input_occupied_len, |
| 90 | &self.output[0..5], |
| 91 | self.output_occupied_len |
| 92 | ) |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> { |
| 97 | /// Create a new encoder that will write to the provided delegate writer. |
| 98 | pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> { |
| 99 | EncoderWriter { |
| 100 | engine, |
| 101 | delegate: Some(delegate), |
| 102 | extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE], |
| 103 | extra_input_occupied_len: 0, |
| 104 | output: [0u8; BUF_SIZE], |
| 105 | output_occupied_len: 0, |
| 106 | panicked: false, |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /// Encode all remaining buffered data and write it, including any trailing incomplete input |
| 111 | /// triples and associated padding. |
| 112 | /// |
| 113 | /// Once this succeeds, no further writes or calls to this method are allowed. |
| 114 | /// |
| 115 | /// This may write to the delegate writer multiple times if the delegate writer does not accept |
| 116 | /// all input provided to its `write` each invocation. |
| 117 | /// |
| 118 | /// If you don't care about error handling, it is not necessary to call this function, as the |
| 119 | /// equivalent finalization is done by the Drop impl. |
| 120 | /// |
| 121 | /// Returns the writer that this was constructed around. |
| 122 | /// |
| 123 | /// # Errors |
| 124 | /// |
| 125 | /// The first error that is not of `ErrorKind::Interrupted` will be returned. |
| 126 | pub fn finish(&mut self) -> Result<W> { |
| 127 | // If we could consume self in finish(), we wouldn't have to worry about this case, but |
| 128 | // finish() is retryable in the face of I/O errors, so we can't consume here. |
| 129 | if self.delegate.is_none() { |
| 130 | panic!("Encoder has already had finish() called" ); |
| 131 | }; |
| 132 | |
| 133 | self.write_final_leftovers()?; |
| 134 | |
| 135 | let writer = self.delegate.take().expect("Writer must be present" ); |
| 136 | |
| 137 | Ok(writer) |
| 138 | } |
| 139 | |
| 140 | /// Write any remaining buffered data to the delegate writer. |
| 141 | fn write_final_leftovers(&mut self) -> Result<()> { |
| 142 | if self.delegate.is_none() { |
| 143 | // finish() has already successfully called this, and we are now in drop() with a None |
| 144 | // writer, so just no-op |
| 145 | return Ok(()); |
| 146 | } |
| 147 | |
| 148 | self.write_all_encoded_output()?; |
| 149 | |
| 150 | if self.extra_input_occupied_len > 0 { |
| 151 | let encoded_len = self |
| 152 | .engine |
| 153 | .encode_slice( |
| 154 | &self.extra_input[..self.extra_input_occupied_len], |
| 155 | &mut self.output[..], |
| 156 | ) |
| 157 | .expect("buffer is large enough" ); |
| 158 | |
| 159 | self.output_occupied_len = encoded_len; |
| 160 | |
| 161 | self.write_all_encoded_output()?; |
| 162 | |
| 163 | // write succeeded, do not write the encoding of extra again if finish() is retried |
| 164 | self.extra_input_occupied_len = 0; |
| 165 | } |
| 166 | |
| 167 | Ok(()) |
| 168 | } |
| 169 | |
| 170 | /// Write as much of the encoded output to the delegate writer as it will accept, and store the |
| 171 | /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`. |
| 172 | /// |
| 173 | /// # Errors |
| 174 | /// |
| 175 | /// Errors from the delegate writer are returned. In the case of an error, |
| 176 | /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean |
| 177 | /// that no write took place. |
| 178 | fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> { |
| 179 | self.panicked = true; |
| 180 | let res = self |
| 181 | .delegate |
| 182 | .as_mut() |
| 183 | .expect("Writer must be present" ) |
| 184 | .write(&self.output[..current_output_len]); |
| 185 | self.panicked = false; |
| 186 | |
| 187 | res.map(|consumed| { |
| 188 | debug_assert!(consumed <= current_output_len); |
| 189 | |
| 190 | if consumed < current_output_len { |
| 191 | self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap(); |
| 192 | // If we're blocking on I/O, the minor inefficiency of copying bytes to the |
| 193 | // start of the buffer is the least of our concerns... |
| 194 | // TODO Rotate moves more than we need to; copy_within now stable. |
| 195 | self.output.rotate_left(consumed); |
| 196 | } else { |
| 197 | self.output_occupied_len = 0; |
| 198 | } |
| 199 | }) |
| 200 | } |
| 201 | |
| 202 | /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`. |
| 203 | /// |
| 204 | /// This is basically write_all for the remaining buffered data but without the undesirable |
| 205 | /// abort-on-`Ok(0)` behavior. |
| 206 | /// |
| 207 | /// # Errors |
| 208 | /// |
| 209 | /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's |
| 210 | /// `Interrupted`, in which case the error is ignored and writes will continue. |
| 211 | fn write_all_encoded_output(&mut self) -> Result<()> { |
| 212 | while self.output_occupied_len > 0 { |
| 213 | let remaining_len = self.output_occupied_len; |
| 214 | match self.write_to_delegate(remaining_len) { |
| 215 | // try again on interrupts ala write_all |
| 216 | Err(ref e) if e.kind() == ErrorKind::Interrupted => {} |
| 217 | // other errors return |
| 218 | Err(e) => return Err(e), |
| 219 | // success no-ops because remaining length is already updated |
| 220 | Ok(_) => {} |
| 221 | }; |
| 222 | } |
| 223 | |
| 224 | debug_assert_eq!(0, self.output_occupied_len); |
| 225 | Ok(()) |
| 226 | } |
| 227 | |
| 228 | /// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output |
| 229 | /// to. |
| 230 | /// |
| 231 | /// Normally this method should not be needed, since `finish()` returns the inner writer if |
| 232 | /// it completes successfully. That will also ensure all data has been flushed, which the |
| 233 | /// `into_inner()` function does *not* do. |
| 234 | /// |
| 235 | /// Calling this method after `finish()` has completed successfully will panic, since the |
| 236 | /// writer has already been returned. |
| 237 | /// |
| 238 | /// This method may be useful if the writer implements additional APIs beyond the `Write` |
| 239 | /// trait. Note that the inner writer might be in an error state or have an incomplete |
| 240 | /// base64 string written to it. |
| 241 | pub fn into_inner(mut self) -> W { |
| 242 | self.delegate |
| 243 | .take() |
| 244 | .expect("Encoder has already had finish() called" ) |
| 245 | } |
| 246 | } |
| 247 | |
| 248 | impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> { |
| 249 | /// Encode input and then write to the delegate writer. |
| 250 | /// |
| 251 | /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes |
| 252 | /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which |
| 253 | /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See |
| 254 | /// <https://github.com/rust-lang/rust/issues/56889> for more on that. |
| 255 | /// |
| 256 | /// If the previous call to `write` provided more (encoded) data than the delegate writer could |
| 257 | /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered |
| 258 | /// data is present, subsequent calls to `write` will try to write the remaining buffered data |
| 259 | /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or |
| 260 | /// an error. |
| 261 | /// |
| 262 | /// # Errors |
| 263 | /// |
| 264 | /// Any errors emitted by the delegate writer are returned. |
| 265 | fn write(&mut self, input: &[u8]) -> Result<usize> { |
| 266 | if self.delegate.is_none() { |
| 267 | panic!("Cannot write more after calling finish()" ); |
| 268 | } |
| 269 | |
| 270 | if input.is_empty() { |
| 271 | return Ok(0); |
| 272 | } |
| 273 | |
| 274 | // The contract of `Write::write` places some constraints on this implementation: |
| 275 | // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't |
| 276 | // iterate over the input and encode multiple chunks. |
| 277 | // - Errors mean that "no bytes were written to this writer", so we need to reset the |
| 278 | // internal state to what it was before the error occurred |
| 279 | |
| 280 | // before reading any input, write any leftover encoded output from last time |
| 281 | if self.output_occupied_len > 0 { |
| 282 | let current_len = self.output_occupied_len; |
| 283 | return self |
| 284 | .write_to_delegate(current_len) |
| 285 | // did not read any input |
| 286 | .map(|_| 0); |
| 287 | } |
| 288 | |
| 289 | debug_assert_eq!(0, self.output_occupied_len); |
| 290 | |
| 291 | // how many bytes, if any, were read into `extra` to create a triple to encode |
| 292 | let mut extra_input_read_len = 0; |
| 293 | let mut input = input; |
| 294 | |
| 295 | let orig_extra_len = self.extra_input_occupied_len; |
| 296 | |
| 297 | let mut encoded_size = 0; |
| 298 | // always a multiple of MIN_ENCODE_CHUNK_SIZE |
| 299 | let mut max_input_len = MAX_INPUT_LEN; |
| 300 | |
| 301 | // process leftover un-encoded input from last write |
| 302 | if self.extra_input_occupied_len > 0 { |
| 303 | debug_assert!(self.extra_input_occupied_len < 3); |
| 304 | if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE { |
| 305 | // Fill up `extra`, encode that into `output`, and consume as much of the rest of |
| 306 | // `input` as possible. |
| 307 | // We could write just the encoding of `extra` by itself but then we'd have to |
| 308 | // return after writing only 4 bytes, which is inefficient if the underlying writer |
| 309 | // would make a syscall. |
| 310 | extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len; |
| 311 | debug_assert!(extra_input_read_len > 0); |
| 312 | // overwrite only bytes that weren't already used. If we need to rollback extra_len |
| 313 | // (when the subsequent write errors), the old leading bytes will still be there. |
| 314 | self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE] |
| 315 | .copy_from_slice(&input[0..extra_input_read_len]); |
| 316 | |
| 317 | let len = self.engine.internal_encode( |
| 318 | &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE], |
| 319 | &mut self.output[..], |
| 320 | ); |
| 321 | debug_assert_eq!(4, len); |
| 322 | |
| 323 | input = &input[extra_input_read_len..]; |
| 324 | |
| 325 | // consider extra to be used up, since we encoded it |
| 326 | self.extra_input_occupied_len = 0; |
| 327 | // don't clobber where we just encoded to |
| 328 | encoded_size = 4; |
| 329 | // and don't read more than can be encoded |
| 330 | max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE; |
| 331 | |
| 332 | // fall through to normal encoding |
| 333 | } else { |
| 334 | // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be |
| 335 | // 1 byte in each. |
| 336 | debug_assert_eq!(1, input.len()); |
| 337 | debug_assert_eq!(1, self.extra_input_occupied_len); |
| 338 | |
| 339 | self.extra_input[self.extra_input_occupied_len] = input[0]; |
| 340 | self.extra_input_occupied_len += 1; |
| 341 | return Ok(1); |
| 342 | }; |
| 343 | } else if input.len() < MIN_ENCODE_CHUNK_SIZE { |
| 344 | // `extra` is empty, and `input` fits inside it |
| 345 | self.extra_input[0..input.len()].copy_from_slice(input); |
| 346 | self.extra_input_occupied_len = input.len(); |
| 347 | return Ok(input.len()); |
| 348 | }; |
| 349 | |
| 350 | // either 0 or 1 complete chunks encoded from extra |
| 351 | debug_assert!(encoded_size == 0 || encoded_size == 4); |
| 352 | debug_assert!( |
| 353 | // didn't encode extra input |
| 354 | MAX_INPUT_LEN == max_input_len |
| 355 | // encoded one triple |
| 356 | || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE |
| 357 | ); |
| 358 | |
| 359 | // encode complete triples only |
| 360 | let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE); |
| 361 | let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len); |
| 362 | debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE); |
| 363 | debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE); |
| 364 | |
| 365 | encoded_size += self.engine.internal_encode( |
| 366 | &input[..(input_chunks_to_encode_len)], |
| 367 | &mut self.output[encoded_size..], |
| 368 | ); |
| 369 | |
| 370 | // not updating `self.output_occupied_len` here because if the below write fails, it should |
| 371 | // "never take place" -- the buffer contents we encoded are ignored and perhaps retried |
| 372 | // later, if the consumer chooses. |
| 373 | |
| 374 | self.write_to_delegate(encoded_size) |
| 375 | // no matter whether we wrote the full encoded buffer or not, we consumed the same |
| 376 | // input |
| 377 | .map(|_| extra_input_read_len + input_chunks_to_encode_len) |
| 378 | .map_err(|e| { |
| 379 | // in case we filled and encoded `extra`, reset extra_len |
| 380 | self.extra_input_occupied_len = orig_extra_len; |
| 381 | |
| 382 | e |
| 383 | }) |
| 384 | } |
| 385 | |
| 386 | /// Because this is usually treated as OK to call multiple times, it will *not* flush any |
| 387 | /// incomplete chunks of input or write padding. |
| 388 | /// # Errors |
| 389 | /// |
| 390 | /// The first error that is not of [`ErrorKind::Interrupted`] will be returned. |
| 391 | fn flush(&mut self) -> Result<()> { |
| 392 | self.write_all_encoded_output()?; |
| 393 | self.delegate |
| 394 | .as_mut() |
| 395 | .expect("Writer must be present" ) |
| 396 | .flush() |
| 397 | } |
| 398 | } |
| 399 | |
| 400 | impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> { |
| 401 | fn drop(&mut self) { |
| 402 | if !self.panicked { |
| 403 | // like `BufWriter`, ignore errors during drop |
| 404 | let _ = self.write_final_leftovers(); |
| 405 | } |
| 406 | } |
| 407 | } |
| 408 | |