| 1 | use std::cmp; |
| 2 | use std::io; |
| 3 | use std::io::prelude::*; |
| 4 | use std::mem; |
| 5 | |
| 6 | use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
| 7 | use crate::crc::CrcReader; |
| 8 | use crate::deflate; |
| 9 | use crate::Compression; |
| 10 | |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is the read cursor into `from`. It is advanced by the number of
/// bytes copied, and that number is returned. When `*pos` is at or beyond the
/// end of `from`, nothing is copied, `pos` is left untouched and `0` is
/// returned.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` yields `None` for a cursor past the end, which avoids both the
    // `from.len() - *pos` underflow and an out-of-range slice panic.
    let remaining = from.get(*pos..).unwrap_or(&[]);
    let n = cmp::min(into.len(), remaining.len());
    into[..n].copy_from_slice(&remaining[..n]);
    *pos += n;
    n
}
| 17 | |
| 18 | /// A gzip streaming encoder |
| 19 | /// |
| 20 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 21 | /// uncompressed data from the underlying [`BufRead`] and provides the compressed data. |
| 22 | /// |
| 23 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 24 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 25 | /// |
| 26 | /// # Examples |
| 27 | /// |
| 28 | /// ``` |
| 29 | /// use std::io::prelude::*; |
| 30 | /// use std::io; |
| 31 | /// use flate2::Compression; |
| 32 | /// use flate2::bufread::GzEncoder; |
| 33 | /// use std::fs::File; |
| 34 | /// use std::io::BufReader; |
| 35 | /// |
| 36 | /// // Opens sample file, compresses the contents and returns a Vector or error |
| 37 | /// // File wrapped in a BufReader implements BufRead |
| 38 | /// |
| 39 | /// fn open_hello_world() -> io::Result<Vec<u8>> { |
| 40 | /// let f = File::open("examples/hello_world.txt" )?; |
| 41 | /// let b = BufReader::new(f); |
| 42 | /// let mut gz = GzEncoder::new(b, Compression::fast()); |
| 43 | /// let mut buffer = Vec::new(); |
| 44 | /// gz.read_to_end(&mut buffer)?; |
| 45 | /// Ok(buffer) |
| 46 | /// } |
| 47 | /// ``` |
| 48 | #[derive (Debug)] |
| 49 | pub struct GzEncoder<R> { |
| 50 | inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, |
| 51 | header: Vec<u8>, |
| 52 | pos: usize, |
| 53 | eof: bool, |
| 54 | } |
| 55 | |
| 56 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
| 57 | let crc: CrcReader = CrcReader::new(r); |
| 58 | GzEncoder { |
| 59 | inner: deflate::bufread::DeflateEncoder::new(r:crc, level:lvl), |
| 60 | header, |
| 61 | pos: 0, |
| 62 | eof: false, |
| 63 | } |
| 64 | } |
| 65 | |
| 66 | impl<R: BufRead> GzEncoder<R> { |
| 67 | /// Creates a new encoder which will use the given compression level. |
| 68 | /// |
| 69 | /// The encoder is not configured specially for the emitted header. For |
| 70 | /// header configuration, see the `GzBuilder` type. |
| 71 | /// |
| 72 | /// The data read from the stream `r` will be compressed and available |
| 73 | /// through the returned reader. |
| 74 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
| 75 | GzBuilder::new().buf_read(r, level) |
| 76 | } |
| 77 | |
| 78 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 79 | if self.pos == 8 { |
| 80 | return Ok(0); |
| 81 | } |
| 82 | let crc = self.inner.get_ref().crc(); |
| 83 | let calced_crc_bytes = crc.sum().to_le_bytes(); |
| 84 | let arr = [ |
| 85 | calced_crc_bytes[0], |
| 86 | calced_crc_bytes[1], |
| 87 | calced_crc_bytes[2], |
| 88 | calced_crc_bytes[3], |
| 89 | (crc.amount() >> 0) as u8, |
| 90 | (crc.amount() >> 8) as u8, |
| 91 | (crc.amount() >> 16) as u8, |
| 92 | (crc.amount() >> 24) as u8, |
| 93 | ]; |
| 94 | Ok(copy(into, &arr, &mut self.pos)) |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | impl<R> GzEncoder<R> { |
| 99 | /// Acquires a reference to the underlying reader. |
| 100 | pub fn get_ref(&self) -> &R { |
| 101 | self.inner.get_ref().get_ref() |
| 102 | } |
| 103 | |
| 104 | /// Acquires a mutable reference to the underlying reader. |
| 105 | /// |
| 106 | /// Note that mutation of the reader may result in surprising results if |
| 107 | /// this encoder is continued to be used. |
| 108 | pub fn get_mut(&mut self) -> &mut R { |
| 109 | self.inner.get_mut().get_mut() |
| 110 | } |
| 111 | |
| 112 | /// Returns the underlying stream, consuming this encoder |
| 113 | pub fn into_inner(self) -> R { |
| 114 | self.inner.into_inner().into_inner() |
| 115 | } |
| 116 | } |
| 117 | |
/// Splits an 8-byte footer buffer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: `crc` is decoded from bytes `0..4` and `amt` from
/// bytes `4..8`.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
| 130 | |
| 131 | impl<R: BufRead> Read for GzEncoder<R> { |
| 132 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
| 133 | let mut amt: usize = 0; |
| 134 | if self.eof { |
| 135 | return self.read_footer(into); |
| 136 | } else if self.pos < self.header.len() { |
| 137 | amt += copy(into, &self.header, &mut self.pos); |
| 138 | if amt == into.len() { |
| 139 | return Ok(amt); |
| 140 | } |
| 141 | let tmp: &mut [u8] = into; |
| 142 | into = &mut tmp[amt..]; |
| 143 | } |
| 144 | match self.inner.read(buf:into)? { |
| 145 | 0 => { |
| 146 | self.eof = true; |
| 147 | self.pos = 0; |
| 148 | self.read_footer(into) |
| 149 | } |
| 150 | n: usize => Ok(amt + n), |
| 151 | } |
| 152 | } |
| 153 | } |
| 154 | |
| 155 | impl<R: BufRead + Write> Write for GzEncoder<R> { |
| 156 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| 157 | self.get_mut().write(buf) |
| 158 | } |
| 159 | |
| 160 | fn flush(&mut self) -> io::Result<()> { |
| 161 | self.get_mut().flush() |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | /// A decoder for a single member of a [gzip file]. |
| 166 | /// |
| 167 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 168 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
| 169 | /// |
| 170 | /// After reading a single member of the gzip data this reader will return |
| 171 | /// Ok(0) even if there are more bytes available in the underlying reader. |
| 172 | /// If you need the following bytes, call `into_inner()` after Ok(0) to |
| 173 | /// recover the underlying reader. |
| 174 | /// |
| 175 | /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] |
| 176 | /// or read more |
| 177 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| 178 | /// |
| 179 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| 180 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 181 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 182 | /// |
| 183 | /// # Examples |
| 184 | /// |
| 185 | /// ``` |
| 186 | /// use std::io::prelude::*; |
| 187 | /// use std::io; |
| 188 | /// # use flate2::Compression; |
| 189 | /// # use flate2::write::GzEncoder; |
| 190 | /// use flate2::bufread::GzDecoder; |
| 191 | /// |
| 192 | /// # fn main() { |
| 193 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| 194 | /// # e.write_all(b"Hello World" ).unwrap(); |
| 195 | /// # let bytes = e.finish().unwrap(); |
| 196 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
| 197 | /// # } |
| 198 | /// # |
| 199 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
| 200 | /// // Here &[u8] implements BufRead |
| 201 | /// |
| 202 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
| 203 | /// let mut gz = GzDecoder::new(&bytes[..]); |
| 204 | /// let mut s = String::new(); |
| 205 | /// gz.read_to_string(&mut s)?; |
| 206 | /// Ok(s) |
| 207 | /// } |
| 208 | /// ``` |
| 209 | #[derive (Debug)] |
| 210 | pub struct GzDecoder<R> { |
| 211 | state: GzState, |
| 212 | reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, |
| 213 | multi: bool, |
| 214 | } |
| 215 | |
| 216 | #[derive (Debug)] |
| 217 | enum GzState { |
| 218 | Header(GzHeaderParser), |
| 219 | Body(GzHeader), |
| 220 | Finished(GzHeader, usize, [u8; 8]), |
| 221 | Err(io::Error), |
| 222 | End(Option<GzHeader>), |
| 223 | } |
| 224 | |
| 225 | impl<R: BufRead> GzDecoder<R> { |
| 226 | /// Creates a new decoder from the given reader, immediately parsing the |
| 227 | /// gzip header. |
| 228 | pub fn new(mut r: R) -> GzDecoder<R> { |
| 229 | let mut header_parser = GzHeaderParser::new(); |
| 230 | |
| 231 | let state = match header_parser.parse(&mut r) { |
| 232 | Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
| 233 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
| 234 | GzState::Header(header_parser) |
| 235 | } |
| 236 | Err(err) => GzState::Err(err), |
| 237 | }; |
| 238 | |
| 239 | GzDecoder { |
| 240 | state, |
| 241 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
| 242 | multi: false, |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | fn multi(mut self, flag: bool) -> GzDecoder<R> { |
| 247 | self.multi = flag; |
| 248 | self |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | impl<R> GzDecoder<R> { |
| 253 | /// Returns the header associated with this stream, if it was valid |
| 254 | pub fn header(&self) -> Option<&GzHeader> { |
| 255 | match &self.state { |
| 256 | GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
| 257 | GzState::End(header) => header.as_ref(), |
| 258 | _ => None, |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | /// Acquires a reference to the underlying reader. |
| 263 | pub fn get_ref(&self) -> &R { |
| 264 | self.reader.get_ref().get_ref() |
| 265 | } |
| 266 | |
| 267 | /// Acquires a mutable reference to the underlying stream. |
| 268 | /// |
| 269 | /// Note that mutation of the stream may result in surprising results if |
| 270 | /// this decoder is continued to be used. |
| 271 | pub fn get_mut(&mut self) -> &mut R { |
| 272 | self.reader.get_mut().get_mut() |
| 273 | } |
| 274 | |
| 275 | /// Consumes this decoder, returning the underlying reader. |
| 276 | pub fn into_inner(self) -> R { |
| 277 | self.reader.into_inner().into_inner() |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | impl<R: BufRead> Read for GzDecoder<R> { |
| 282 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 283 | loop { |
| 284 | match &mut self.state { |
| 285 | GzState::Header(parser) => { |
| 286 | parser.parse(self.reader.get_mut().get_mut())?; |
| 287 | self.state = GzState::Body(GzHeader::from(mem::take(parser))); |
| 288 | } |
| 289 | GzState::Body(header) => { |
| 290 | if into.is_empty() { |
| 291 | return Ok(0); |
| 292 | } |
| 293 | match self.reader.read(into)? { |
| 294 | 0 => { |
| 295 | self.state = GzState::Finished(mem::take(header), 0, [0; 8]); |
| 296 | } |
| 297 | n => { |
| 298 | return Ok(n); |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | GzState::Finished(header, pos, buf) => { |
| 303 | if *pos < buf.len() { |
| 304 | *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; |
| 305 | } else { |
| 306 | let (crc, amt) = finish(&buf); |
| 307 | |
| 308 | if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { |
| 309 | self.state = GzState::End(Some(mem::take(header))); |
| 310 | return Err(corrupt()); |
| 311 | } else if self.multi { |
| 312 | let is_eof = self |
| 313 | .reader |
| 314 | .get_mut() |
| 315 | .get_mut() |
| 316 | .fill_buf() |
| 317 | .map(|buf| buf.is_empty())?; |
| 318 | |
| 319 | if is_eof { |
| 320 | self.state = GzState::End(Some(mem::take(header))); |
| 321 | } else { |
| 322 | self.reader.reset(); |
| 323 | self.reader.get_mut().reset_data(); |
| 324 | self.state = GzState::Header(GzHeaderParser::new()) |
| 325 | } |
| 326 | } else { |
| 327 | self.state = GzState::End(Some(mem::take(header))); |
| 328 | } |
| 329 | } |
| 330 | } |
| 331 | GzState::Err(err) => { |
| 332 | let result = Err(mem::replace(err, io::ErrorKind::Other.into())); |
| 333 | self.state = GzState::End(None); |
| 334 | return result; |
| 335 | } |
| 336 | GzState::End(_) => return Ok(0), |
| 337 | } |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | impl<R: BufRead + Write> Write for GzDecoder<R> { |
| 343 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| 344 | self.get_mut().write(buf) |
| 345 | } |
| 346 | |
| 347 | fn flush(&mut self) -> io::Result<()> { |
| 348 | self.get_mut().flush() |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. |
| 353 | /// |
| 354 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 355 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
| 356 | /// |
| 357 | /// A gzip file consists of a series of *members* concatenated one after another. |
| 358 | /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the |
| 359 | /// underlying reader does. For a file, this reads to the end of the file. |
| 360 | /// |
| 361 | /// To handle members separately, see [GzDecoder] or read more |
| 362 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| 363 | /// |
| 364 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| 365 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 366 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 367 | /// |
| 368 | /// # Examples |
| 369 | /// |
| 370 | /// ``` |
| 371 | /// use std::io::prelude::*; |
| 372 | /// use std::io; |
| 373 | /// # use flate2::Compression; |
| 374 | /// # use flate2::write::GzEncoder; |
| 375 | /// use flate2::bufread::MultiGzDecoder; |
| 376 | /// |
| 377 | /// # fn main() { |
| 378 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| 379 | /// # e.write_all(b"Hello World" ).unwrap(); |
| 380 | /// # let bytes = e.finish().unwrap(); |
| 381 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
| 382 | /// # } |
| 383 | /// # |
| 384 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
| 385 | /// // Here &[u8] implements BufRead |
| 386 | /// |
| 387 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
| 388 | /// let mut gz = MultiGzDecoder::new(&bytes[..]); |
| 389 | /// let mut s = String::new(); |
| 390 | /// gz.read_to_string(&mut s)?; |
| 391 | /// Ok(s) |
| 392 | /// } |
| 393 | /// ``` |
| 394 | #[derive (Debug)] |
| 395 | pub struct MultiGzDecoder<R>(GzDecoder<R>); |
| 396 | |
| 397 | impl<R: BufRead> MultiGzDecoder<R> { |
| 398 | /// Creates a new decoder from the given reader, immediately parsing the |
| 399 | /// (first) gzip header. If the gzip stream contains multiple members all will |
| 400 | /// be decoded. |
| 401 | pub fn new(r: R) -> MultiGzDecoder<R> { |
| 402 | MultiGzDecoder(GzDecoder::new(r).multi(flag:true)) |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | impl<R> MultiGzDecoder<R> { |
| 407 | /// Returns the current header associated with this stream, if it's valid |
| 408 | pub fn header(&self) -> Option<&GzHeader> { |
| 409 | self.0.header() |
| 410 | } |
| 411 | |
| 412 | /// Acquires a reference to the underlying reader. |
| 413 | pub fn get_ref(&self) -> &R { |
| 414 | self.0.get_ref() |
| 415 | } |
| 416 | |
| 417 | /// Acquires a mutable reference to the underlying stream. |
| 418 | /// |
| 419 | /// Note that mutation of the stream may result in surprising results if |
| 420 | /// this decoder is continued to be used. |
| 421 | pub fn get_mut(&mut self) -> &mut R { |
| 422 | self.0.get_mut() |
| 423 | } |
| 424 | |
| 425 | /// Consumes this decoder, returning the underlying reader. |
| 426 | pub fn into_inner(self) -> R { |
| 427 | self.0.into_inner() |
| 428 | } |
| 429 | } |
| 430 | |
| 431 | impl<R: BufRead> Read for MultiGzDecoder<R> { |
| 432 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 433 | self.0.read(buf:into) |
| 434 | } |
| 435 | } |
| 436 | |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a bare `write` may accept only
            // part of the input and its returned count was being ignored.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Probe with a non-empty buffer: reading into an empty one returns 0
        // regardless of the decoder's state and would prove nothing.
        let mut probe = [0u8; 1];
        assert_eq!(
            decoder.read(&mut probe).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
| 483 | |