| 1 | use std::cmp; |
| 2 | use std::io; |
| 3 | use std::io::prelude::*; |
| 4 | use std::mem; |
| 5 | |
| 6 | use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
| 7 | use crate::crc::CrcReader; |
| 8 | use crate::deflate; |
| 9 | use crate::Compression; |
| 10 | |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is a read cursor into `from`. It is advanced by the number of bytes
/// copied, and that count is returned. Nothing is copied (and `pos` is left
/// untouched) when `into` is empty or the cursor is at or beyond the end of
/// `from`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` rather than indexing: a cursor past the end yields an empty
    // remainder instead of an arithmetic underflow or slice panic.
    let remaining = from.get(*pos..).unwrap_or(&[]);
    let min = cmp::min(into.len(), remaining.len());
    into[..min].copy_from_slice(&remaining[..min]);
    *pos += min;
    min
}
| 19 | |
| 20 | /// A gzip streaming encoder |
| 21 | /// |
| 22 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 23 | /// uncompressed data from the underlying [`BufRead`] and provides the compressed data. |
| 24 | /// |
| 25 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 26 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 27 | /// |
| 28 | /// # Examples |
| 29 | /// |
| 30 | /// ``` |
| 31 | /// use std::io::prelude::*; |
| 32 | /// use std::io; |
| 33 | /// use flate2::Compression; |
| 34 | /// use flate2::bufread::GzEncoder; |
| 35 | /// use std::fs::File; |
| 36 | /// use std::io::BufReader; |
| 37 | /// |
| 38 | /// // Opens sample file, compresses the contents and returns a Vector or error |
| 39 | /// // File wrapped in a BufReader implements BufRead |
| 40 | /// |
| 41 | /// fn open_hello_world() -> io::Result<Vec<u8>> { |
| 42 | /// let f = File::open("examples/hello_world.txt" )?; |
| 43 | /// let b = BufReader::new(f); |
| 44 | /// let mut gz = GzEncoder::new(b, Compression::fast()); |
| 45 | /// let mut buffer = Vec::new(); |
| 46 | /// gz.read_to_end(&mut buffer)?; |
| 47 | /// Ok(buffer) |
| 48 | /// } |
| 49 | /// ``` |
| 50 | #[derive (Debug)] |
| 51 | pub struct GzEncoder<R> { |
| 52 | inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, |
| 53 | header: Vec<u8>, |
| 54 | pos: usize, |
| 55 | eof: bool, |
| 56 | } |
| 57 | |
| 58 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
| 59 | let crc: CrcReader = CrcReader::new(r); |
| 60 | GzEncoder { |
| 61 | inner: deflate::bufread::DeflateEncoder::new(r:crc, level:lvl), |
| 62 | header, |
| 63 | pos: 0, |
| 64 | eof: false, |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | impl<R: BufRead> GzEncoder<R> { |
| 69 | /// Creates a new encoder which will use the given compression level. |
| 70 | /// |
| 71 | /// The encoder is not configured specially for the emitted header. For |
| 72 | /// header configuration, see the `GzBuilder` type. |
| 73 | /// |
| 74 | /// The data read from the stream `r` will be compressed and available |
| 75 | /// through the returned reader. |
| 76 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
| 77 | GzBuilder::new().buf_read(r, level) |
| 78 | } |
| 79 | |
| 80 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 81 | if self.pos == 8 { |
| 82 | return Ok(0); |
| 83 | } |
| 84 | let crc = self.inner.get_ref().crc(); |
| 85 | let ref arr = [ |
| 86 | (crc.sum() >> 0) as u8, |
| 87 | (crc.sum() >> 8) as u8, |
| 88 | (crc.sum() >> 16) as u8, |
| 89 | (crc.sum() >> 24) as u8, |
| 90 | (crc.amount() >> 0) as u8, |
| 91 | (crc.amount() >> 8) as u8, |
| 92 | (crc.amount() >> 16) as u8, |
| 93 | (crc.amount() >> 24) as u8, |
| 94 | ]; |
| 95 | Ok(copy(into, arr, &mut self.pos)) |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | impl<R> GzEncoder<R> { |
| 100 | /// Acquires a reference to the underlying reader. |
| 101 | pub fn get_ref(&self) -> &R { |
| 102 | self.inner.get_ref().get_ref() |
| 103 | } |
| 104 | |
| 105 | /// Acquires a mutable reference to the underlying reader. |
| 106 | /// |
| 107 | /// Note that mutation of the reader may result in surprising results if |
| 108 | /// this encoder is continued to be used. |
| 109 | pub fn get_mut(&mut self) -> &mut R { |
| 110 | self.inner.get_mut().get_mut() |
| 111 | } |
| 112 | |
| 113 | /// Returns the underlying stream, consuming this encoder |
| 114 | pub fn into_inner(self) -> R { |
| 115 | self.inner.into_inner().into_inner() |
| 116 | } |
| 117 | } |
| 118 | |
/// Splits an 8-byte footer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`, where `crc` is decoded from bytes 0..4 and `amt`
/// from bytes 4..8.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
| 131 | |
| 132 | impl<R: BufRead> Read for GzEncoder<R> { |
| 133 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
| 134 | let mut amt: usize = 0; |
| 135 | if self.eof { |
| 136 | return self.read_footer(into); |
| 137 | } else if self.pos < self.header.len() { |
| 138 | amt += copy(into, &self.header, &mut self.pos); |
| 139 | if amt == into.len() { |
| 140 | return Ok(amt); |
| 141 | } |
| 142 | let tmp: &mut [u8] = into; |
| 143 | into = &mut tmp[amt..]; |
| 144 | } |
| 145 | match self.inner.read(buf:into)? { |
| 146 | 0 => { |
| 147 | self.eof = true; |
| 148 | self.pos = 0; |
| 149 | self.read_footer(into) |
| 150 | } |
| 151 | n: usize => Ok(amt + n), |
| 152 | } |
| 153 | } |
| 154 | } |
| 155 | |
| 156 | impl<R: BufRead + Write> Write for GzEncoder<R> { |
| 157 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| 158 | self.get_mut().write(buf) |
| 159 | } |
| 160 | |
| 161 | fn flush(&mut self) -> io::Result<()> { |
| 162 | self.get_mut().flush() |
| 163 | } |
| 164 | } |
| 165 | |
| 166 | /// A decoder for a single member of a [gzip file]. |
| 167 | /// |
| 168 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 169 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
| 170 | /// |
| 171 | /// After reading a single member of the gzip data this reader will return |
| 172 | /// Ok(0) even if there are more bytes available in the underlying reader. |
| 173 | /// If you need the following bytes, call `into_inner()` after Ok(0) to |
| 174 | /// recover the underlying reader. |
| 175 | /// |
| 176 | /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] |
| 177 | /// or read more |
| 178 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| 179 | /// |
| 180 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| 181 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 182 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 183 | /// |
| 184 | /// # Examples |
| 185 | /// |
| 186 | /// ``` |
| 187 | /// use std::io::prelude::*; |
| 188 | /// use std::io; |
| 189 | /// # use flate2::Compression; |
| 190 | /// # use flate2::write::GzEncoder; |
| 191 | /// use flate2::bufread::GzDecoder; |
| 192 | /// |
| 193 | /// # fn main() { |
| 194 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| 195 | /// # e.write_all(b"Hello World" ).unwrap(); |
| 196 | /// # let bytes = e.finish().unwrap(); |
| 197 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
| 198 | /// # } |
| 199 | /// # |
| 200 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
| 201 | /// // Here &[u8] implements BufRead |
| 202 | /// |
| 203 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
| 204 | /// let mut gz = GzDecoder::new(&bytes[..]); |
| 205 | /// let mut s = String::new(); |
| 206 | /// gz.read_to_string(&mut s)?; |
| 207 | /// Ok(s) |
| 208 | /// } |
| 209 | /// ``` |
| 210 | #[derive (Debug)] |
| 211 | pub struct GzDecoder<R> { |
| 212 | state: GzState, |
| 213 | reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, |
| 214 | multi: bool, |
| 215 | } |
| 216 | |
| 217 | #[derive (Debug)] |
| 218 | enum GzState { |
| 219 | Header(GzHeaderParser), |
| 220 | Body(GzHeader), |
| 221 | Finished(GzHeader, usize, [u8; 8]), |
| 222 | Err(io::Error), |
| 223 | End(Option<GzHeader>), |
| 224 | } |
| 225 | |
| 226 | impl<R: BufRead> GzDecoder<R> { |
| 227 | /// Creates a new decoder from the given reader, immediately parsing the |
| 228 | /// gzip header. |
| 229 | pub fn new(mut r: R) -> GzDecoder<R> { |
| 230 | let mut header_parser = GzHeaderParser::new(); |
| 231 | |
| 232 | let state = match header_parser.parse(&mut r) { |
| 233 | Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
| 234 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
| 235 | GzState::Header(header_parser) |
| 236 | } |
| 237 | Err(err) => GzState::Err(err), |
| 238 | }; |
| 239 | |
| 240 | GzDecoder { |
| 241 | state, |
| 242 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
| 243 | multi: false, |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | fn multi(mut self, flag: bool) -> GzDecoder<R> { |
| 248 | self.multi = flag; |
| 249 | self |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | impl<R> GzDecoder<R> { |
| 254 | /// Returns the header associated with this stream, if it was valid |
| 255 | pub fn header(&self) -> Option<&GzHeader> { |
| 256 | match &self.state { |
| 257 | GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
| 258 | GzState::End(header) => header.as_ref(), |
| 259 | _ => None, |
| 260 | } |
| 261 | } |
| 262 | |
| 263 | /// Acquires a reference to the underlying reader. |
| 264 | pub fn get_ref(&self) -> &R { |
| 265 | self.reader.get_ref().get_ref() |
| 266 | } |
| 267 | |
| 268 | /// Acquires a mutable reference to the underlying stream. |
| 269 | /// |
| 270 | /// Note that mutation of the stream may result in surprising results if |
| 271 | /// this decoder is continued to be used. |
| 272 | pub fn get_mut(&mut self) -> &mut R { |
| 273 | self.reader.get_mut().get_mut() |
| 274 | } |
| 275 | |
| 276 | /// Consumes this decoder, returning the underlying reader. |
| 277 | pub fn into_inner(self) -> R { |
| 278 | self.reader.into_inner().into_inner() |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | impl<R: BufRead> Read for GzDecoder<R> { |
| 283 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 284 | loop { |
| 285 | match &mut self.state { |
| 286 | GzState::Header(parser) => { |
| 287 | parser.parse(self.reader.get_mut().get_mut())?; |
| 288 | self.state = GzState::Body(GzHeader::from(mem::take(parser))); |
| 289 | } |
| 290 | GzState::Body(header) => { |
| 291 | if into.is_empty() { |
| 292 | return Ok(0); |
| 293 | } |
| 294 | match self.reader.read(into)? { |
| 295 | 0 => { |
| 296 | self.state = GzState::Finished(mem::take(header), 0, [0; 8]); |
| 297 | } |
| 298 | n => { |
| 299 | return Ok(n); |
| 300 | } |
| 301 | } |
| 302 | } |
| 303 | GzState::Finished(header, pos, buf) => { |
| 304 | if *pos < buf.len() { |
| 305 | *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; |
| 306 | } else { |
| 307 | let (crc, amt) = finish(&buf); |
| 308 | |
| 309 | if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { |
| 310 | self.state = GzState::End(Some(mem::take(header))); |
| 311 | return Err(corrupt()); |
| 312 | } else if self.multi { |
| 313 | let is_eof = self |
| 314 | .reader |
| 315 | .get_mut() |
| 316 | .get_mut() |
| 317 | .fill_buf() |
| 318 | .map(|buf| buf.is_empty())?; |
| 319 | |
| 320 | if is_eof { |
| 321 | self.state = GzState::End(Some(mem::take(header))); |
| 322 | } else { |
| 323 | self.reader.reset(); |
| 324 | self.reader.get_mut().reset_data(); |
| 325 | self.state = GzState::Header(GzHeaderParser::new()) |
| 326 | } |
| 327 | } else { |
| 328 | self.state = GzState::End(Some(mem::take(header))); |
| 329 | } |
| 330 | } |
| 331 | } |
| 332 | GzState::Err(err) => { |
| 333 | let result = Err(mem::replace(err, io::ErrorKind::Other.into())); |
| 334 | self.state = GzState::End(None); |
| 335 | return result; |
| 336 | } |
| 337 | GzState::End(_) => return Ok(0), |
| 338 | } |
| 339 | } |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | impl<R: BufRead + Write> Write for GzDecoder<R> { |
| 344 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| 345 | self.get_mut().write(buf) |
| 346 | } |
| 347 | |
| 348 | fn flush(&mut self) -> io::Result<()> { |
| 349 | self.get_mut().flush() |
| 350 | } |
| 351 | } |
| 352 | |
| 353 | /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. |
| 354 | /// |
| 355 | /// This structure implements a [`Read`] interface. When read from, it reads |
| 356 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
| 357 | /// |
| 358 | /// A gzip file consists of a series of *members* concatenated one after another. |
| 359 | /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the |
| 360 | /// underlying reader does. For a file, this reads to the end of the file. |
| 361 | /// |
| 362 | /// To handle members separately, see [GzDecoder] or read more |
| 363 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| 364 | /// |
| 365 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| 366 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
| 367 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
| 368 | /// |
| 369 | /// # Examples |
| 370 | /// |
| 371 | /// ``` |
| 372 | /// use std::io::prelude::*; |
| 373 | /// use std::io; |
| 374 | /// # use flate2::Compression; |
| 375 | /// # use flate2::write::GzEncoder; |
| 376 | /// use flate2::bufread::MultiGzDecoder; |
| 377 | /// |
| 378 | /// # fn main() { |
| 379 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| 380 | /// # e.write_all(b"Hello World" ).unwrap(); |
| 381 | /// # let bytes = e.finish().unwrap(); |
| 382 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
| 383 | /// # } |
| 384 | /// # |
| 385 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
| 386 | /// // Here &[u8] implements BufRead |
| 387 | /// |
| 388 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
| 389 | /// let mut gz = MultiGzDecoder::new(&bytes[..]); |
| 390 | /// let mut s = String::new(); |
| 391 | /// gz.read_to_string(&mut s)?; |
| 392 | /// Ok(s) |
| 393 | /// } |
| 394 | /// ``` |
| 395 | #[derive (Debug)] |
| 396 | pub struct MultiGzDecoder<R>(GzDecoder<R>); |
| 397 | |
| 398 | impl<R: BufRead> MultiGzDecoder<R> { |
| 399 | /// Creates a new decoder from the given reader, immediately parsing the |
| 400 | /// (first) gzip header. If the gzip stream contains multiple members all will |
| 401 | /// be decoded. |
| 402 | pub fn new(r: R) -> MultiGzDecoder<R> { |
| 403 | MultiGzDecoder(GzDecoder::new(r).multi(flag:true)) |
| 404 | } |
| 405 | } |
| 406 | |
| 407 | impl<R> MultiGzDecoder<R> { |
| 408 | /// Returns the current header associated with this stream, if it's valid |
| 409 | pub fn header(&self) -> Option<&GzHeader> { |
| 410 | self.0.header() |
| 411 | } |
| 412 | |
| 413 | /// Acquires a reference to the underlying reader. |
| 414 | pub fn get_ref(&self) -> &R { |
| 415 | self.0.get_ref() |
| 416 | } |
| 417 | |
| 418 | /// Acquires a mutable reference to the underlying stream. |
| 419 | /// |
| 420 | /// Note that mutation of the stream may result in surprising results if |
| 421 | /// this decoder is continued to be used. |
| 422 | pub fn get_mut(&mut self) -> &mut R { |
| 423 | self.0.get_mut() |
| 424 | } |
| 425 | |
| 426 | /// Consumes this decoder, returning the underlying reader. |
| 427 | pub fn into_inner(self) -> R { |
| 428 | self.0.into_inner() |
| 429 | } |
| 430 | } |
| 431 | |
| 432 | impl<R: BufRead> Read for MultiGzDecoder<R> { |
| 433 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
| 434 | self.0.read(buf:into) |
| 435 | } |
| 436 | } |
| 437 | |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all`, since a bare `write` is allowed to accept only part of the input.
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Use a non-empty scratch buffer: reading into an empty one returns 0 regardless of
        // decoder state, which would make this assertion vacuous.
        let mut scratch = [0u8; 8];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
| 484 | |