1use std::cmp;
2use std::io;
3use std::io::prelude::*;
4
5use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
6use crate::crc::{Crc, CrcWriter};
7use crate::zio;
8use crate::{Compress, Compression, Decompress, Status};
9
10/// A gzip streaming encoder
11///
12/// This structure exposes a [`Write`] interface that will emit compressed data
13/// to the underlying writer `W`.
14///
15/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
16///
17/// # Examples
18///
19/// ```
20/// use std::io::prelude::*;
21/// use flate2::Compression;
22/// use flate2::write::GzEncoder;
23///
24/// // Vec<u8> implements Write to print the compressed bytes of sample string
25/// # fn main() {
26///
27/// let mut e = GzEncoder::new(Vec::new(), Compression::default());
28/// e.write_all(b"Hello World").unwrap();
29/// println!("{:?}", e.finish().unwrap());
30/// # }
31/// ```
32#[derive(Debug)]
33pub struct GzEncoder<W: Write> {
34 inner: zio::Writer<W, Compress>,
35 crc: Crc,
36 crc_bytes_written: usize,
37 header: Vec<u8>,
38}
39
40pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> {
41 GzEncoder {
42 inner: zio::Writer::new(w, d:Compress::new(level:lvl, zlib_header:false)),
43 crc: Crc::new(),
44 header,
45 crc_bytes_written: 0,
46 }
47}
48
49impl<W: Write> GzEncoder<W> {
50 /// Creates a new encoder which will use the given compression level.
51 ///
52 /// The encoder is not configured specially for the emitted header. For
53 /// header configuration, see the `GzBuilder` type.
54 ///
55 /// The data written to the returned encoder will be compressed and then
56 /// written to the stream `w`.
57 pub fn new(w: W, level: Compression) -> GzEncoder<W> {
58 GzBuilder::new().write(w, level)
59 }
60
61 /// Acquires a reference to the underlying writer.
62 pub fn get_ref(&self) -> &W {
63 self.inner.get_ref()
64 }
65
66 /// Acquires a mutable reference to the underlying writer.
67 ///
68 /// Note that mutation of the writer may result in surprising results if
69 /// this encoder is continued to be used.
70 pub fn get_mut(&mut self) -> &mut W {
71 self.inner.get_mut()
72 }
73
74 /// Attempt to finish this output stream, writing out final chunks of data.
75 ///
76 /// Note that this function can only be used once data has finished being
77 /// written to the output stream. After this function is called then further
78 /// calls to `write` may result in a panic.
79 ///
80 /// # Panics
81 ///
82 /// Attempts to write data to this stream may result in a panic after this
83 /// function is called.
84 ///
85 /// # Errors
86 ///
87 /// This function will perform I/O to complete this stream, and any I/O
88 /// errors which occur will be returned from this function.
89 pub fn try_finish(&mut self) -> io::Result<()> {
90 self.write_header()?;
91 self.inner.finish()?;
92
93 while self.crc_bytes_written < 8 {
94 let (sum, amt) = (self.crc.sum(), self.crc.amount());
95 let buf = [
96 (sum >> 0) as u8,
97 (sum >> 8) as u8,
98 (sum >> 16) as u8,
99 (sum >> 24) as u8,
100 (amt >> 0) as u8,
101 (amt >> 8) as u8,
102 (amt >> 16) as u8,
103 (amt >> 24) as u8,
104 ];
105 let inner = self.inner.get_mut();
106 let n = inner.write(&buf[self.crc_bytes_written..])?;
107 self.crc_bytes_written += n;
108 }
109 Ok(())
110 }
111
112 /// Finish encoding this stream, returning the underlying writer once the
113 /// encoding is done.
114 ///
115 /// Note that this function may not be suitable to call in a situation where
116 /// the underlying stream is an asynchronous I/O stream. To finish a stream
117 /// the `try_finish` (or `shutdown`) method should be used instead. To
118 /// re-acquire ownership of a stream it is safe to call this method after
119 /// `try_finish` or `shutdown` has returned `Ok`.
120 ///
121 /// # Errors
122 ///
123 /// This function will perform I/O to complete this stream, and any I/O
124 /// errors which occur will be returned from this function.
125 pub fn finish(mut self) -> io::Result<W> {
126 self.try_finish()?;
127 Ok(self.inner.take_inner())
128 }
129
130 fn write_header(&mut self) -> io::Result<()> {
131 while !self.header.is_empty() {
132 let n = self.inner.get_mut().write(&self.header)?;
133 self.header.drain(..n);
134 }
135 Ok(())
136 }
137}
138
139impl<W: Write> Write for GzEncoder<W> {
140 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
141 assert_eq!(self.crc_bytes_written, 0);
142 self.write_header()?;
143 let n: usize = self.inner.write(buf)?;
144 self.crc.update(&buf[..n]);
145 Ok(n)
146 }
147
148 fn flush(&mut self) -> io::Result<()> {
149 assert_eq!(self.crc_bytes_written, 0);
150 self.write_header()?;
151 self.inner.flush()
152 }
153}
154
155impl<R: Read + Write> Read for GzEncoder<R> {
156 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
157 self.get_mut().read(buf)
158 }
159}
160
161impl<W: Write> Drop for GzEncoder<W> {
162 fn drop(&mut self) {
163 if self.inner.is_present() {
164 let _ = self.try_finish();
165 }
166 }
167}
168
169/// A decoder for a single member of a [gzip file].
170///
171/// This structure exposes a [`Write`] interface, receiving compressed data and
172/// writing uncompressed data to the underlying writer.
173///
174/// After decoding a single member of the gzip data this writer will return the number of bytes up to
175/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
176/// handle any data following the gzip member.
177///
178/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
179/// or read more
180/// [in the introduction](../index.html#about-multi-member-gzip-files).
181///
182/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
183/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
184///
185/// # Examples
186///
187/// ```
188/// use std::io::prelude::*;
189/// use std::io;
190/// use flate2::Compression;
191/// use flate2::write::{GzEncoder, GzDecoder};
192///
193/// # fn main() {
194/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
195/// # e.write(b"Hello World").unwrap();
196/// # let bytes = e.finish().unwrap();
197/// # assert_eq!("Hello World", decode_writer(bytes).unwrap());
198/// # }
199/// // Uncompresses a gzip encoded vector of bytes and returns a string or error
200/// // Here Vec<u8> implements Write
201/// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> {
202/// let mut writer = Vec::new();
203/// let mut decoder = GzDecoder::new(writer);
204/// decoder.write_all(&bytes[..])?;
205/// writer = decoder.finish()?;
206/// let return_string = String::from_utf8(writer).expect("String parsing error");
207/// Ok(return_string)
208/// }
209/// ```
210#[derive(Debug)]
211pub struct GzDecoder<W: Write> {
212 inner: zio::Writer<CrcWriter<W>, Decompress>,
213 crc_bytes: Vec<u8>,
214 header_parser: GzHeaderParser,
215}
216
217const CRC_BYTES_LEN: usize = 8;
218
219impl<W: Write> GzDecoder<W> {
220 /// Creates a new decoder which will write uncompressed data to the stream.
221 ///
222 /// When this encoder is dropped or unwrapped the final pieces of data will
223 /// be flushed.
224 pub fn new(w: W) -> GzDecoder<W> {
225 GzDecoder {
226 inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
227 crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
228 header_parser: GzHeaderParser::new(),
229 }
230 }
231
232 /// Returns the header associated with this stream.
233 pub fn header(&self) -> Option<&GzHeader> {
234 self.header_parser.header()
235 }
236
237 /// Acquires a reference to the underlying writer.
238 pub fn get_ref(&self) -> &W {
239 self.inner.get_ref().get_ref()
240 }
241
242 /// Acquires a mutable reference to the underlying writer.
243 ///
244 /// Note that mutating the output/input state of the stream may corrupt this
245 /// object, so care must be taken when using this method.
246 pub fn get_mut(&mut self) -> &mut W {
247 self.inner.get_mut().get_mut()
248 }
249
250 /// Attempt to finish this output stream, writing out final chunks of data.
251 ///
252 /// Note that this function can only be used once data has finished being
253 /// written to the output stream. After this function is called then further
254 /// calls to `write` may result in a panic.
255 ///
256 /// # Panics
257 ///
258 /// Attempts to write data to this stream may result in a panic after this
259 /// function is called.
260 ///
261 /// # Errors
262 ///
263 /// This function will perform I/O to finish the stream, returning any
264 /// errors which happen.
265 pub fn try_finish(&mut self) -> io::Result<()> {
266 self.finish_and_check_crc()?;
267 Ok(())
268 }
269
270 /// Consumes this decoder, flushing the output stream.
271 ///
272 /// This will flush the underlying data stream and then return the contained
273 /// writer if the flush succeeded.
274 ///
275 /// Note that this function may not be suitable to call in a situation where
276 /// the underlying stream is an asynchronous I/O stream. To finish a stream
277 /// the `try_finish` (or `shutdown`) method should be used instead. To
278 /// re-acquire ownership of a stream it is safe to call this method after
279 /// `try_finish` or `shutdown` has returned `Ok`.
280 ///
281 /// # Errors
282 ///
283 /// This function will perform I/O to complete this stream, and any I/O
284 /// errors which occur will be returned from this function.
285 pub fn finish(mut self) -> io::Result<W> {
286 self.finish_and_check_crc()?;
287 Ok(self.inner.take_inner().into_inner())
288 }
289
290 fn finish_and_check_crc(&mut self) -> io::Result<()> {
291 self.inner.finish()?;
292
293 if self.crc_bytes.len() != 8 {
294 return Err(corrupt());
295 }
296
297 let crc = ((self.crc_bytes[0] as u32) << 0)
298 | ((self.crc_bytes[1] as u32) << 8)
299 | ((self.crc_bytes[2] as u32) << 16)
300 | ((self.crc_bytes[3] as u32) << 24);
301 let amt = ((self.crc_bytes[4] as u32) << 0)
302 | ((self.crc_bytes[5] as u32) << 8)
303 | ((self.crc_bytes[6] as u32) << 16)
304 | ((self.crc_bytes[7] as u32) << 24);
305 if crc != self.inner.get_ref().crc().sum() {
306 return Err(corrupt());
307 }
308 if amt != self.inner.get_ref().crc().amount() {
309 return Err(corrupt());
310 }
311 Ok(())
312 }
313}
314
315impl<W: Write> Write for GzDecoder<W> {
316 fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
317 let buflen = buf.len();
318 if self.header().is_none() {
319 match self.header_parser.parse(&mut buf) {
320 Err(err) => {
321 if err.kind() == io::ErrorKind::UnexpectedEof {
322 // all data read but header still not complete
323 Ok(buflen)
324 } else {
325 Err(err)
326 }
327 }
328 Ok(_) => {
329 debug_assert!(self.header().is_some());
330 // buf now contains the unread part of the original buf
331 let n = buflen - buf.len();
332 Ok(n)
333 }
334 }
335 } else {
336 let (n, status) = self.inner.write_with_status(buf)?;
337
338 if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 {
339 let remaining = buf.len() - n;
340 let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len());
341 self.crc_bytes.extend(&buf[n..n + crc_bytes]);
342 return Ok(n + crc_bytes);
343 }
344 Ok(n)
345 }
346 }
347
348 fn flush(&mut self) -> io::Result<()> {
349 self.inner.flush()
350 }
351}
352
353impl<W: Read + Write> Read for GzDecoder<W> {
354 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
355 self.inner.get_mut().get_mut().read(buf)
356 }
357}
358
359/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
360///
361/// This structure exposes a [`Write`] interface that will consume compressed data and
362/// write uncompressed data to the underlying writer.
363///
364/// A gzip file consists of a series of *members* concatenated one after another.
365/// `MultiGzDecoder` decodes all members of a file and writes them to the
366/// underlying writer one after another.
367///
368/// To handle members separately, see [GzDecoder] or read more
369/// [in the introduction](../index.html#about-multi-member-gzip-files).
370///
371/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
372#[derive(Debug)]
373pub struct MultiGzDecoder<W: Write> {
374 inner: GzDecoder<W>,
375}
376
377impl<W: Write> MultiGzDecoder<W> {
378 /// Creates a new decoder which will write uncompressed data to the stream.
379 /// If the gzip stream contains multiple members all will be decoded.
380 pub fn new(w: W) -> MultiGzDecoder<W> {
381 MultiGzDecoder {
382 inner: GzDecoder::new(w),
383 }
384 }
385
386 /// Returns the header associated with the current member.
387 pub fn header(&self) -> Option<&GzHeader> {
388 self.inner.header()
389 }
390
391 /// Acquires a reference to the underlying writer.
392 pub fn get_ref(&self) -> &W {
393 self.inner.get_ref()
394 }
395
396 /// Acquires a mutable reference to the underlying writer.
397 ///
398 /// Note that mutating the output/input state of the stream may corrupt this
399 /// object, so care must be taken when using this method.
400 pub fn get_mut(&mut self) -> &mut W {
401 self.inner.get_mut()
402 }
403
404 /// Attempt to finish this output stream, writing out final chunks of data.
405 ///
406 /// Note that this function can only be used once data has finished being
407 /// written to the output stream. After this function is called then further
408 /// calls to `write` may result in a panic.
409 ///
410 /// # Panics
411 ///
412 /// Attempts to write data to this stream may result in a panic after this
413 /// function is called.
414 ///
415 /// # Errors
416 ///
417 /// This function will perform I/O to finish the stream, returning any
418 /// errors which happen.
419 pub fn try_finish(&mut self) -> io::Result<()> {
420 self.inner.try_finish()
421 }
422
423 /// Consumes this decoder, flushing the output stream.
424 ///
425 /// This will flush the underlying data stream and then return the contained
426 /// writer if the flush succeeded.
427 ///
428 /// Note that this function may not be suitable to call in a situation where
429 /// the underlying stream is an asynchronous I/O stream. To finish a stream
430 /// the `try_finish` (or `shutdown`) method should be used instead. To
431 /// re-acquire ownership of a stream it is safe to call this method after
432 /// `try_finish` or `shutdown` has returned `Ok`.
433 ///
434 /// # Errors
435 ///
436 /// This function will perform I/O to complete this stream, and any I/O
437 /// errors which occur will be returned from this function.
438 pub fn finish(self) -> io::Result<W> {
439 self.inner.finish()
440 }
441}
442
443impl<W: Write> Write for MultiGzDecoder<W> {
444 fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
445 if buf.is_empty() {
446 Ok(0)
447 } else {
448 match self.inner.write(buf) {
449 Ok(0) => {
450 // When the GzDecoder indicates that it has finished
451 // create a new GzDecoder to handle additional data.
452 self.inner.try_finish()?;
453 let w: W = self.inner.inner.take_inner().into_inner();
454 self.inner = GzDecoder::new(w);
455 self.inner.write(buf)
456 }
457 res: Result => res,
458 }
459 }
460 }
461
462 fn flush(&mut self) -> io::Result<()> {
463 self.inner.flush()
464 }
465}
466
#[cfg(test)]
mod tests {
    use super::*;

    const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World";

    /// Compresses `STR` into one gzip member with a default header.
    ///
    /// Uses `write_all` so that a partial write cannot silently truncate the
    /// fixture.
    fn compress_str() -> Vec<u8> {
        let mut e = GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(STR.as_bytes()).unwrap();
        e.finish().unwrap()
    }

    /// Finishes `decoder` and returns everything it decoded as a string.
    fn finish_to_string(decoder: GzDecoder<Vec<u8>>) -> String {
        let writer = decoder.finish().unwrap();
        String::from_utf8(writer).expect("String parsing error")
    }

    /// Writes `bytes[..split]` (which must be consumed completely), then the
    /// rest, issuing one more write if the second call stopped early.
    fn feed_split_at(decoder: &mut GzDecoder<Vec<u8>>, bytes: &[u8], split: usize) {
        assert_eq!(decoder.write(&bytes[..split]).unwrap(), split);
        let n = decoder.write(&bytes[split..]).unwrap();
        if n < bytes.len() - split {
            decoder.write(&bytes[n + split..]).unwrap();
        }
    }

    #[test]
    fn decode_writer_one_chunk() {
        let bytes = compress_str();

        let mut decoder = GzDecoder::new(Vec::new());
        let n = decoder.write(&bytes[..]).unwrap();
        decoder.write(&bytes[n..]).unwrap();
        decoder.try_finish().unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header() {
        let bytes = compress_str();

        let mut decoder = GzDecoder::new(Vec::new());
        feed_split_at(&mut decoder, &bytes, 5);
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header_filename() {
        let filename = "test.txt";
        let mut e = GzBuilder::new()
            .filename(filename)
            .read(STR.as_bytes(), Compression::default());
        let mut bytes = Vec::new();
        e.read_to_end(&mut bytes).unwrap();

        let mut decoder = GzDecoder::new(Vec::new());
        feed_split_at(&mut decoder, &bytes, 12);
        assert_eq!(
            decoder.header().unwrap().filename().unwrap(),
            filename.as_bytes()
        );
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_header_comment() {
        let comment = "test comment";
        let mut e = GzBuilder::new()
            .comment(comment)
            .read(STR.as_bytes(), Compression::default());
        let mut bytes = Vec::new();
        e.read_to_end(&mut bytes).unwrap();

        let mut decoder = GzDecoder::new(Vec::new());
        feed_split_at(&mut decoder, &bytes, 12);
        assert_eq!(
            decoder.header().unwrap().comment().unwrap(),
            comment.as_bytes()
        );
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_exact_header() {
        let bytes = compress_str();

        let mut decoder = GzDecoder::new(Vec::new());
        assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10);
        decoder.write(&bytes[10..]).unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    #[test]
    fn decode_writer_partial_crc() {
        let bytes = compress_str();

        let mut decoder = GzDecoder::new(Vec::new());
        let l = bytes.len() - 5;
        let n = decoder.write(&bytes[..l]).unwrap();
        decoder.write(&bytes[n..]).unwrap();
        assert_eq!(finish_to_string(decoder), STR);
    }

    // Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will
    // concatenate the decoded contents of all members.
    #[test]
    fn decode_multi_writer() {
        let bytes = compress_str().repeat(2);

        let mut decoder = MultiGzDecoder::new(Vec::new());
        let mut count = 0;
        while count < bytes.len() {
            let n = decoder.write(&bytes[count..]).unwrap();
            assert!(n != 0);
            count += n;
        }
        let writer = decoder.finish().unwrap();
        let return_string = String::from_utf8(writer).expect("String parsing error");
        let expected = STR.repeat(2);
        assert_eq!(return_string, expected);
    }

    // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let compressed = {
            let mut b = compress_str();
            b.push(b'x');
            b
        };

        let mut decoder = GzDecoder::new(Vec::new());
        let mut consumed_bytes = 0;
        loop {
            let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
            if n == 0 {
                break;
            }
            consumed_bytes += n;
        }
        assert_eq!(finish_to_string(decoder), STR);
        assert_eq!(&compressed[consumed_bytes..], b"x");
    }
}
642