write.rs source code [crates/flate2-1.0.28/src/gz/write.rs]

1	use std::cmp;
2	use std::io;
3	use std::io::prelude::*;
4
5	use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
6	use crate::crc::{Crc, CrcWriter};
7	use crate::zio;
8	use crate::{Compress, Compression, Decompress, Status};
9
10	/// A gzip streaming encoder
11	///
12	/// This structure exposes a [`Write`] interface that will emit compressed data
13	/// to the underlying writer `W`.
14	///
15	/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
16	///
17	/// # Examples
18	///
19	/// ```
20	/// use std::io::prelude::*;
21	/// use flate2::Compression;
22	/// use flate2::write::GzEncoder;
23	///
24	/// // Vec<u8> implements Write to print the compressed bytes of sample string
25	/// # fn main() {
26	///
27	/// let mut e = GzEncoder::new(Vec::new(), Compression::default());
28	/// e.write_all(b"Hello World").unwrap();
29	/// println!("{:?}", e.finish().unwrap());
30	/// # }
31	/// ```
32	#[derive(Debug)]
33	pub struct GzEncoder<W: Write> {
34	inner: zio::Writer<W, Compress>,
35	crc: Crc,
36	crc_bytes_written: usize,
37	header: Vec<u8>,
38	}
39
40	pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> {
41	GzEncoder {
42	inner: zio::Writer::new(w, d:Compress::new(level:lvl, zlib_header:`false`)),
43	crc: Crc::new(),
44	header,
45	crc_bytes_written: `0`,
46	}
47	}
48
49	impl<W: Write> GzEncoder<W> {
50	/// Creates a new encoder which will use the given compression level.
51	///
52	/// The encoder is not configured specially for the emitted header. For
53	/// header configuration, see the `GzBuilder` type.
54	///
55	/// The data written to the returned encoder will be compressed and then
56	/// written to the stream `w`.
57	pub fn new(w: W, level: Compression) -> GzEncoder<W> {
58	GzBuilder::new().write(w, level)
59	}
60
61	/// Acquires a reference to the underlying writer.
62	pub fn get_ref(&self) -> &W {
63	self.inner.get_ref()
64	}
65
66	/// Acquires a mutable reference to the underlying writer.
67	///
68	/// Note that mutation of the writer may result in surprising results if
69	/// this encoder is continued to be used.
70	pub fn get_mut(&mut self) -> &mut W {
71	self.inner.get_mut()
72	}
73
74	/// Attempt to finish this output stream, writing out final chunks of data.
75	///
76	/// Note that this function can only be used once data has finished being
77	/// written to the output stream. After this function is called then further
78	/// calls to `write` may result in a panic.
79	///
80	/// # Panics
81	///
82	/// Attempts to write data to this stream may result in a panic after this
83	/// function is called.
84	///
85	/// # Errors
86	///
87	/// This function will perform I/O to complete this stream, and any I/O
88	/// errors which occur will be returned from this function.
89	pub fn try_finish(&mut self) -> io::Result<()> {
90	self.write_header()?;
91	self.inner.finish()?;
92
93	while self.crc_bytes_written < `8` {
94	let (sum, amt) = (self.crc.sum(), self.crc.amount());
95	let buf = [
96	(sum >> `0`) as u8,
97	(sum >> `8`) as u8,
98	(sum >> `16`) as u8,
99	(sum >> `24`) as u8,
100	(amt >> `0`) as u8,
101	(amt >> `8`) as u8,
102	(amt >> `16`) as u8,
103	(amt >> `24`) as u8,
104	];
105	let inner = self.inner.get_mut();
106	let n = inner.write(&buf[self.crc_bytes_written..])?;
107	self.crc_bytes_written += n;
108	}
109	Ok(())
110	}
111
112	/// Finish encoding this stream, returning the underlying writer once the
113	/// encoding is done.
114	///
115	/// Note that this function may not be suitable to call in a situation where
116	/// the underlying stream is an asynchronous I/O stream. To finish a stream
117	/// the `try_finish` (or `shutdown`) method should be used instead. To
118	/// re-acquire ownership of a stream it is safe to call this method after
119	/// `try_finish` or `shutdown` has returned `Ok`.
120	///
121	/// # Errors
122	///
123	/// This function will perform I/O to complete this stream, and any I/O
124	/// errors which occur will be returned from this function.
125	pub fn finish(mut self) -> io::Result<W> {
126	self.try_finish()?;
127	Ok(self.inner.take_inner())
128	}
129
130	fn write_header(&mut self) -> io::Result<()> {
131	while !self.header.is_empty() {
132	let n = self.inner.get_mut().write(&self.header)?;
133	self.header.drain(..n);
134	}
135	Ok(())
136	}
137	}
138
139	impl<W: Write> Write for GzEncoder<W> {
140	fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
141	assert_eq!(self.crc_bytes_written, `0`);
142	self.write_header()?;
143	let n: usize = self.inner.write(buf)?;
144	self.crc.update(&buf[..n]);
145	Ok(n)
146	}
147
148	fn flush(&mut self) -> io::Result<()> {
149	assert_eq!(self.crc_bytes_written, `0`);
150	self.write_header()?;
151	self.inner.flush()
152	}
153	}
154
155	impl<R: Read + Write> Read for GzEncoder<R> {
156	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
157	self.get_mut().read(buf)
158	}
159	}
160
161	impl<W: Write> Drop for GzEncoder<W> {
162	fn drop(&mut self) {
163	if self.inner.is_present() {
164	let _ = self.try_finish();
165	}
166	}
167	}
168
169	/// A decoder for a single member of a [gzip file].
170	///
171	/// This structure exposes a [`Write`] interface, receiving compressed data and
172	/// writing uncompressed data to the underlying writer.
173	///
174	/// After decoding a single member of the gzip data this writer will return the number of bytes up to
175	/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
176	/// handle any data following the gzip member.
177	///
178	/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
179	/// or read more
180	/// [in the introduction](../index.html#about-multi-member-gzip-files).
181	///
182	/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
183	/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
184	///
185	/// # Examples
186	///
187	/// ```
188	/// use std::io::prelude::*;
189	/// use std::io;
190	/// use flate2::Compression;
191	/// use flate2::write::{GzEncoder, GzDecoder};
192	///
193	/// # fn main() {
194	/// # let mut e = GzEncoder::new(Vec::new(), Compression::default());
195	/// # e.write(b"Hello World").unwrap();
196	/// # let bytes = e.finish().unwrap();
197	/// # assert_eq!("Hello World", decode_writer(bytes).unwrap());
198	/// # }
199	/// // Uncompresses a gzip encoded vector of bytes and returns a string or error
200	/// // Here Vec<u8> implements Write
201	/// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> {
202	/// let mut writer = Vec::new();
203	/// let mut decoder = GzDecoder::new(writer);
204	/// decoder.write_all(&bytes[..])?;
205	/// writer = decoder.finish()?;
206	/// let return_string = String::from_utf8(writer).expect("String parsing error");
207	/// Ok(return_string)
208	/// }
209	/// ```
210	#[derive(Debug)]
211	pub struct GzDecoder<W: Write> {
212	inner: zio::Writer<CrcWriter<W>, Decompress>,
213	crc_bytes: Vec<u8>,
214	header_parser: GzHeaderParser,
215	}
216
/// Length in bytes of the gzip member trailer: two little-endian `u32`
/// fields (checksum, then uncompressed length).
const CRC_BYTES_LEN: usize = 2 * std::mem::size_of::<u32>();
218
219	impl<W: Write> GzDecoder<W> {
220	/// Creates a new decoder which will write uncompressed data to the stream.
221	///
222	/// When this encoder is dropped or unwrapped the final pieces of data will
223	/// be flushed.
224	pub fn new(w: W) -> GzDecoder<W> {
225	GzDecoder {
226	inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(`false`)),
227	crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
228	header_parser: GzHeaderParser::new(),
229	}
230	}
231
232	/// Returns the header associated with this stream.
233	pub fn header(&self) -> Option<&GzHeader> {
234	self.header_parser.header()
235	}
236
237	/// Acquires a reference to the underlying writer.
238	pub fn get_ref(&self) -> &W {
239	self.inner.get_ref().get_ref()
240	}
241
242	/// Acquires a mutable reference to the underlying writer.
243	///
244	/// Note that mutating the output/input state of the stream may corrupt this
245	/// object, so care must be taken when using this method.
246	pub fn get_mut(&mut self) -> &mut W {
247	self.inner.get_mut().get_mut()
248	}
249
250	/// Attempt to finish this output stream, writing out final chunks of data.
251	///
252	/// Note that this function can only be used once data has finished being
253	/// written to the output stream. After this function is called then further
254	/// calls to `write` may result in a panic.
255	///
256	/// # Panics
257	///
258	/// Attempts to write data to this stream may result in a panic after this
259	/// function is called.
260	///
261	/// # Errors
262	///
263	/// This function will perform I/O to finish the stream, returning any
264	/// errors which happen.
265	pub fn try_finish(&mut self) -> io::Result<()> {
266	self.finish_and_check_crc()?;
267	Ok(())
268	}
269
270	/// Consumes this decoder, flushing the output stream.
271	///
272	/// This will flush the underlying data stream and then return the contained
273	/// writer if the flush succeeded.
274	///
275	/// Note that this function may not be suitable to call in a situation where
276	/// the underlying stream is an asynchronous I/O stream. To finish a stream
277	/// the `try_finish` (or `shutdown`) method should be used instead. To
278	/// re-acquire ownership of a stream it is safe to call this method after
279	/// `try_finish` or `shutdown` has returned `Ok`.
280	///
281	/// # Errors
282	///
283	/// This function will perform I/O to complete this stream, and any I/O
284	/// errors which occur will be returned from this function.
285	pub fn finish(mut self) -> io::Result<W> {
286	self.finish_and_check_crc()?;
287	Ok(self.inner.take_inner().into_inner())
288	}
289
290	fn finish_and_check_crc(&mut self) -> io::Result<()> {
291	self.inner.finish()?;
292
293	if self.crc_bytes.len() != `8` {
294	return Err(corrupt());
295	}
296
297	let crc = ((self.crc_bytes[`0`] as u32) << `0`)
298	\| ((self.crc_bytes[`1`] as u32) << `8`)
299	\| ((self.crc_bytes[`2`] as u32) << `16`)
300	\| ((self.crc_bytes[`3`] as u32) << `24`);
301	let amt = ((self.crc_bytes[`4`] as u32) << `0`)
302	\| ((self.crc_bytes[`5`] as u32) << `8`)
303	\| ((self.crc_bytes[`6`] as u32) << `16`)
304	\| ((self.crc_bytes[`7`] as u32) << `24`);
305	if crc != self.inner.get_ref().crc().sum() {
306	return Err(corrupt());
307	}
308	if amt != self.inner.get_ref().crc().amount() {
309	return Err(corrupt());
310	}
311	Ok(())
312	}
313	}
314
315	impl<W: Write> Write for GzDecoder<W> {
316	fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
317	let buflen = buf.len();
318	if self.header().is_none() {
319	match self.header_parser.parse(&mut buf) {
320	Err(err) => {
321	if err.kind() == io::ErrorKind::UnexpectedEof {
322	// all data read but header still not complete
323	Ok(buflen)
324	} else {
325	Err(err)
326	}
327	}
328	Ok(_) => {
329	debug_assert!(self.header().is_some());
330	// buf now contains the unread part of the original buf
331	let n = buflen - buf.len();
332	Ok(n)
333	}
334	}
335	} else {
336	let (n, status) = self.inner.write_with_status(buf)?;
337
338	if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < `8` {
339	let remaining = buf.len() - n;
340	let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len());
341	self.crc_bytes.extend(&buf[n..n + crc_bytes]);
342	return Ok(n + crc_bytes);
343	}
344	Ok(n)
345	}
346	}
347
348	fn flush(&mut self) -> io::Result<()> {
349	self.inner.flush()
350	}
351	}
352
353	impl<W: Read + Write> Read for GzDecoder<W> {
354	fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
355	self.inner.get_mut().get_mut().read(buf)
356	}
357	}
358
359	/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
360	///
361	/// This structure exposes a [`Write`] interface that will consume compressed data and
362	/// write uncompressed data to the underlying writer.
363	///
364	/// A gzip file consists of a series of members* concatenated one after another.*
365	/// `MultiGzDecoder` decodes all members of a file and writes them to the
366	/// underlying writer one after another.
367	///
368	/// To handle members separately, see [GzDecoder] or read more
369	/// [in the introduction](../index.html#about-multi-member-gzip-files).
370	///
371	/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
372	#[derive(Debug)]
373	pub struct MultiGzDecoder<W: Write> {
374	inner: GzDecoder<W>,
375	}
376
377	impl<W: Write> MultiGzDecoder<W> {
378	/// Creates a new decoder which will write uncompressed data to the stream.
379	/// If the gzip stream contains multiple members all will be decoded.
380	pub fn new(w: W) -> MultiGzDecoder<W> {
381	MultiGzDecoder {
382	inner: GzDecoder::new(w),
383	}
384	}
385
386	/// Returns the header associated with the current member.
387	pub fn header(&self) -> Option<&GzHeader> {
388	self.inner.header()
389	}
390
391	/// Acquires a reference to the underlying writer.
392	pub fn get_ref(&self) -> &W {
393	self.inner.get_ref()
394	}
395
396	/// Acquires a mutable reference to the underlying writer.
397	///
398	/// Note that mutating the output/input state of the stream may corrupt this
399	/// object, so care must be taken when using this method.
400	pub fn get_mut(&mut self) -> &mut W {
401	self.inner.get_mut()
402	}
403
404	/// Attempt to finish this output stream, writing out final chunks of data.
405	///
406	/// Note that this function can only be used once data has finished being
407	/// written to the output stream. After this function is called then further
408	/// calls to `write` may result in a panic.
409	///
410	/// # Panics
411	///
412	/// Attempts to write data to this stream may result in a panic after this
413	/// function is called.
414	///
415	/// # Errors
416	///
417	/// This function will perform I/O to finish the stream, returning any
418	/// errors which happen.
419	pub fn try_finish(&mut self) -> io::Result<()> {
420	self.inner.try_finish()
421	}
422
423	/// Consumes this decoder, flushing the output stream.
424	///
425	/// This will flush the underlying data stream and then return the contained
426	/// writer if the flush succeeded.
427	///
428	/// Note that this function may not be suitable to call in a situation where
429	/// the underlying stream is an asynchronous I/O stream. To finish a stream
430	/// the `try_finish` (or `shutdown`) method should be used instead. To
431	/// re-acquire ownership of a stream it is safe to call this method after
432	/// `try_finish` or `shutdown` has returned `Ok`.
433	///
434	/// # Errors
435	///
436	/// This function will perform I/O to complete this stream, and any I/O
437	/// errors which occur will be returned from this function.
438	pub fn finish(self) -> io::Result<W> {
439	self.inner.finish()
440	}
441	}
442
443	impl<W: Write> Write for MultiGzDecoder<W> {
444	fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
445	if buf.is_empty() {
446	Ok(`0`)
447	} else {
448	match self.inner.write(buf) {
449	Ok(`0`) => {
450	// When the GzDecoder indicates that it has finished
451	// create a new GzDecoder to handle additional data.
452	self.inner.try_finish()?;
453	let w: W = self.inner.inner.take_inner().into_inner();
454	self.inner = GzDecoder::new(w);
455	self.inner.write(buf)
456	}
457	res: Result => res,
458	}
459	}
460	}
461
462	fn flush(&mut self) -> io::Result<()> {
463	self.inner.flush()
464	}
465	}
466
#[cfg(test)]
mod tests {
    use super::*;

    const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World \
        Hello World Hello World Hello World Hello World Hello World";

    /// Compresses `STR` into a single gzip member using the default level.
    fn gzip_str() -> Vec<u8> {
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write(STR.as_ref()).unwrap();
        encoder.finish().unwrap()
    }

    /// Interprets decoder output as UTF-8 text.
    fn into_text(decoded: Vec<u8>) -> String {
        String::from_utf8(decoded).expect("String parsing error")
    }

    #[test]
    fn decode_writer_one_chunk() {
        let compressed = gzip_str();

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        let consumed = decoder.write(&compressed[..]).unwrap();
        decoder.write(&compressed[consumed..]).unwrap();
        decoder.try_finish().unwrap();
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    #[test]
    fn decode_writer_partial_header() {
        let compressed = gzip_str();
        // Cut in the middle of the fixed-size part of the header.
        let (head, tail) = compressed.split_at(5);

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        assert_eq!(decoder.write(head).unwrap(), 5);
        let consumed = decoder.write(tail).unwrap();
        if consumed < tail.len() {
            decoder.write(&tail[consumed..]).unwrap();
        }
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    #[test]
    fn decode_writer_partial_header_filename() {
        let filename = "test.txt";
        let mut reader = GzBuilder::new()
            .filename(filename)
            .read(STR.as_bytes(), Compression::default());
        let mut compressed = Vec::new();
        reader.read_to_end(&mut compressed).unwrap();
        // Cut inside the file name field.
        let (head, tail) = compressed.split_at(12);

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        assert_eq!(decoder.write(head).unwrap(), 12);
        let consumed = decoder.write(tail).unwrap();
        if consumed < tail.len() {
            decoder.write(&tail[consumed..]).unwrap();
        }
        assert_eq!(
            decoder.header().unwrap().filename().unwrap(),
            filename.as_bytes()
        );
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    #[test]
    fn decode_writer_partial_header_comment() {
        let comment = "test comment";
        let mut reader = GzBuilder::new()
            .comment(comment)
            .read(STR.as_bytes(), Compression::default());
        let mut compressed = Vec::new();
        reader.read_to_end(&mut compressed).unwrap();
        // Cut inside the comment field.
        let (head, tail) = compressed.split_at(12);

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        assert_eq!(decoder.write(head).unwrap(), 12);
        let consumed = decoder.write(tail).unwrap();
        if consumed < tail.len() {
            decoder.write(&tail[consumed..]).unwrap();
        }
        assert_eq!(
            decoder.header().unwrap().comment().unwrap(),
            comment.as_bytes()
        );
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    #[test]
    fn decode_writer_exact_header() {
        let compressed = gzip_str();
        // The first write carries exactly the 10-byte header and nothing else.
        let (head, tail) = compressed.split_at(10);

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        assert_eq!(decoder.write(head).unwrap(), 10);
        decoder.write(tail).unwrap();
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    #[test]
    fn decode_writer_partial_crc() {
        let compressed = gzip_str();
        // Stop the first write five bytes short, i.e. inside the trailer.
        let cut = compressed.len() - 5;

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        let consumed = decoder.write(&compressed[..cut]).unwrap();
        decoder.write(&compressed[consumed..]).unwrap();
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
    }

    // Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will
    // concatenate the decoded contents of all members.
    #[test]
    fn decode_multi_writer() {
        let compressed = gzip_str().repeat(2);

        let mut decoder = MultiGzDecoder::new(Vec::<u8>::new());
        let mut offset = 0;
        while offset < compressed.len() {
            let consumed = decoder.write(&compressed[offset..]).unwrap();
            assert!(consumed != 0);
            offset += consumed;
        }
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR.repeat(2));
    }

    // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let mut compressed = gzip_str();
        compressed.push(b'x');

        let mut decoder = GzDecoder::new(Vec::<u8>::new());
        let mut consumed_bytes = 0;
        loop {
            match decoder.write(&compressed[consumed_bytes..]).unwrap() {
                0 => break,
                n => consumed_bytes += n,
            }
        }
        let text = into_text(decoder.finish().unwrap());
        assert_eq!(text, STR);
        assert_eq!(&compressed[consumed_bytes..], b"x");
    }
}
642