mod.rs source code [crates/flate2-1.0.28/src/gz/mod.rs]

1	use std::ffi::CString;
2	use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3	use std::time;
4
5	use crate::bufreader::BufReader;
6	use crate::{Compression, Crc};
7
// Bit masks for the flags byte (byte 3) of a gzip header.

/// Set when the header is followed by a two-byte CRC of the header bytes.
pub static FHCRC: u8 = 1 << 1;
/// Set when the header carries a length-prefixed "extra" field.
pub static FEXTRA: u8 = 1 << 2;
/// Set when the header carries a nul-terminated file name.
pub static FNAME: u8 = 1 << 3;
/// Set when the header carries a nul-terminated comment.
pub static FCOMMENT: u8 = 1 << 4;
/// Bits 5-7; the header parser rejects a header with any of them set.
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
13
14	pub mod bufread;
15	pub mod read;
16	pub mod write;
17
// The maximum length of the header filename and comment fields. More than
// enough for these fields in reasonable use, but prevents possible attacks.
const MAX_HEADER_BUF: usize = 65535;
21
/// A structure representing the header of a gzip stream.
///
/// The header can contain metadata about the file that was compressed, if
/// present.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    // Raw bytes of the optional "extra" field; `None` when the header had none.
    extra: Option<Vec<u8>>,
    // File name bytes without the terminating nul; `None` when absent.
    filename: Option<Vec<u8>>,
    // Comment bytes without the terminating nul; `None` when absent.
    comment: Option<Vec<u8>>,
    // Operating system byte (header byte 9).
    operating_system: u8,
    // Modification time in seconds since the Unix epoch; 0 means "not set".
    mtime: u32,
}

impl GzHeader {
    /// Returns the `filename` field of this gzip stream's header, if present.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the `extra` field of this gzip stream's header, if present.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the `comment` field of this gzip stream's header, if present.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the `operating_system` field of this gzip stream's header.
    ///
    /// There are predefined values for various operating systems.
    /// 255 means that the value is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// This gives the most recent modification time of the original file being compressed.
    ///
    /// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970.
    /// (Note that this may cause problems for MS-DOS and other systems that use local
    /// rather than Universal time.) If the compressed data did not come from a file,
    /// `mtime` is set to the time at which compression started.
    /// `mtime` = 0 means no time stamp is available.
    ///
    /// The usage of `mtime` is discouraged because of Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the most recent modification time represented by a date-time type.
    /// Returns `None` if the value of the underlying counter is 0,
    /// indicating no time stamp is available.
    ///
    /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970.
    /// See [`mtime`](#method.mtime) for more detail.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        if self.mtime == 0 {
            return None;
        }
        let since_epoch = time::Duration::from_secs(u64::from(self.mtime));
        Some(time::UNIX_EPOCH + since_epoch)
    }
}
89
90	#[derive(Debug)]
91	pub enum GzHeaderState {
92	Start(u8, [u8; `10`]),
93	Xlen(Option<Box<Crc>>, u8, [u8; `2`]),
94	Extra(Option<Box<Crc>>, u16),
95	Filename(Option<Box<Crc>>),
96	Comment(Option<Box<Crc>>),
97	Crc(Option<Box<Crc>>, u8, [u8; `2`]),
98	Complete,
99	}
100
101	impl Default for GzHeaderState {
102	fn default() -> Self {
103	Self::Complete
104	}
105	}
106
/// State machine that reads a gzip header from a reader and collects its
/// fields into a `GzHeader`.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    // Current parsing stage together with the progress made inside it.
    state: GzHeaderState,
    // The flags byte (header byte 3), stored once the fixed header is read.
    flags: u8,
    // Header fields collected so far.
    header: GzHeader,
}
113
114	impl GzHeaderParser {
115	fn new() -> Self {
116	GzHeaderParser {
117	state: GzHeaderState::Start(`0`, [`0`; `10`]),
118	flags: `0`,
119	header: GzHeader::default(),
120	}
121	}
122
123	fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
124	loop {
125	match &mut self.state {
126	GzHeaderState::Start(count, buffer) => {
127	while (count as usize*) < buffer.len() {
128	count += read_into(r, &mut* buffer[count as usize*..])? as u8;
129	}
130	// Gzip identification bytes
131	if buffer[`0`] != `0x1f` \|\| buffer[`1`] != `0x8b` {
132	return Err(bad_header());
133	}
134	// Gzip compression method (8 = deflate)
135	if buffer[`2`] != `8` {
136	return Err(bad_header());
137	}
138	self.flags = buffer[`3`];
139	// RFC1952: "must give an error indication if any reserved bit is non-zero"
140	if self.flags & FRESERVED != `0` {
141	return Err(bad_header());
142	}
143	self.header.mtime = ((buffer[`4`] as u32) << `0`)
144	\| ((buffer[`5`] as u32) << `8`)
145	\| ((buffer[`6`] as u32) << `16`)
146	\| ((buffer[`7`] as u32) << `24`);
147	let _xfl = buffer[`8`];
148	self.header.operating_system = buffer[`9`];
149	let crc = if self.flags & FHCRC != `0` {
150	let mut crc = Box::new(Crc::new());
151	crc.update(buffer);
152	Some(crc)
153	} else {
154	None
155	};
156	self.state = GzHeaderState::Xlen(crc, `0`, [`0`; `2`]);
157	}
158	GzHeaderState::Xlen(crc, count, buffer) => {
159	if self.flags & FEXTRA != `0` {
160	while (count as usize*) < buffer.len() {
161	count += read_into(r, &mut* buffer[count as usize*..])? as u8;
162	}
163	if let Some(crc) = crc {
164	crc.update(buffer);
165	}
166	let xlen = parse_le_u16(&buffer);
167	self.header.extra = Some(vec![`0`; xlen as usize]);
168	self.state = GzHeaderState::Extra(crc.take(), `0`);
169	} else {
170	self.state = GzHeaderState::Filename(crc.take());
171	}
172	}
173	GzHeaderState::Extra(crc, count) => {
174	debug_assert!(self.header.extra.is_some());
175	let extra = self.header.extra.as_mut().unwrap();
176	while (count as usize*) < extra.len() {
177	count += read_into(r, &mut* extra[count as usize..])? as u16*;
178	}
179	if let Some(crc) = crc {
180	crc.update(extra);
181	}
182	self.state = GzHeaderState::Filename(crc.take());
183	}
184	GzHeaderState::Filename(crc) => {
185	if self.flags & FNAME != `0` {
186	let filename = self.header.filename.get_or_insert_with(Vec::new);
187	read_to_nul(r, filename)?;
188	if let Some(crc) = crc {
189	crc.update(filename);
190	crc.update(b"`\0`");
191	}
192	}
193	self.state = GzHeaderState::Comment(crc.take());
194	}
195	GzHeaderState::Comment(crc) => {
196	if self.flags & FCOMMENT != `0` {
197	let comment = self.header.comment.get_or_insert_with(Vec::new);
198	read_to_nul(r, comment)?;
199	if let Some(crc) = crc {
200	crc.update(comment);
201	crc.update(b"`\0`");
202	}
203	}
204	self.state = GzHeaderState::Crc(crc.take(), `0`, [`0`; `2`]);
205	}
206	GzHeaderState::Crc(crc, count, buffer) => {
207	if let Some(crc) = crc {
208	debug_assert!(self.flags & FHCRC != `0`);
209	while (count as usize*) < buffer.len() {
210	count += read_into(r, &mut* buffer[count as usize*..])? as u8;
211	}
212	let stored_crc = parse_le_u16(&buffer);
213	let calced_crc = crc.sum() as u16;
214	if stored_crc != calced_crc {
215	return Err(corrupt());
216	}
217	}
218	self.state = GzHeaderState::Complete;
219	}
220	GzHeaderState::Complete => {
221	return Ok(());
222	}
223	}
224	}
225	}
226
227	fn header(&self) -> Option<&GzHeader> {
228	match self.state {
229	GzHeaderState::Complete => Some(&self.header),
230	_ => None,
231	}
232	}
233	}
234
235	impl From<GzHeaderParser> for GzHeader {
236	fn from(parser: GzHeaderParser) -> Self {
237	debug_assert!(matches!(parser.state, GzHeaderState::Complete));
238	parser.header
239	}
240	}
241
// Attempt to fill the `buffer` from `r`. Return the number of bytes read.
// Return an error if EOF is read before the buffer is full. This differs
// from `read` in that Ok(0) means that more data may be available.
//
// `buffer` must not be empty (checked in debug builds): with an empty buffer
// a zero-length read could not be told apart from EOF.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        // A zero-length read into a non-empty buffer is end of input.
        Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
        Ok(n) => Ok(n),
        // An interrupted read made no progress; report zero bytes so the
        // caller's fill loop simply tries again.
        Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
        Err(e) => Err(e),
    }
}
254
255	// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
256	fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
257	let mut bytes: Bytes<&mut R> = r.bytes();
258	loop {
259	match bytes.next().transpose()? {
260	Some(byte: u8) if byte == `0` => {
261	return Ok(());
262	}
263	Some(_) if buffer.len() == MAX_HEADER_BUF => {
264	return Err(Error::new(
265	kind:ErrorKind::InvalidInput,
266	error:"gzip header field too long",
267	));
268	}
269	Some(byte: u8) => {
270	buffer.push(byte);
271	}
272	None => {
273	return Err(ErrorKind::UnexpectedEof.into());
274	}
275	}
276	}
277	}
278
// Decode a two-byte little-endian unsigned integer.
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    u16::from_le_bytes(*buffer)
}
282
// Build the `InvalidInput` error returned for a malformed gzip header.
fn bad_header() -> Error {
    Error::new(ErrorKind::InvalidInput, "invalid gzip header")
}
286
// Build the `InvalidInput` error returned when a checksum does not match.
fn corrupt() -> Error {
    Error::new(
        ErrorKind::InvalidInput,
        "corrupt gzip stream does not have a matching checksum",
    )
}
293
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // Create a file and write a sample string to it through an encoder
/// // configured with the GzBuilder pattern.
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///     .filename("hello_world.txt")
///     .comment("test file, please delete")
///     .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct GzBuilder {
    // Bytes for the header's "extra" field; `None` leaves the field out.
    extra: Option<Vec<u8>>,
    // File name, stored as a `CString` so it cannot contain an interior nul.
    filename: Option<CString>,
    // Comment, stored as a `CString` so it cannot contain an interior nul.
    comment: Option<CString>,
    // Operating system byte; `None` makes the header use 255.
    operating_system: Option<u8>,
    // Modification time written to the header; starts at 0.
    mtime: u32,
}
328
329	impl Default for GzBuilder {
330	fn default() -> Self {
331	Self::new()
332	}
333	}
334
335	impl GzBuilder {
336	/// Create a new blank builder with no header by default.
337	pub fn new() -> GzBuilder {
338	GzBuilder {
339	extra: None,
340	filename: None,
341	comment: None,
342	operating_system: None,
343	mtime: `0`,
344	}
345	}
346
347	/// Configure the `mtime` field in the gzip header.
348	pub fn mtime(mut self, mtime: u32) -> GzBuilder {
349	self.mtime = mtime;
350	self
351	}
352
353	/// Configure the `operating_system` field in the gzip header.
354	pub fn operating_system(mut self, os: u8) -> GzBuilder {
355	self.operating_system = Some(os);
356	self
357	}
358
359	/// Configure the `extra` field in the gzip header.
360	pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
361	self.extra = Some(extra.into());
362	self
363	}
364
365	/// Configure the `filename` field in the gzip header.
366	///
367	/// # Panics
368	///
369	/// Panics if the `filename` slice contains a zero.
370	pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
371	self.filename = Some(CString::new(filename.into()).unwrap());
372	self
373	}
374
375	/// Configure the `comment` field in the gzip header.
376	///
377	/// # Panics
378	///
379	/// Panics if the `comment` slice contains a zero.
380	pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
381	self.comment = Some(CString::new(comment.into()).unwrap());
382	self
383	}
384
385	/// Consume this builder, creating a writer encoder in the process.
386	///
387	/// The data written to the returned encoder will be compressed and then
388	/// written out to the supplied parameter `w`.
389	pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
390	write::gz_encoder(self.into_header(lvl), w, lvl)
391	}
392
393	/// Consume this builder, creating a reader encoder in the process.
394	///
395	/// Data read from the returned encoder will be the compressed version of
396	/// the data read from the given reader.
397	pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
398	read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
399	}
400
401	/// Consume this builder, creating a reader encoder in the process.
402	///
403	/// Data read from the returned encoder will be the compressed version of
404	/// the data read from the given reader.
405	pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
406	where
407	R: BufRead,
408	{
409	bufread::gz_encoder(self.into_header(lvl), r, lvl)
410	}
411
412	fn into_header(self, lvl: Compression) -> Vec<u8> {
413	let GzBuilder {
414	extra,
415	filename,
416	comment,
417	operating_system,
418	mtime,
419	} = self;
420	let mut flg = `0`;
421	let mut header = vec![`0u8`; `10`];
422	if let Some(v) = extra {
423	flg \|= FEXTRA;
424	header.push((v.len() >> `0`) as u8);
425	header.push((v.len() >> `8`) as u8);
426	header.extend(v);
427	}
428	if let Some(filename) = filename {
429	flg \|= FNAME;
430	header.extend(filename.as_bytes_with_nul().iter().copied());
431	}
432	if let Some(comment) = comment {
433	flg \|= FCOMMENT;
434	header.extend(comment.as_bytes_with_nul().iter().copied());
435	}
436	header[`0`] = `0x1f`;
437	header[`1`] = `0x8b`;
438	header[`2`] = `8`;
439	header[`3`] = flg;
440	header[`4`] = (mtime >> `0`) as u8;
441	header[`5`] = (mtime >> `8`) as u8;
442	header[`6`] = (mtime >> `16`) as u8;
443	header[`7`] = (mtime >> `24`) as u8;
444	header[`8`] = if lvl.0 >= Compression::best().0 {
445	`2`
446	} else if lvl.0 <= Compression::fast().0 {
447	`4`
448	} else {
449	`0`
450	};
451
452	// Typically this byte indicates what OS the gz stream was created on,
453	// but in an effort to have cross-platform reproducible streams just
454	// default this value to 255. I'm not sure that if we "correctly" set
455	// this it'd do anything anyway...
456	header[`9`] = operating_system.unwrap_or(`255`);
457	header
458	}
459	}
460
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{thread_rng, Rng};

    // A short string survives a write-encoder / read-decoder round trip.
    #[test]
    fn roundtrip() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    // An encoder that was never written to decodes to empty output.
    #[test]
    fn roundtrip_zero() {
        let e = write::GzEncoder::new(Vec::new(), Compression::default());
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "");
    }

    // Many writes of random length decode back to their concatenation.
    #[test]
    fn roundtrip_big() {
        let mut real = Vec::new();
        let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
        let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let to_write = &v[..thread_rng().gen_range(0..v.len())];
            real.extend(to_write.iter().copied());
            w.write_all(to_write).unwrap();
        }
        let result = w.finish().unwrap();
        let mut r = read::GzDecoder::new(&result[..]);
        let mut v = Vec::new();
        r.read_to_end(&mut v).unwrap();
        assert_eq!(v, real);
    }

    // A read-decoder stacked directly on a read-encoder reproduces 1 MiB of input.
    #[test]
    fn roundtrip_big2() {
        let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
        let mut res = Vec::new();
        r.read_to_end(&mut res).unwrap();
        assert_eq!(res, v);
    }

    // A Rust implementation of CRC that closely matches the C code in RFC1952.
    // Only use this to create CRCs for tests.
    struct Rfc1952Crc {
        /* Table of CRCs of all 8-bit messages. */
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        fn new() -> Self {
            let mut crc = Rfc1952Crc {
                crc_table: [0; 256],
            };
            /* Make the table for a fast CRC. */
            for n in 0usize..256 {
                let mut c = n as u32;
                for _k in 0..8 {
                    if c & 1 != 0 {
                        c = 0xedb88320 ^ (c >> 1);
                    } else {
                        c >>= 1;
                    }
                }
                crc.crc_table[n] = c;
            }
            crc
        }

        /*
         Update a running crc with the bytes buf and return
         the updated crc. The crc should be initialized to zero. Pre- and
         post-conditioning (one's complement) is performed within this
         function so it shouldn't be done by the caller.
        */
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let mut c = crc ^ 0xffffffff;

            for b in buf {
                c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
            }
            c ^ 0xffffffff
        }

        /* Return the CRC of the bytes buf. */
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    // A header produced by the builder, with a header CRC appended by hand,
    // parses back into the same field values.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Add a CRC to the header
        header[3] ^= super::FHCRC;
        let rfc1952_crc = Rfc1952Crc::new();
        let crc32 = rfc1952_crc.crc(&header);
        let crc16 = crc32 as u16;
        header.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();
        let actual = parser.header().unwrap();
        assert_eq!(
            actual,
            &GzHeader {
                extra: None,
                filename: Some("filename".as_bytes().to_vec()),
                comment: Some("comment".as_bytes().to_vec()),
                operating_system: 57,
                mtime: 1234
            }
        )
    }

    // Header fields set on the builder are visible on the decoder's header.
    #[test]
    fn fields() {
        let r = vec![0, 2, 4, 6];
        let e = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&r[..], Compression::default());
        let mut d = read::GzDecoder::new(e);
        assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
        let mut res = Vec::new();
        d.read_to_end(&mut res).unwrap();
        assert_eq!(res, vec![0, 2, 4, 6]);
    }

    // Reading again after the end of the stream succeeds and adds nothing.
    #[test]
    fn keep_reading_after_end() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    // Property test: any byte vector survives an encode/decode round trip.
    #[test]
    fn qc_reader() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = read::GzEncoder::new(&v[..], Compression::default());
            let mut r = read::GzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    // Flushing right after a write succeeds.
    #[test]
    fn flush_after_write() {
        let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(f, "Hello world").unwrap();
        f.flush().unwrap();
    }
}
645