1use std::ffi::CString;
2use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
3use std::time;
4
5use crate::bufreader::BufReader;
6use crate::{Compression, Crc};
7
// Bit masks for the flags byte of a gzip header (byte 3 of the fixed 10-byte
// prefix, as read by `GzHeaderParser::parse` and written by
// `GzBuilder::into_header`).

/// Header-CRC flag (bit 1). When set, the parser checksums the header bytes
/// and compares the low 16 bits of the result against a 2-byte little-endian
/// value stored after the optional fields.
pub static FHCRC: u8 = 1 << 1;
/// Extra-field flag (bit 2). When set, a 2-byte little-endian length followed
/// by that many bytes of `extra` data comes right after the fixed prefix.
pub static FEXTRA: u8 = 1 << 2;
/// Filename flag (bit 3). When set, a NUL-terminated filename is present.
pub static FNAME: u8 = 1 << 3;
/// Comment flag (bit 4). When set, a NUL-terminated comment is present.
pub static FCOMMENT: u8 = 1 << 4;
/// Reserved flag bits (bits 5 through 7). The parser rejects any header in
/// which one of these bits is set.
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;

pub mod bufread;
pub mod read;
pub mod write;

// The maximum length of the header filename and comment fields. More than
// enough for these fields in reasonable use, but prevents possible attacks.
// `read_to_nul` enforces this cap and fails with an `InvalidInput` error as
// soon as a field would grow past it.
const MAX_HEADER_BUF: usize = 65535;
21
/// Metadata found in the header of a gzip stream.
///
/// A gzip header may describe the file that was compressed; every optional
/// piece of that description is exposed here as an `Option` that is `None`
/// when the stream did not carry it.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// Returns the raw bytes of the header's `filename` field, or `None` if
    /// the header has no filename.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the raw bytes of the header's `extra` field, or `None` if the
    /// header has no extra data.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the raw bytes of the header's `comment` field, or `None` if
    /// the header has no comment.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the header's `operating_system` byte.
    ///
    /// Predefined values exist for various operating systems; 255 means the
    /// operating system is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the most recent modification time of the original file.
    ///
    /// The value is in Unix format: seconds since 00:00:00 GMT, Jan. 1, 1970.
    /// (This may cause problems for MS-DOS and other systems that use local
    /// rather than Universal time.) When the compressed data did not come
    /// from a file, `mtime` is the time at which compression started, and a
    /// value of 0 means that no time stamp is available.
    ///
    /// Relying on `mtime` is discouraged because of the Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the modification time as a [`time::SystemTime`].
    ///
    /// Yields `None` when the stored counter is 0, which indicates that no
    /// time stamp is available. Otherwise the counter is interpreted as
    /// seconds since 00:00:00 GMT, Jan. 1 1970; see
    /// [`mtime`](#method.mtime) for more detail.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
        }
    }
}
89
/// Position of a [`GzHeaderParser`] within the gzip header.
///
/// Each variant names the header section that is read next and carries the
/// partial progress for that section. Wherever an `Option<Box<Crc>>` appears
/// it is the running checksum of the header bytes consumed so far; it is
/// `Some` only when the `FHCRC` flag was set in the fixed prefix.
#[derive(Debug)]
pub enum GzHeaderState {
    /// Reading the fixed 10-byte prefix: number of bytes buffered so far and
    /// the buffer itself.
    Start(u8, [u8; 10]),
    /// Reading the 2-byte little-endian length of the extra field (skipped
    /// when `FEXTRA` is not set): bytes buffered so far and the buffer.
    Xlen(Option<Box<Crc>>, u8, [u8; 2]),
    /// Reading the extra field body: number of bytes already stored in the
    /// header's `extra` vector.
    Extra(Option<Box<Crc>>, u16),
    /// Reading the NUL-terminated filename (skipped when `FNAME` is not set).
    Filename(Option<Box<Crc>>),
    /// Reading the NUL-terminated comment (skipped when `FCOMMENT` is not
    /// set).
    Comment(Option<Box<Crc>>),
    /// Reading and verifying the 2-byte header checksum (skipped when no
    /// running checksum exists): bytes buffered so far and the buffer.
    Crc(Option<Box<Crc>>, u8, [u8; 2]),
    /// The whole header has been consumed.
    Complete,
}
100
101impl Default for GzHeaderState {
102 fn default() -> Self {
103 Self::Complete
104 }
105}
106
/// Incremental parser for a gzip header.
///
/// All progress lives in `state` (and in the partially filled `header`), and
/// none of it is rolled back when `parse` returns an error, so a later call
/// to `parse` continues from the recorded position — presumably so callers
/// can retry after a transient I/O error.
///
/// Note that the derived `Default` produces a parser whose state is already
/// `Complete` with an empty header, whereas `new` produces one positioned at
/// the start of a header.
#[derive(Debug, Default)]
pub struct GzHeaderParser {
    // Section of the header that is read next, plus partial progress.
    state: GzHeaderState,
    // Flags byte (byte 3 of the fixed prefix); decides which optional
    // sections are present.
    flags: u8,
    // Header fields collected so far.
    header: GzHeader,
}

impl GzHeaderParser {
    /// Creates a parser positioned at the very first byte of a header.
    fn new() -> Self {
        GzHeaderParser {
            state: GzHeaderState::Start(0, [0; 10]),
            flags: 0,
            header: GzHeader::default(),
        }
    }

    /// Reads from `r` until the header is complete or an error occurs.
    ///
    /// Returns `Ok(())` once the state is `Complete`; calling it again in
    /// that state returns `Ok(())` without reading anything.
    ///
    /// # Errors
    ///
    /// * the "invalid gzip header" error when the identification bytes, the
    ///   compression method or the reserved flag bits are wrong;
    /// * the checksum-mismatch error when a header CRC is present and does
    ///   not match the computed one;
    /// * `UnexpectedEof` when the reader ends in the middle of the header;
    /// * any other error reported by `r`.
    fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
        loop {
            match &mut self.state {
                GzHeaderState::Start(count, buffer) => {
                    // Fill the 10-byte fixed prefix; `count` survives an
                    // early return so a retry resumes mid-buffer. The `as u8`
                    // cast assumes the reader never reports more bytes than
                    // the slice it was handed.
                    while (*count as usize) < buffer.len() {
                        *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                    }
                    // Gzip identification bytes
                    if buffer[0] != 0x1f || buffer[1] != 0x8b {
                        return Err(bad_header());
                    }
                    // Gzip compression method (8 = deflate)
                    if buffer[2] != 8 {
                        return Err(bad_header());
                    }
                    self.flags = buffer[3];
                    // RFC1952: "must give an error indication if any reserved bit is non-zero"
                    if self.flags & FRESERVED != 0 {
                        return Err(bad_header());
                    }
                    // Bytes 4..8 hold the modification time, little-endian.
                    self.header.mtime = ((buffer[4] as u32) << 0)
                        | ((buffer[5] as u32) << 8)
                        | ((buffer[6] as u32) << 16)
                        | ((buffer[7] as u32) << 24);
                    // Byte 8 is read but deliberately not stored.
                    let _xfl = buffer[8];
                    self.header.operating_system = buffer[9];
                    // Only track a checksum when the header says one follows;
                    // it starts out covering the fixed prefix.
                    let crc = if self.flags & FHCRC != 0 {
                        let mut crc = Box::new(Crc::new());
                        crc.update(buffer);
                        Some(crc)
                    } else {
                        None
                    };
                    self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
                }
                GzHeaderState::Xlen(crc, count, buffer) => {
                    if self.flags & FEXTRA != 0 {
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        if let Some(crc) = crc {
                            crc.update(buffer);
                        }
                        // Pre-size the extra vector so the next state can
                        // fill it in place.
                        let xlen = parse_le_u16(&buffer);
                        self.header.extra = Some(vec![0; xlen as usize]);
                        self.state = GzHeaderState::Extra(crc.take(), 0);
                    } else {
                        // No extra field: hand the checksum straight on.
                        self.state = GzHeaderState::Filename(crc.take());
                    }
                }
                GzHeaderState::Extra(crc, count) => {
                    debug_assert!(self.header.extra.is_some());
                    let extra = self.header.extra.as_mut().unwrap();
                    // `count` is the number of bytes of `extra` already
                    // filled; the vector length was fixed in the Xlen state.
                    while (*count as usize) < extra.len() {
                        *count += read_into(r, &mut extra[*count as usize..])? as u16;
                    }
                    if let Some(crc) = crc {
                        crc.update(extra);
                    }
                    self.state = GzHeaderState::Filename(crc.take());
                }
                GzHeaderState::Filename(crc) => {
                    if self.flags & FNAME != 0 {
                        // Bytes are appended to the existing vector, so a
                        // retry after an error keeps what was already read.
                        let filename = self.header.filename.get_or_insert_with(Vec::new);
                        read_to_nul(r, filename)?;
                        if let Some(crc) = crc {
                            // The terminator is not stored in `filename` but
                            // is part of the checksummed header bytes.
                            crc.update(filename);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Comment(crc.take());
                }
                GzHeaderState::Comment(crc) => {
                    if self.flags & FCOMMENT != 0 {
                        let comment = self.header.comment.get_or_insert_with(Vec::new);
                        read_to_nul(r, comment)?;
                        if let Some(crc) = crc {
                            // As for the filename, the NUL terminator is
                            // checksummed separately.
                            crc.update(comment);
                            crc.update(b"\0");
                        }
                    }
                    self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
                }
                GzHeaderState::Crc(crc, count, buffer) => {
                    if let Some(crc) = crc {
                        debug_assert!(self.flags & FHCRC != 0);
                        while (*count as usize) < buffer.len() {
                            *count += read_into(r, &mut buffer[*count as usize..])? as u8;
                        }
                        // The stored value is compared against the low 16
                        // bits of the running checksum.
                        let stored_crc = parse_le_u16(&buffer);
                        let calced_crc = crc.sum() as u16;
                        if stored_crc != calced_crc {
                            return Err(corrupt());
                        }
                    }
                    self.state = GzHeaderState::Complete;
                }
                GzHeaderState::Complete => {
                    return Ok(());
                }
            }
        }
    }

    /// Returns the parsed header, or `None` while parsing is unfinished.
    fn header(&self) -> Option<&GzHeader> {
        match self.state {
            GzHeaderState::Complete => Some(&self.header),
            _ => None,
        }
    }
}
234
235impl From<GzHeaderParser> for GzHeader {
236 fn from(parser: GzHeaderParser) -> Self {
237 debug_assert!(matches!(parser.state, GzHeaderState::Complete));
238 parser.header
239 }
240}
241
// Perform a single `read` from `r` into `buffer` and return the number of
// bytes it delivered, which may be fewer than `buffer.len()`.
//
// Unlike `Read::read`, `Ok(0)` here never means end of stream:
//   * a read of zero bytes is turned into an `UnexpectedEof` error, because
//     callers only ask for bytes the header still requires;
//   * an `Interrupted` error is turned into `Ok(0)`, so the caller's fill
//     loop simply tries again.
// Every other error is passed through unchanged.
//
// `buffer` must not be empty, otherwise a successful zero-length read could
// not be told apart from end of stream.
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
    debug_assert!(!buffer.is_empty());
    match r.read(buffer) {
        Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
        Ok(n) => Ok(n),
        Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
        Err(e) => Err(e),
    }
}
254
255// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
256fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
257 let mut bytes: Bytes<&mut R> = r.bytes();
258 loop {
259 match bytes.next().transpose()? {
260 Some(byte: u8) if byte == 0 => {
261 return Ok(());
262 }
263 Some(_) if buffer.len() == MAX_HEADER_BUF => {
264 return Err(Error::new(
265 kind:ErrorKind::InvalidInput,
266 error:"gzip header field too long",
267 ));
268 }
269 Some(byte: u8) => {
270 buffer.push(byte);
271 }
272 None => {
273 return Err(ErrorKind::UnexpectedEof.into());
274 }
275 }
276 }
277}
278
// Interpret two bytes as a little-endian `u16` (first byte is the low byte).
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
    u16::from_le_bytes(*buffer)
}
282
// Build the `InvalidInput` error returned when the fixed part of a gzip
// header is malformed.
fn bad_header() -> Error {
    Error::new(ErrorKind::InvalidInput, "invalid gzip header")
}
286
// Build the `InvalidInput` error returned when a stored checksum does not
// match the one computed over the data.
fn corrupt() -> Error {
    Error::new(
        ErrorKind::InvalidInput,
        "corrupt gzip stream does not have a matching checksum",
    )
}
293
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///     .filename("hello_world.txt")
///     .comment("test file, please delete")
///     .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct GzBuilder {
    // Bytes of the optional extra field; `None` leaves the field out.
    extra: Option<Vec<u8>>,
    // Optional filename; stored as a `CString`, so it cannot contain a zero
    // byte and is written with its NUL terminator.
    filename: Option<CString>,
    // Optional comment, stored the same way as the filename.
    comment: Option<CString>,
    // Operating-system byte; when `None` the header is written with 255.
    operating_system: Option<u8>,
    // Modification time written to the header; 0 unless configured.
    mtime: u32,
}
328
329impl Default for GzBuilder {
330 fn default() -> Self {
331 Self::new()
332 }
333}
334
335impl GzBuilder {
336 /// Create a new blank builder with no header by default.
337 pub fn new() -> GzBuilder {
338 GzBuilder {
339 extra: None,
340 filename: None,
341 comment: None,
342 operating_system: None,
343 mtime: 0,
344 }
345 }
346
347 /// Configure the `mtime` field in the gzip header.
348 pub fn mtime(mut self, mtime: u32) -> GzBuilder {
349 self.mtime = mtime;
350 self
351 }
352
353 /// Configure the `operating_system` field in the gzip header.
354 pub fn operating_system(mut self, os: u8) -> GzBuilder {
355 self.operating_system = Some(os);
356 self
357 }
358
359 /// Configure the `extra` field in the gzip header.
360 pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder {
361 self.extra = Some(extra.into());
362 self
363 }
364
365 /// Configure the `filename` field in the gzip header.
366 ///
367 /// # Panics
368 ///
369 /// Panics if the `filename` slice contains a zero.
370 pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder {
371 self.filename = Some(CString::new(filename.into()).unwrap());
372 self
373 }
374
375 /// Configure the `comment` field in the gzip header.
376 ///
377 /// # Panics
378 ///
379 /// Panics if the `comment` slice contains a zero.
380 pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder {
381 self.comment = Some(CString::new(comment.into()).unwrap());
382 self
383 }
384
385 /// Consume this builder, creating a writer encoder in the process.
386 ///
387 /// The data written to the returned encoder will be compressed and then
388 /// written out to the supplied parameter `w`.
389 pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
390 write::gz_encoder(self.into_header(lvl), w, lvl)
391 }
392
393 /// Consume this builder, creating a reader encoder in the process.
394 ///
395 /// Data read from the returned encoder will be the compressed version of
396 /// the data read from the given reader.
397 pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
398 read::gz_encoder(self.buf_read(BufReader::new(r), lvl))
399 }
400
401 /// Consume this builder, creating a reader encoder in the process.
402 ///
403 /// Data read from the returned encoder will be the compressed version of
404 /// the data read from the given reader.
405 pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
406 where
407 R: BufRead,
408 {
409 bufread::gz_encoder(self.into_header(lvl), r, lvl)
410 }
411
412 fn into_header(self, lvl: Compression) -> Vec<u8> {
413 let GzBuilder {
414 extra,
415 filename,
416 comment,
417 operating_system,
418 mtime,
419 } = self;
420 let mut flg = 0;
421 let mut header = vec![0u8; 10];
422 if let Some(v) = extra {
423 flg |= FEXTRA;
424 header.push((v.len() >> 0) as u8);
425 header.push((v.len() >> 8) as u8);
426 header.extend(v);
427 }
428 if let Some(filename) = filename {
429 flg |= FNAME;
430 header.extend(filename.as_bytes_with_nul().iter().copied());
431 }
432 if let Some(comment) = comment {
433 flg |= FCOMMENT;
434 header.extend(comment.as_bytes_with_nul().iter().copied());
435 }
436 header[0] = 0x1f;
437 header[1] = 0x8b;
438 header[2] = 8;
439 header[3] = flg;
440 header[4] = (mtime >> 0) as u8;
441 header[5] = (mtime >> 8) as u8;
442 header[6] = (mtime >> 16) as u8;
443 header[7] = (mtime >> 24) as u8;
444 header[8] = if lvl.0 >= Compression::best().0 {
445 2
446 } else if lvl.0 <= Compression::fast().0 {
447 4
448 } else {
449 0
450 };
451
452 // Typically this byte indicates what OS the gz stream was created on,
453 // but in an effort to have cross-platform reproducible streams just
454 // default this value to 255. I'm not sure that if we "correctly" set
455 // this it'd do anything anyway...
456 header[9] = operating_system.unwrap_or(255);
457 header
458 }
459}
460
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder, GzHeaderParser};
    use crate::{Compression, GzHeader};
    use rand::{thread_rng, Rng};

    // Compress a short string with the writer and decode it with the reader.
    #[test]
    fn roundtrip() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // An encoder that never received any data still produces a valid stream.
    #[test]
    fn roundtrip_zero() {
        let encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "");
    }

    // Many writes of random length must decode to their concatenation.
    #[test]
    fn roundtrip_big() {
        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
        let mut expected = Vec::new();
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        for _ in 0..200 {
            let chunk = &pool[..thread_rng().gen_range(0..pool.len())];
            expected.extend_from_slice(chunk);
            encoder.write_all(chunk).unwrap();
        }
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, expected);
    }

    // Chain a reader-based encoder directly into a reader-based decoder.
    #[test]
    fn roundtrip_big2() {
        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let encoder = read::GzEncoder::new(&input[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, input);
    }

    // A Rust implementation of CRC that closely matches the C code in RFC1952.
    // Only use this to create CRCs for tests.
    struct Rfc1952Crc {
        /* Table of CRCs of all 8-bit messages. */
        crc_table: [u32; 256],
    }

    impl Rfc1952Crc {
        fn new() -> Self {
            let mut this = Rfc1952Crc {
                crc_table: [0; 256],
            };
            /* Make the table for a fast CRC. */
            for (n, slot) in this.crc_table.iter_mut().enumerate() {
                let mut c = n as u32;
                for _ in 0..8 {
                    c = if c & 1 != 0 {
                        0xedb88320 ^ (c >> 1)
                    } else {
                        c >> 1
                    };
                }
                *slot = c;
            }
            this
        }

        /*
          Update a running crc with the bytes buf and return
          the updated crc. The crc should be initialized to zero. Pre- and
          post-conditioning (one's complement) is performed within this
          function so it shouldn't be done by the caller.
        */
        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
            let folded = buf.iter().fold(crc ^ 0xffffffff, |c, &b| {
                self.crc_table[(c as u8 ^ b) as usize] ^ (c >> 8)
            });
            folded ^ 0xffffffff
        }

        /* Return the CRC of the bytes buf. */
        fn crc(&self, buf: &[u8]) -> u32 {
            self.update_crc(0, buf)
        }
    }

    // Build a header, append a header CRC by hand, and parse it back.
    #[test]
    fn roundtrip_header() {
        let mut header = GzBuilder::new()
            .mtime(1234)
            .operating_system(57)
            .filename("filename")
            .comment("comment")
            .into_header(Compression::fast());

        // Add a CRC to the header
        header[3] ^= super::FHCRC;
        let crc32 = Rfc1952Crc::new().crc(&header);
        let crc16 = crc32 as u16;
        header.extend_from_slice(&crc16.to_le_bytes());

        let mut parser = GzHeaderParser::new();
        parser.parse(&mut header.as_slice()).unwrap();
        let actual = parser.header().unwrap();
        let expected = GzHeader {
            extra: None,
            filename: Some(b"filename".to_vec()),
            comment: Some(b"comment".to_vec()),
            operating_system: 57,
            mtime: 1234,
        };
        assert_eq!(actual, &expected)
    }

    // Optional header fields set on the builder are visible to the decoder.
    #[test]
    fn fields() {
        let payload = vec![0, 2, 4, 6];
        let encoder = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&payload[..], Compression::default());
        let mut decoder = read::GzDecoder::new(encoder);
        assert_eq!(decoder.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(decoder.header().unwrap().comment(), Some(&b"bar"[..]));
        assert_eq!(
            decoder.header().unwrap().extra(),
            Some(&b"\x00\x01\x02\x03"[..])
        );
        let mut decoded = Vec::new();
        decoder.read_to_end(&mut decoded).unwrap();
        assert_eq!(decoded, vec![0, 2, 4, 6]);
    }

    // Reading again after the end of the stream yields nothing more.
    #[test]
    fn keep_reading_after_end() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(b"foo bar baz").unwrap();
        let compressed = encoder.finish().unwrap();

        let mut decoder = read::GzDecoder::new(&compressed[..]);
        let mut text = String::new();
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
        decoder.read_to_string(&mut text).unwrap();
        assert_eq!(text, "foo bar baz");
    }

    // Property test: encode-then-decode returns the original bytes.
    #[test]
    fn qc_reader() {
        fn prop(input: Vec<u8>) -> bool {
            let encoder = read::GzEncoder::new(&input[..], Compression::default());
            let mut decoder = read::GzDecoder::new(encoder);
            let mut decoded = Vec::new();
            decoder.read_to_end(&mut decoded).unwrap();
            input == decoded
        }

        ::quickcheck::quickcheck(prop as fn(_) -> _);
    }

    // Flushing right after a formatted write must succeed.
    #[test]
    fn flush_after_write() {
        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(encoder, "Hello world").unwrap();
        encoder.flush().unwrap();
    }
}
645