1 | use std::ffi::CString; |
2 | use std::io::prelude::*; |
3 | use std::time; |
4 | |
5 | use crate::bufreader::BufReader; |
6 | use crate::Compression; |
7 | |
/// FLG bit 1: a CRC16 of the header is present.
pub static FHCRC: u8 = 0b0000_0010;
/// FLG bit 2: an `extra` field is present.
pub static FEXTRA: u8 = 0b0000_0100;
/// FLG bit 3: a nul-terminated original file name is present.
pub static FNAME: u8 = 0b0000_1000;
/// FLG bit 4: a nul-terminated comment is present.
pub static FCOMMENT: u8 = 0b0001_0000;
12 | |
13 | pub mod bufread; |
14 | pub mod read; |
15 | pub mod write; |
16 | |
/// Metadata carried in the header of a gzip stream.
///
/// Every optional field is `None` when the stream did not include it, so the
/// header may hold little or no information about the file that was
/// compressed.
#[derive(PartialEq, Clone, Debug, Default)]
pub struct GzHeader {
    extra: Option<Vec<u8>>,
    filename: Option<Vec<u8>>,
    comment: Option<Vec<u8>>,
    operating_system: u8,
    mtime: u32,
}

impl GzHeader {
    /// Returns the raw bytes of the `filename` header field, if present.
    pub fn filename(&self) -> Option<&[u8]> {
        self.filename.as_deref()
    }

    /// Returns the raw bytes of the `extra` header field, if present.
    pub fn extra(&self) -> Option<&[u8]> {
        self.extra.as_deref()
    }

    /// Returns the raw bytes of the `comment` header field, if present.
    pub fn comment(&self) -> Option<&[u8]> {
        self.comment.as_deref()
    }

    /// Returns the `operating_system` header field.
    ///
    /// Various operating systems have predefined values; 255 means the value
    /// is unknown.
    pub fn operating_system(&self) -> u8 {
        self.operating_system
    }

    /// Returns the most recent modification time of the original file.
    ///
    /// The value is in Unix format: seconds since 00:00:00 GMT, Jan. 1, 1970.
    /// (This may cause problems for MS-DOS and other systems that use local
    /// rather than Universal time.) If the compressed data did not come from
    /// a file, `mtime` is the time at which compression started. A value of 0
    /// means no time stamp is available.
    ///
    /// The usage of `mtime` is discouraged because of the Year 2038 problem.
    pub fn mtime(&self) -> u32 {
        self.mtime
    }

    /// Returns the modification time as a [`time::SystemTime`].
    ///
    /// Yields `None` when the stored counter is 0, which indicates that no
    /// time stamp is available. Otherwise the counter is interpreted as whole
    /// seconds since 00:00:00 GMT, Jan. 1 1970.
    /// See [`mtime`](#method.mtime) for more detail.
    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
        match self.mtime {
            0 => None,
            secs => {
                let since_epoch = time::Duration::from_secs(u64::from(secs));
                Some(time::UNIX_EPOCH + since_epoch)
            }
        }
    }
}
84 | |
/// A builder structure to create a new gzip Encoder.
///
/// This structure controls header configuration options such as the filename.
/// Each setter consumes the builder and returns it, so calls can be chained
/// before finishing with `write`, `read` or `buf_read`.
///
/// # Examples
///
/// ```
/// use std::io::prelude::*;
/// # use std::io;
/// use std::fs::File;
/// use flate2::GzBuilder;
/// use flate2::Compression;
///
/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern
///
/// # fn sample_builder() -> Result<(), io::Error> {
/// let f = File::create("examples/hello_world.gz")?;
/// let mut gz = GzBuilder::new()
///     .filename("hello_world.txt")
///     .comment("test file, please delete")
///     .write(f, Compression::default());
/// gz.write_all(b"hello world")?;
/// gz.finish()?;
/// # Ok(())
/// # }
/// ```
#[derive(Debug)]
pub struct GzBuilder {
    // Raw bytes for the optional `extra` header field.
    extra: Option<Vec<u8>>,
    // Optional file name; held as a `CString` so it is written nul-terminated.
    filename: Option<CString>,
    // Optional comment; held as a `CString` so it is written nul-terminated.
    comment: Option<CString>,
    // OS byte for the header; `None` is written out as 255.
    operating_system: Option<u8>,
    // Modification time written into the header (little-endian u32).
    mtime: u32,
}
119 | |
120 | impl Default for GzBuilder { |
121 | fn default() -> Self { |
122 | Self::new() |
123 | } |
124 | } |
125 | |
126 | impl GzBuilder { |
127 | /// Create a new blank builder with no header by default. |
128 | pub fn new() -> GzBuilder { |
129 | GzBuilder { |
130 | extra: None, |
131 | filename: None, |
132 | comment: None, |
133 | operating_system: None, |
134 | mtime: 0, |
135 | } |
136 | } |
137 | |
138 | /// Configure the `mtime` field in the gzip header. |
139 | pub fn mtime(mut self, mtime: u32) -> GzBuilder { |
140 | self.mtime = mtime; |
141 | self |
142 | } |
143 | |
144 | /// Configure the `operating_system` field in the gzip header. |
145 | pub fn operating_system(mut self, os: u8) -> GzBuilder { |
146 | self.operating_system = Some(os); |
147 | self |
148 | } |
149 | |
150 | /// Configure the `extra` field in the gzip header. |
151 | pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder { |
152 | self.extra = Some(extra.into()); |
153 | self |
154 | } |
155 | |
156 | /// Configure the `filename` field in the gzip header. |
157 | /// |
158 | /// # Panics |
159 | /// |
160 | /// Panics if the `filename` slice contains a zero. |
161 | pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder { |
162 | self.filename = Some(CString::new(filename.into()).unwrap()); |
163 | self |
164 | } |
165 | |
166 | /// Configure the `comment` field in the gzip header. |
167 | /// |
168 | /// # Panics |
169 | /// |
170 | /// Panics if the `comment` slice contains a zero. |
171 | pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder { |
172 | self.comment = Some(CString::new(comment.into()).unwrap()); |
173 | self |
174 | } |
175 | |
176 | /// Consume this builder, creating a writer encoder in the process. |
177 | /// |
178 | /// The data written to the returned encoder will be compressed and then |
179 | /// written out to the supplied parameter `w`. |
180 | pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> { |
181 | write::gz_encoder(self.into_header(lvl), w, lvl) |
182 | } |
183 | |
184 | /// Consume this builder, creating a reader encoder in the process. |
185 | /// |
186 | /// Data read from the returned encoder will be the compressed version of |
187 | /// the data read from the given reader. |
188 | pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> { |
189 | read::gz_encoder(self.buf_read(BufReader::new(r), lvl)) |
190 | } |
191 | |
192 | /// Consume this builder, creating a reader encoder in the process. |
193 | /// |
194 | /// Data read from the returned encoder will be the compressed version of |
195 | /// the data read from the given reader. |
196 | pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R> |
197 | where |
198 | R: BufRead, |
199 | { |
200 | bufread::gz_encoder(self.into_header(lvl), r, lvl) |
201 | } |
202 | |
203 | fn into_header(self, lvl: Compression) -> Vec<u8> { |
204 | let GzBuilder { |
205 | extra, |
206 | filename, |
207 | comment, |
208 | operating_system, |
209 | mtime, |
210 | } = self; |
211 | let mut flg = 0; |
212 | let mut header = vec![0u8; 10]; |
213 | if let Some(v) = extra { |
214 | flg |= FEXTRA; |
215 | header.push((v.len() >> 0) as u8); |
216 | header.push((v.len() >> 8) as u8); |
217 | header.extend(v); |
218 | } |
219 | if let Some(filename) = filename { |
220 | flg |= FNAME; |
221 | header.extend(filename.as_bytes_with_nul().iter().copied()); |
222 | } |
223 | if let Some(comment) = comment { |
224 | flg |= FCOMMENT; |
225 | header.extend(comment.as_bytes_with_nul().iter().copied()); |
226 | } |
227 | header[0] = 0x1f; |
228 | header[1] = 0x8b; |
229 | header[2] = 8; |
230 | header[3] = flg; |
231 | header[4] = (mtime >> 0) as u8; |
232 | header[5] = (mtime >> 8) as u8; |
233 | header[6] = (mtime >> 16) as u8; |
234 | header[7] = (mtime >> 24) as u8; |
235 | header[8] = if lvl.0 >= Compression::best().0 { |
236 | 2 |
237 | } else if lvl.0 <= Compression::fast().0 { |
238 | 4 |
239 | } else { |
240 | 0 |
241 | }; |
242 | |
243 | // Typically this byte indicates what OS the gz stream was created on, |
244 | // but in an effort to have cross-platform reproducible streams just |
245 | // default this value to 255. I'm not sure that if we "correctly" set |
246 | // this it'd do anything anyway... |
247 | header[9] = operating_system.unwrap_or(255); |
248 | header |
249 | } |
250 | } |
251 | |
#[cfg(test)]
mod tests {
    use std::io::prelude::*;

    use super::{read, write, GzBuilder};
    use crate::gz::bufread::tests::BlockingCursor;
    use crate::Compression;
    use rand::{thread_rng, Rng};

    /// A short string survives a write-encoder / read-decoder round trip.
    #[test]
    fn roundtrip() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    /// An encoder that never received data still yields a decodable stream.
    #[test]
    fn roundtrip_zero() {
        let e = write::GzEncoder::new(Vec::new(), Compression::default());
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "");
    }

    /// Many writes of random-length prefixes decode to their concatenation.
    #[test]
    fn roundtrip_big() {
        let mut real = Vec::new();
        let mut w = write::GzEncoder::new(Vec::new(), Compression::default());
        let v = crate::random_bytes().take(1024).collect::<Vec<_>>();
        for _ in 0..200 {
            let to_write = &v[..thread_rng().gen_range(0..v.len())];
            real.extend(to_write.iter().copied());
            w.write_all(to_write).unwrap();
        }
        let result = w.finish().unwrap();
        let mut r = read::GzDecoder::new(&result[..]);
        let mut v = Vec::new();
        r.read_to_end(&mut v).unwrap();
        assert_eq!(v, real);
    }

    /// A read-encoder chained straight into a read-decoder returns the input.
    #[test]
    fn roundtrip_big2() {
        let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
        let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default()));
        let mut res = Vec::new();
        r.read_to_end(&mut res).unwrap();
        assert_eq!(res, v);
    }

    /// Header fields set on the builder are visible on the decoder's header.
    #[test]
    fn fields() {
        let r = vec![0, 2, 4, 6];
        let e = GzBuilder::new()
            .filename("foo.rs")
            .comment("bar")
            .extra(vec![0, 1, 2, 3])
            .read(&r[..], Compression::default());
        let mut d = read::GzDecoder::new(e);
        assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..]));
        assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..]));
        // Must be exactly the four bytes passed to `extra` above.
        assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..]));
        let mut res = Vec::new();
        d.read_to_end(&mut res).unwrap();
        assert_eq!(res, vec![0, 2, 4, 6]);
    }

    /// Reading again after the end of the stream succeeds and adds nothing.
    #[test]
    fn keep_reading_after_end() {
        let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
        e.write_all(b"foo bar baz").unwrap();
        let inner = e.finish().unwrap();
        let mut d = read::GzDecoder::new(&inner[..]);
        let mut s = String::new();
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
        d.read_to_string(&mut s).unwrap();
        assert_eq!(s, "foo bar baz");
    }

    /// Property test: arbitrary byte vectors round-trip through the readers.
    #[test]
    fn qc_reader() {
        ::quickcheck::quickcheck(test as fn(_) -> _);

        fn test(v: Vec<u8>) -> bool {
            let r = read::GzEncoder::new(&v[..], Compression::default());
            let mut r = read::GzDecoder::new(r);
            let mut v2 = Vec::new();
            r.read_to_end(&mut v2).unwrap();
            v == v2
        }
    }

    /// Flushing a write-encoder after a write must not fail.
    #[test]
    fn flush_after_write() {
        let mut f = write::GzEncoder::new(Vec::new(), Compression::default());
        write!(f, "Hello world").unwrap();
        f.flush().unwrap();
    }

    #[test]
    // test function read_and_forget of Buffer
    fn blocked_partial_header_read() {
        // this is a reader which receives data afterwards
        let mut r = BlockingCursor::new();
        let data = vec![1, 2, 3];

        match r.write_all(&data) {
            Ok(()) => {}
            _ => {
                panic!("Unexpected result for write_all");
            }
        }
        r.set_position(0);

        // this is unused except for the buffering
        let mut decoder = read::GzDecoder::new(r);
        // NOTE(review): `with_capacity` leaves the Vec's length at 0, so this
        // is a zero-length read; presumably the WouldBlock comes from header
        // parsing rather than from filling `out` — confirm before relying on
        // the buffer contents.
        let mut out = Vec::with_capacity(7);
        match decoder.read(&mut out) {
            Err(e) => {
                assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock);
            }
            _ => {
                panic!("Unexpected result for decoder.read");
            }
        }
    }
}
386 | |