1 | use std::cmp; |
2 | use std::io; |
3 | use std::io::prelude::*; |
4 | use std::mem; |
5 | |
6 | use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
7 | use crate::crc::CrcReader; |
8 | use crate::deflate; |
9 | use crate::Compression; |
10 | |
/// Copies as many bytes as fit from `from[*pos..]` into the start of `into`.
///
/// `pos` is a cursor into `from`; it is advanced by the number of bytes
/// copied, so repeated calls drain `from` piece by piece.
///
/// Returns the number of bytes copied, which is the smaller of `into.len()`
/// and the number of bytes left in `from` after `*pos`.
///
/// # Panics
///
/// Panics if `*pos` is greater than `from.len()`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    let remaining = &from[*pos..];
    let min = cmp::min(into.len(), remaining.len());
    // Both sides are exactly `min` bytes long, as `copy_from_slice` requires.
    into[..min].copy_from_slice(&remaining[..min]);
    *pos += min;
    min
}
19 | |
20 | /// A gzip streaming encoder |
21 | /// |
22 | /// This structure implements a [`Read`] interface. When read from, it reads |
23 | /// uncompressed data from the underlying [`BufRead`] and provides the compressed data. |
24 | /// |
25 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
26 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
27 | /// |
28 | /// # Examples |
29 | /// |
30 | /// ``` |
31 | /// use std::io::prelude::*; |
32 | /// use std::io; |
33 | /// use flate2::Compression; |
34 | /// use flate2::bufread::GzEncoder; |
35 | /// use std::fs::File; |
36 | /// use std::io::BufReader; |
37 | /// |
38 | /// // Opens sample file, compresses the contents and returns a Vector or error |
39 | /// // File wrapped in a BufReader implements BufRead |
40 | /// |
41 | /// fn open_hello_world() -> io::Result<Vec<u8>> { |
42 | /// let f = File::open("examples/hello_world.txt" )?; |
43 | /// let b = BufReader::new(f); |
44 | /// let mut gz = GzEncoder::new(b, Compression::fast()); |
45 | /// let mut buffer = Vec::new(); |
46 | /// gz.read_to_end(&mut buffer)?; |
47 | /// Ok(buffer) |
48 | /// } |
49 | /// ``` |
50 | #[derive (Debug)] |
51 | pub struct GzEncoder<R> { |
52 | inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, |
53 | header: Vec<u8>, |
54 | pos: usize, |
55 | eof: bool, |
56 | } |
57 | |
58 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
59 | let crc: CrcReader = CrcReader::new(r); |
60 | GzEncoder { |
61 | inner: deflate::bufread::DeflateEncoder::new(r:crc, level:lvl), |
62 | header, |
63 | pos: 0, |
64 | eof: false, |
65 | } |
66 | } |
67 | |
68 | impl<R: BufRead> GzEncoder<R> { |
69 | /// Creates a new encoder which will use the given compression level. |
70 | /// |
71 | /// The encoder is not configured specially for the emitted header. For |
72 | /// header configuration, see the `GzBuilder` type. |
73 | /// |
74 | /// The data read from the stream `r` will be compressed and available |
75 | /// through the returned reader. |
76 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
77 | GzBuilder::new().buf_read(r, level) |
78 | } |
79 | |
80 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
81 | if self.pos == 8 { |
82 | return Ok(0); |
83 | } |
84 | let crc = self.inner.get_ref().crc(); |
85 | let ref arr = [ |
86 | (crc.sum() >> 0) as u8, |
87 | (crc.sum() >> 8) as u8, |
88 | (crc.sum() >> 16) as u8, |
89 | (crc.sum() >> 24) as u8, |
90 | (crc.amount() >> 0) as u8, |
91 | (crc.amount() >> 8) as u8, |
92 | (crc.amount() >> 16) as u8, |
93 | (crc.amount() >> 24) as u8, |
94 | ]; |
95 | Ok(copy(into, arr, &mut self.pos)) |
96 | } |
97 | } |
98 | |
99 | impl<R> GzEncoder<R> { |
100 | /// Acquires a reference to the underlying reader. |
101 | pub fn get_ref(&self) -> &R { |
102 | self.inner.get_ref().get_ref() |
103 | } |
104 | |
105 | /// Acquires a mutable reference to the underlying reader. |
106 | /// |
107 | /// Note that mutation of the reader may result in surprising results if |
108 | /// this encoder is continued to be used. |
109 | pub fn get_mut(&mut self) -> &mut R { |
110 | self.inner.get_mut().get_mut() |
111 | } |
112 | |
113 | /// Returns the underlying stream, consuming this encoder |
114 | pub fn into_inner(self) -> R { |
115 | self.inner.into_inner().into_inner() |
116 | } |
117 | } |
118 | |
/// Splits an 8-byte buffer into two little-endian `u32` values.
///
/// The first four bytes form the first value (used by the caller as the
/// checksum) and the last four bytes form the second (used as the byte count).
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let [c0, c1, c2, c3, a0, a1, a2, a3] = *buf;
    let crc = u32::from_le_bytes([c0, c1, c2, c3]);
    let amt = u32::from_le_bytes([a0, a1, a2, a3]);
    (crc, amt)
}
131 | |
132 | impl<R: BufRead> Read for GzEncoder<R> { |
133 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
134 | let mut amt: usize = 0; |
135 | if self.eof { |
136 | return self.read_footer(into); |
137 | } else if self.pos < self.header.len() { |
138 | amt += copy(into, &self.header, &mut self.pos); |
139 | if amt == into.len() { |
140 | return Ok(amt); |
141 | } |
142 | let tmp: &mut [u8] = into; |
143 | into = &mut tmp[amt..]; |
144 | } |
145 | match self.inner.read(buf:into)? { |
146 | 0 => { |
147 | self.eof = true; |
148 | self.pos = 0; |
149 | self.read_footer(into) |
150 | } |
151 | n: usize => Ok(amt + n), |
152 | } |
153 | } |
154 | } |
155 | |
156 | impl<R: BufRead + Write> Write for GzEncoder<R> { |
157 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
158 | self.get_mut().write(buf) |
159 | } |
160 | |
161 | fn flush(&mut self) -> io::Result<()> { |
162 | self.get_mut().flush() |
163 | } |
164 | } |
165 | |
166 | /// A decoder for a single member of a [gzip file]. |
167 | /// |
168 | /// This structure implements a [`Read`] interface. When read from, it reads |
169 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
170 | /// |
171 | /// After reading a single member of the gzip data this reader will return |
172 | /// Ok(0) even if there are more bytes available in the underlying reader. |
173 | /// If you need the following bytes, call `into_inner()` after Ok(0) to |
174 | /// recover the underlying reader. |
175 | /// |
176 | /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] |
177 | /// or read more |
178 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
179 | /// |
180 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
181 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
182 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
183 | /// |
184 | /// # Examples |
185 | /// |
186 | /// ``` |
187 | /// use std::io::prelude::*; |
188 | /// use std::io; |
189 | /// # use flate2::Compression; |
190 | /// # use flate2::write::GzEncoder; |
191 | /// use flate2::bufread::GzDecoder; |
192 | /// |
193 | /// # fn main() { |
194 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
195 | /// # e.write_all(b"Hello World" ).unwrap(); |
196 | /// # let bytes = e.finish().unwrap(); |
197 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
198 | /// # } |
199 | /// # |
200 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
201 | /// // Here &[u8] implements BufRead |
202 | /// |
203 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
204 | /// let mut gz = GzDecoder::new(&bytes[..]); |
205 | /// let mut s = String::new(); |
206 | /// gz.read_to_string(&mut s)?; |
207 | /// Ok(s) |
208 | /// } |
209 | /// ``` |
210 | #[derive (Debug)] |
211 | pub struct GzDecoder<R> { |
212 | state: GzState, |
213 | reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, |
214 | multi: bool, |
215 | } |
216 | |
217 | #[derive (Debug)] |
218 | enum GzState { |
219 | Header(GzHeaderParser), |
220 | Body(GzHeader), |
221 | Finished(GzHeader, usize, [u8; 8]), |
222 | Err(io::Error), |
223 | End(Option<GzHeader>), |
224 | } |
225 | |
226 | impl<R: BufRead> GzDecoder<R> { |
227 | /// Creates a new decoder from the given reader, immediately parsing the |
228 | /// gzip header. |
229 | pub fn new(mut r: R) -> GzDecoder<R> { |
230 | let mut header_parser = GzHeaderParser::new(); |
231 | |
232 | let state = match header_parser.parse(&mut r) { |
233 | Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
234 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
235 | GzState::Header(header_parser) |
236 | } |
237 | Err(err) => GzState::Err(err), |
238 | }; |
239 | |
240 | GzDecoder { |
241 | state, |
242 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
243 | multi: false, |
244 | } |
245 | } |
246 | |
247 | fn multi(mut self, flag: bool) -> GzDecoder<R> { |
248 | self.multi = flag; |
249 | self |
250 | } |
251 | } |
252 | |
253 | impl<R> GzDecoder<R> { |
254 | /// Returns the header associated with this stream, if it was valid |
255 | pub fn header(&self) -> Option<&GzHeader> { |
256 | match &self.state { |
257 | GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
258 | GzState::End(header) => header.as_ref(), |
259 | _ => None, |
260 | } |
261 | } |
262 | |
263 | /// Acquires a reference to the underlying reader. |
264 | pub fn get_ref(&self) -> &R { |
265 | self.reader.get_ref().get_ref() |
266 | } |
267 | |
268 | /// Acquires a mutable reference to the underlying stream. |
269 | /// |
270 | /// Note that mutation of the stream may result in surprising results if |
271 | /// this decoder is continued to be used. |
272 | pub fn get_mut(&mut self) -> &mut R { |
273 | self.reader.get_mut().get_mut() |
274 | } |
275 | |
276 | /// Consumes this decoder, returning the underlying reader. |
277 | pub fn into_inner(self) -> R { |
278 | self.reader.into_inner().into_inner() |
279 | } |
280 | } |
281 | |
impl<R: BufRead> Read for GzDecoder<R> {
    /// Drives the decoding state machine until it can return decompressed
    /// bytes, an error, or `Ok(0)`.
    ///
    /// Each loop iteration handles the current `GzState` and either returns
    /// or moves to the next state and goes around again.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        loop {
            match &mut self.state {
                GzState::Header(parser) => {
                    // Resume header parsing directly on the underlying reader.
                    // On error `?` returns with the state still `Header`, so a
                    // later call tries again with the same parser.
                    parser.parse(self.reader.get_mut().get_mut())?;
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
                }
                GzState::Body(header) => {
                    // Answer zero-length reads directly so that the `0` arm
                    // below only ever sees a read into a non-empty buffer.
                    if into.is_empty() {
                        return Ok(0);
                    }
                    match self.reader.read(into)? {
                        0 => {
                            // Compressed data is done; start collecting the
                            // 8-byte trailer with nothing read so far.
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
                        }
                        n => {
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(header, pos, buf) => {
                    if *pos < buf.len() {
                        // Trailer incomplete: fetch more of it straight from
                        // the underlying reader and advance `pos` by the
                        // amount `read_into` reports.
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
                    } else {
                        // Trailer complete: decode the two stored values.
                        let (crc, amt) = finish(&buf);

                        // Both must match what the CRC wrapper tracked.
                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
                            self.state = GzState::End(Some(mem::take(header)));
                            return Err(corrupt());
                        } else if self.multi {
                            // Multi-member mode: check whether the underlying
                            // reader has any data left.
                            let is_eof = self
                                .reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())?;

                            if is_eof {
                                self.state = GzState::End(Some(mem::take(header)));
                            } else {
                                // More data follows: reset the CRC wrapper and
                                // the deflate decoder (presumably clearing
                                // per-member state; confirm against their
                                // definitions) and parse the next header.
                                self.reader.reset();
                                self.reader.get_mut().reset_data();
                                self.state = GzState::Header(GzHeaderParser::new())
                            }
                        } else {
                            self.state = GzState::End(Some(mem::take(header)));
                        }
                    }
                }
                GzState::Err(err) => {
                    // Move the stored error out (leaving a placeholder behind),
                    // report it once, and finish with no header available.
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
                    self.state = GzState::End(None);
                    return result;
                }
                GzState::End(_) => return Ok(0),
            }
        }
    }
}
342 | |
343 | impl<R: BufRead + Write> Write for GzDecoder<R> { |
344 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
345 | self.get_mut().write(buf) |
346 | } |
347 | |
348 | fn flush(&mut self) -> io::Result<()> { |
349 | self.get_mut().flush() |
350 | } |
351 | } |
352 | |
353 | /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. |
354 | /// |
355 | /// This structure implements a [`Read`] interface. When read from, it reads |
356 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
357 | /// |
358 | /// A gzip file consists of a series of *members* concatenated one after another. |
359 | /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the |
360 | /// underlying reader does. For a file, this reads to the end of the file. |
361 | /// |
362 | /// To handle members seperately, see [GzDecoder] or read more |
363 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
364 | /// |
365 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
366 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
367 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
368 | /// |
369 | /// # Examples |
370 | /// |
371 | /// ``` |
372 | /// use std::io::prelude::*; |
373 | /// use std::io; |
374 | /// # use flate2::Compression; |
375 | /// # use flate2::write::GzEncoder; |
376 | /// use flate2::bufread::MultiGzDecoder; |
377 | /// |
378 | /// # fn main() { |
379 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
380 | /// # e.write_all(b"Hello World" ).unwrap(); |
381 | /// # let bytes = e.finish().unwrap(); |
382 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
383 | /// # } |
384 | /// # |
385 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
386 | /// // Here &[u8] implements BufRead |
387 | /// |
388 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
389 | /// let mut gz = MultiGzDecoder::new(&bytes[..]); |
390 | /// let mut s = String::new(); |
391 | /// gz.read_to_string(&mut s)?; |
392 | /// Ok(s) |
393 | /// } |
394 | /// ``` |
395 | #[derive (Debug)] |
396 | pub struct MultiGzDecoder<R>(GzDecoder<R>); |
397 | |
398 | impl<R: BufRead> MultiGzDecoder<R> { |
399 | /// Creates a new decoder from the given reader, immediately parsing the |
400 | /// (first) gzip header. If the gzip stream contains multiple members all will |
401 | /// be decoded. |
402 | pub fn new(r: R) -> MultiGzDecoder<R> { |
403 | MultiGzDecoder(GzDecoder::new(r).multi(flag:true)) |
404 | } |
405 | } |
406 | |
407 | impl<R> MultiGzDecoder<R> { |
408 | /// Returns the current header associated with this stream, if it's valid |
409 | pub fn header(&self) -> Option<&GzHeader> { |
410 | self.0.header() |
411 | } |
412 | |
413 | /// Acquires a reference to the underlying reader. |
414 | pub fn get_ref(&self) -> &R { |
415 | self.0.get_ref() |
416 | } |
417 | |
418 | /// Acquires a mutable reference to the underlying stream. |
419 | /// |
420 | /// Note that mutation of the stream may result in surprising results if |
421 | /// this decoder is continued to be used. |
422 | pub fn get_mut(&mut self) -> &mut R { |
423 | self.0.get_mut() |
424 | } |
425 | |
426 | /// Consumes this decoder, returning the underlying reader. |
427 | pub fn into_inner(self) -> R { |
428 | self.0.into_inner() |
429 | } |
430 | } |
431 | |
432 | impl<R: BufRead> Read for MultiGzDecoder<R> { |
433 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
434 | self.0.read(buf:into) |
435 | } |
436 | } |
437 | |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write` may accept only part of the input; `write_all` guarantees
            // the whole payload is compressed.
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            // One trailing byte that is not part of the gzip member.
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Read into a non-empty buffer: a zero-length buffer would yield 0
        // no matter what state the decoder is in.
        let mut scratch = [0u8; 8];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
484 | |