1 | use std::cmp; |
2 | use std::io; |
3 | use std::io::prelude::*; |
4 | use std::mem; |
5 | |
6 | use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; |
7 | use crate::crc::CrcReader; |
8 | use crate::deflate; |
9 | use crate::Compression; |
10 | |
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is a cursor into `from`; it is advanced by the number of bytes
/// copied, and that number is returned. When the cursor is already at (or
/// past) the end of `from`, nothing is copied and `0` is returned.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // Guard the subtraction below: a cursor beyond the source would
    // otherwise underflow and then index out of bounds.
    if *pos >= from.len() {
        return 0;
    }
    let count = cmp::min(into.len(), from.len() - *pos);
    into[..count].copy_from_slice(&from[*pos..*pos + count]);
    *pos += count;
    count
}
17 | |
18 | /// A gzip streaming encoder |
19 | /// |
20 | /// This structure implements a [`Read`] interface. When read from, it reads |
21 | /// uncompressed data from the underlying [`BufRead`] and provides the compressed data. |
22 | /// |
23 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
24 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
25 | /// |
26 | /// # Examples |
27 | /// |
28 | /// ``` |
29 | /// use std::io::prelude::*; |
30 | /// use std::io; |
31 | /// use flate2::Compression; |
32 | /// use flate2::bufread::GzEncoder; |
33 | /// use std::fs::File; |
34 | /// use std::io::BufReader; |
35 | /// |
36 | /// // Opens sample file, compresses the contents and returns a Vector or error |
37 | /// // File wrapped in a BufReader implements BufRead |
38 | /// |
39 | /// fn open_hello_world() -> io::Result<Vec<u8>> { |
40 | /// let f = File::open("examples/hello_world.txt" )?; |
41 | /// let b = BufReader::new(f); |
42 | /// let mut gz = GzEncoder::new(b, Compression::fast()); |
43 | /// let mut buffer = Vec::new(); |
44 | /// gz.read_to_end(&mut buffer)?; |
45 | /// Ok(buffer) |
46 | /// } |
47 | /// ``` |
48 | #[derive (Debug)] |
49 | pub struct GzEncoder<R> { |
50 | inner: deflate::bufread::DeflateEncoder<CrcReader<R>>, |
51 | header: Vec<u8>, |
52 | pos: usize, |
53 | eof: bool, |
54 | } |
55 | |
56 | pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> { |
57 | let crc: CrcReader = CrcReader::new(r); |
58 | GzEncoder { |
59 | inner: deflate::bufread::DeflateEncoder::new(r:crc, level:lvl), |
60 | header, |
61 | pos: 0, |
62 | eof: false, |
63 | } |
64 | } |
65 | |
66 | impl<R: BufRead> GzEncoder<R> { |
67 | /// Creates a new encoder which will use the given compression level. |
68 | /// |
69 | /// The encoder is not configured specially for the emitted header. For |
70 | /// header configuration, see the `GzBuilder` type. |
71 | /// |
72 | /// The data read from the stream `r` will be compressed and available |
73 | /// through the returned reader. |
74 | pub fn new(r: R, level: Compression) -> GzEncoder<R> { |
75 | GzBuilder::new().buf_read(r, level) |
76 | } |
77 | |
78 | fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> { |
79 | if self.pos == 8 { |
80 | return Ok(0); |
81 | } |
82 | let crc = self.inner.get_ref().crc(); |
83 | let calced_crc_bytes = crc.sum().to_le_bytes(); |
84 | let arr = [ |
85 | calced_crc_bytes[0], |
86 | calced_crc_bytes[1], |
87 | calced_crc_bytes[2], |
88 | calced_crc_bytes[3], |
89 | (crc.amount() >> 0) as u8, |
90 | (crc.amount() >> 8) as u8, |
91 | (crc.amount() >> 16) as u8, |
92 | (crc.amount() >> 24) as u8, |
93 | ]; |
94 | Ok(copy(into, &arr, &mut self.pos)) |
95 | } |
96 | } |
97 | |
98 | impl<R> GzEncoder<R> { |
99 | /// Acquires a reference to the underlying reader. |
100 | pub fn get_ref(&self) -> &R { |
101 | self.inner.get_ref().get_ref() |
102 | } |
103 | |
104 | /// Acquires a mutable reference to the underlying reader. |
105 | /// |
106 | /// Note that mutation of the reader may result in surprising results if |
107 | /// this encoder is continued to be used. |
108 | pub fn get_mut(&mut self) -> &mut R { |
109 | self.inner.get_mut().get_mut() |
110 | } |
111 | |
112 | /// Returns the underlying stream, consuming this encoder |
113 | pub fn into_inner(self) -> R { |
114 | self.inner.into_inner().into_inner() |
115 | } |
116 | } |
117 | |
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Returns `(crc, amt)`: bytes `0..4` decoded as the checksum and bytes
/// `4..8` decoded as the byte count.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
130 | |
131 | impl<R: BufRead> Read for GzEncoder<R> { |
132 | fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> { |
133 | let mut amt: usize = 0; |
134 | if self.eof { |
135 | return self.read_footer(into); |
136 | } else if self.pos < self.header.len() { |
137 | amt += copy(into, &self.header, &mut self.pos); |
138 | if amt == into.len() { |
139 | return Ok(amt); |
140 | } |
141 | let tmp: &mut [u8] = into; |
142 | into = &mut tmp[amt..]; |
143 | } |
144 | match self.inner.read(buf:into)? { |
145 | 0 => { |
146 | self.eof = true; |
147 | self.pos = 0; |
148 | self.read_footer(into) |
149 | } |
150 | n: usize => Ok(amt + n), |
151 | } |
152 | } |
153 | } |
154 | |
155 | impl<R: BufRead + Write> Write for GzEncoder<R> { |
156 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
157 | self.get_mut().write(buf) |
158 | } |
159 | |
160 | fn flush(&mut self) -> io::Result<()> { |
161 | self.get_mut().flush() |
162 | } |
163 | } |
164 | |
165 | /// A decoder for a single member of a [gzip file]. |
166 | /// |
167 | /// This structure implements a [`Read`] interface. When read from, it reads |
168 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
169 | /// |
170 | /// After reading a single member of the gzip data this reader will return |
171 | /// Ok(0) even if there are more bytes available in the underlying reader. |
172 | /// If you need the following bytes, call `into_inner()` after Ok(0) to |
173 | /// recover the underlying reader. |
174 | /// |
175 | /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] |
176 | /// or read more |
177 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
178 | /// |
179 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
180 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
181 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
182 | /// |
183 | /// # Examples |
184 | /// |
185 | /// ``` |
186 | /// use std::io::prelude::*; |
187 | /// use std::io; |
188 | /// # use flate2::Compression; |
189 | /// # use flate2::write::GzEncoder; |
190 | /// use flate2::bufread::GzDecoder; |
191 | /// |
192 | /// # fn main() { |
193 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
194 | /// # e.write_all(b"Hello World" ).unwrap(); |
195 | /// # let bytes = e.finish().unwrap(); |
196 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
197 | /// # } |
198 | /// # |
199 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
200 | /// // Here &[u8] implements BufRead |
201 | /// |
202 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
203 | /// let mut gz = GzDecoder::new(&bytes[..]); |
204 | /// let mut s = String::new(); |
205 | /// gz.read_to_string(&mut s)?; |
206 | /// Ok(s) |
207 | /// } |
208 | /// ``` |
209 | #[derive (Debug)] |
210 | pub struct GzDecoder<R> { |
211 | state: GzState, |
212 | reader: CrcReader<deflate::bufread::DeflateDecoder<R>>, |
213 | multi: bool, |
214 | } |
215 | |
216 | #[derive (Debug)] |
217 | enum GzState { |
218 | Header(GzHeaderParser), |
219 | Body(GzHeader), |
220 | Finished(GzHeader, usize, [u8; 8]), |
221 | Err(io::Error), |
222 | End(Option<GzHeader>), |
223 | } |
224 | |
225 | impl<R: BufRead> GzDecoder<R> { |
226 | /// Creates a new decoder from the given reader, immediately parsing the |
227 | /// gzip header. |
228 | pub fn new(mut r: R) -> GzDecoder<R> { |
229 | let mut header_parser = GzHeaderParser::new(); |
230 | |
231 | let state = match header_parser.parse(&mut r) { |
232 | Ok(_) => GzState::Body(GzHeader::from(header_parser)), |
233 | Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { |
234 | GzState::Header(header_parser) |
235 | } |
236 | Err(err) => GzState::Err(err), |
237 | }; |
238 | |
239 | GzDecoder { |
240 | state, |
241 | reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), |
242 | multi: false, |
243 | } |
244 | } |
245 | |
246 | fn multi(mut self, flag: bool) -> GzDecoder<R> { |
247 | self.multi = flag; |
248 | self |
249 | } |
250 | } |
251 | |
252 | impl<R> GzDecoder<R> { |
253 | /// Returns the header associated with this stream, if it was valid |
254 | pub fn header(&self) -> Option<&GzHeader> { |
255 | match &self.state { |
256 | GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), |
257 | GzState::End(header) => header.as_ref(), |
258 | _ => None, |
259 | } |
260 | } |
261 | |
262 | /// Acquires a reference to the underlying reader. |
263 | pub fn get_ref(&self) -> &R { |
264 | self.reader.get_ref().get_ref() |
265 | } |
266 | |
267 | /// Acquires a mutable reference to the underlying stream. |
268 | /// |
269 | /// Note that mutation of the stream may result in surprising results if |
270 | /// this decoder is continued to be used. |
271 | pub fn get_mut(&mut self) -> &mut R { |
272 | self.reader.get_mut().get_mut() |
273 | } |
274 | |
275 | /// Consumes this decoder, returning the underlying reader. |
276 | pub fn into_inner(self) -> R { |
277 | self.reader.into_inner().into_inner() |
278 | } |
279 | } |
280 | |
281 | impl<R: BufRead> Read for GzDecoder<R> { |
282 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
283 | loop { |
284 | match &mut self.state { |
285 | GzState::Header(parser) => { |
286 | parser.parse(self.reader.get_mut().get_mut())?; |
287 | self.state = GzState::Body(GzHeader::from(mem::take(parser))); |
288 | } |
289 | GzState::Body(header) => { |
290 | if into.is_empty() { |
291 | return Ok(0); |
292 | } |
293 | match self.reader.read(into)? { |
294 | 0 => { |
295 | self.state = GzState::Finished(mem::take(header), 0, [0; 8]); |
296 | } |
297 | n => { |
298 | return Ok(n); |
299 | } |
300 | } |
301 | } |
302 | GzState::Finished(header, pos, buf) => { |
303 | if *pos < buf.len() { |
304 | *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; |
305 | } else { |
306 | let (crc, amt) = finish(&buf); |
307 | |
308 | if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { |
309 | self.state = GzState::End(Some(mem::take(header))); |
310 | return Err(corrupt()); |
311 | } else if self.multi { |
312 | let is_eof = self |
313 | .reader |
314 | .get_mut() |
315 | .get_mut() |
316 | .fill_buf() |
317 | .map(|buf| buf.is_empty())?; |
318 | |
319 | if is_eof { |
320 | self.state = GzState::End(Some(mem::take(header))); |
321 | } else { |
322 | self.reader.reset(); |
323 | self.reader.get_mut().reset_data(); |
324 | self.state = GzState::Header(GzHeaderParser::new()) |
325 | } |
326 | } else { |
327 | self.state = GzState::End(Some(mem::take(header))); |
328 | } |
329 | } |
330 | } |
331 | GzState::Err(err) => { |
332 | let result = Err(mem::replace(err, io::ErrorKind::Other.into())); |
333 | self.state = GzState::End(None); |
334 | return result; |
335 | } |
336 | GzState::End(_) => return Ok(0), |
337 | } |
338 | } |
339 | } |
340 | } |
341 | |
342 | impl<R: BufRead + Write> Write for GzDecoder<R> { |
343 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
344 | self.get_mut().write(buf) |
345 | } |
346 | |
347 | fn flush(&mut self) -> io::Result<()> { |
348 | self.get_mut().flush() |
349 | } |
350 | } |
351 | |
352 | /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. |
353 | /// |
354 | /// This structure implements a [`Read`] interface. When read from, it reads |
355 | /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. |
356 | /// |
357 | /// A gzip file consists of a series of *members* concatenated one after another. |
358 | /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the |
359 | /// underlying reader does. For a file, this reads to the end of the file. |
360 | /// |
361 | /// To handle members separately, see [GzDecoder] or read more |
362 | /// [in the introduction](../index.html#about-multi-member-gzip-files). |
363 | /// |
364 | /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
365 | /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html |
366 | /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html |
367 | /// |
368 | /// # Examples |
369 | /// |
370 | /// ``` |
371 | /// use std::io::prelude::*; |
372 | /// use std::io; |
373 | /// # use flate2::Compression; |
374 | /// # use flate2::write::GzEncoder; |
375 | /// use flate2::bufread::MultiGzDecoder; |
376 | /// |
377 | /// # fn main() { |
378 | /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
379 | /// # e.write_all(b"Hello World" ).unwrap(); |
380 | /// # let bytes = e.finish().unwrap(); |
381 | /// # println!("{}" , decode_reader(bytes).unwrap()); |
382 | /// # } |
383 | /// # |
384 | /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error |
385 | /// // Here &[u8] implements BufRead |
386 | /// |
387 | /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> { |
388 | /// let mut gz = MultiGzDecoder::new(&bytes[..]); |
389 | /// let mut s = String::new(); |
390 | /// gz.read_to_string(&mut s)?; |
391 | /// Ok(s) |
392 | /// } |
393 | /// ``` |
394 | #[derive (Debug)] |
395 | pub struct MultiGzDecoder<R>(GzDecoder<R>); |
396 | |
397 | impl<R: BufRead> MultiGzDecoder<R> { |
398 | /// Creates a new decoder from the given reader, immediately parsing the |
399 | /// (first) gzip header. If the gzip stream contains multiple members all will |
400 | /// be decoded. |
401 | pub fn new(r: R) -> MultiGzDecoder<R> { |
402 | MultiGzDecoder(GzDecoder::new(r).multi(flag:true)) |
403 | } |
404 | } |
405 | |
406 | impl<R> MultiGzDecoder<R> { |
407 | /// Returns the current header associated with this stream, if it's valid |
408 | pub fn header(&self) -> Option<&GzHeader> { |
409 | self.0.header() |
410 | } |
411 | |
412 | /// Acquires a reference to the underlying reader. |
413 | pub fn get_ref(&self) -> &R { |
414 | self.0.get_ref() |
415 | } |
416 | |
417 | /// Acquires a mutable reference to the underlying stream. |
418 | /// |
419 | /// Note that mutation of the stream may result in surprising results if |
420 | /// this decoder is continued to be used. |
421 | pub fn get_mut(&mut self) -> &mut R { |
422 | self.0.get_mut() |
423 | } |
424 | |
425 | /// Consumes this decoder, returning the underlying reader. |
426 | pub fn into_inner(self) -> R { |
427 | self.0.into_inner() |
428 | } |
429 | } |
430 | |
431 | impl<R: BufRead> Read for MultiGzDecoder<R> { |
432 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { |
433 | self.0.read(buf:into) |
434 | } |
435 | } |
436 | |
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write_all` rather than `write`: a short write would silently
            // truncate the fixture.
            e.write_all(expected.as_bytes()).unwrap();
            let mut b = e.finish().unwrap();
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Use a non-empty buffer: a read into an empty one returns 0 for any
        // reader and would not prove the decoder has stopped.
        let mut scratch = [0u8; 8];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
483 | |