1 | //! Reader-based compression/decompression streams |
2 | |
3 | use std::io::prelude::*; |
4 | use std::io::{self, BufReader}; |
5 | |
6 | #[cfg (feature = "tokio" )] |
7 | use futures::Poll; |
8 | #[cfg (feature = "tokio" )] |
9 | use tokio_io::{AsyncRead, AsyncWrite}; |
10 | |
11 | use crate::bufread; |
12 | use crate::stream::Stream; |
13 | |
14 | /// A compression stream which wraps an uncompressed stream of data. Compressed |
15 | /// data will be read from the stream. |
16 | pub struct XzEncoder<R: Read> { |
17 | inner: bufread::XzEncoder<BufReader<R>>, |
18 | } |
19 | |
20 | /// A decompression stream which wraps a compressed stream of data. Decompressed |
21 | /// data will be read from the stream. |
22 | pub struct XzDecoder<R: Read> { |
23 | inner: bufread::XzDecoder<BufReader<R>>, |
24 | } |
25 | |
26 | impl<R: Read> XzEncoder<R> { |
27 | /// Create a new compression stream which will compress at the given level |
28 | /// to read compress output to the give output stream. |
29 | /// |
30 | /// The `level` argument here is typically 0-9 with 6 being a good default. |
31 | pub fn new(r: R, level: u32) -> XzEncoder<R> { |
32 | XzEncoder { |
33 | inner: bufread::XzEncoder::new(BufReader::new(r), level), |
34 | } |
35 | } |
36 | |
37 | /// Creates a new encoder with a custom `Stream`. |
38 | /// |
39 | /// The `Stream` can be pre-configured for multithreaded encoding, different |
40 | /// compression options/tuning, etc. |
41 | pub fn new_stream(r: R, stream: Stream) -> XzEncoder<R> { |
42 | XzEncoder { |
43 | inner: bufread::XzEncoder::new_stream(BufReader::new(r), stream), |
44 | } |
45 | } |
46 | |
47 | /// Acquires a reference to the underlying stream |
48 | pub fn get_ref(&self) -> &R { |
49 | self.inner.get_ref().get_ref() |
50 | } |
51 | |
52 | /// Acquires a mutable reference to the underlying stream |
53 | /// |
54 | /// Note that mutation of the stream may result in surprising results if |
55 | /// this encoder is continued to be used. |
56 | pub fn get_mut(&mut self) -> &mut R { |
57 | self.inner.get_mut().get_mut() |
58 | } |
59 | |
60 | /// Unwrap the underlying writer, finishing the compression stream. |
61 | pub fn into_inner(self) -> R { |
62 | self.inner.into_inner().into_inner() |
63 | } |
64 | |
65 | /// Returns the number of bytes produced by the compressor |
66 | /// (e.g. the number of bytes read from this stream) |
67 | /// |
68 | /// Note that, due to buffering, this only bears any relation to |
69 | /// total_in() when the compressor chooses to flush its data |
70 | /// (unfortunately, this won't happen this won't happen in general |
71 | /// at the end of the stream, because the compressor doesn't know |
72 | /// if there's more data to come). At that point, |
73 | /// `total_out() / total_in()` would be the compression ratio. |
74 | pub fn total_out(&self) -> u64 { |
75 | self.inner.total_out() |
76 | } |
77 | |
78 | /// Returns the number of bytes consumed by the compressor |
79 | /// (e.g. the number of bytes read from the underlying stream) |
80 | pub fn total_in(&self) -> u64 { |
81 | self.inner.total_in() |
82 | } |
83 | } |
84 | |
85 | impl<R: Read> Read for XzEncoder<R> { |
86 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
87 | self.inner.read(buf) |
88 | } |
89 | } |
90 | |
91 | #[cfg (feature = "tokio" )] |
92 | impl<R: AsyncRead> AsyncRead for XzEncoder<R> {} |
93 | |
94 | impl<W: Write + Read> Write for XzEncoder<W> { |
95 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
96 | self.get_mut().write(buf) |
97 | } |
98 | |
99 | fn flush(&mut self) -> io::Result<()> { |
100 | self.get_mut().flush() |
101 | } |
102 | } |
103 | |
104 | #[cfg (feature = "tokio" )] |
105 | impl<R: AsyncWrite + Read> AsyncWrite for XzEncoder<R> { |
106 | fn shutdown(&mut self) -> Poll<(), io::Error> { |
107 | self.get_mut().shutdown() |
108 | } |
109 | } |
110 | |
111 | impl<R: Read> XzDecoder<R> { |
112 | /// Create a new decompression stream, which will read compressed |
113 | /// data from the given input stream, and decompress one xz stream. |
114 | /// It may also consume input data that follows the xz stream. |
115 | /// Use [`xz::bufread::XzDecoder`] instead to process a mix of xz and non-xz data. |
116 | pub fn new(r: R) -> XzDecoder<R> { |
117 | XzDecoder { |
118 | inner: bufread::XzDecoder::new(BufReader::new(r)), |
119 | } |
120 | } |
121 | |
122 | /// Create a new decompression stream, which will read compressed |
123 | /// data from the given input and decompress all the xz stream it contains. |
124 | pub fn new_multi_decoder(r: R) -> XzDecoder<R> { |
125 | XzDecoder { |
126 | inner: bufread::XzDecoder::new_multi_decoder(BufReader::new(r)), |
127 | } |
128 | } |
129 | |
130 | /// Creates a new decoder with a custom `Stream`. |
131 | /// |
132 | /// The `Stream` can be pre-configured for various checks, different |
133 | /// decompression options/tuning, etc. |
134 | pub fn new_stream(r: R, stream: Stream) -> XzDecoder<R> { |
135 | XzDecoder { |
136 | inner: bufread::XzDecoder::new_stream(BufReader::new(r), stream), |
137 | } |
138 | } |
139 | |
140 | /// Acquires a reference to the underlying stream |
141 | pub fn get_ref(&self) -> &R { |
142 | self.inner.get_ref().get_ref() |
143 | } |
144 | |
145 | /// Acquires a mutable reference to the underlying stream |
146 | /// |
147 | /// Note that mutation of the stream may result in surprising results if |
148 | /// this encoder is continued to be used. |
149 | pub fn get_mut(&mut self) -> &mut R { |
150 | self.inner.get_mut().get_mut() |
151 | } |
152 | |
153 | /// Unwrap the underlying writer, finishing the compression stream. |
154 | pub fn into_inner(self) -> R { |
155 | self.inner.into_inner().into_inner() |
156 | } |
157 | |
158 | /// Returns the number of bytes produced by the decompressor |
159 | /// (e.g. the number of bytes read from this stream) |
160 | /// |
161 | /// Note that, due to buffering, this only bears any relation to |
162 | /// total_in() when the decompressor reaches a sync point |
163 | /// (e.g. where the original compressed stream was flushed). |
164 | /// At that point, `total_in() / total_out()` is the compression ratio. |
165 | pub fn total_out(&self) -> u64 { |
166 | self.inner.total_out() |
167 | } |
168 | |
169 | /// Returns the number of bytes consumed by the decompressor |
170 | /// (e.g. the number of bytes read from the underlying stream) |
171 | pub fn total_in(&self) -> u64 { |
172 | self.inner.total_in() |
173 | } |
174 | } |
175 | |
176 | impl<R: Read> Read for XzDecoder<R> { |
177 | fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
178 | self.inner.read(buf) |
179 | } |
180 | } |
181 | |
182 | #[cfg (feature = "tokio" )] |
183 | impl<R: AsyncRead + Read> AsyncRead for XzDecoder<R> {} |
184 | |
185 | impl<W: Write + Read> Write for XzDecoder<W> { |
186 | fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
187 | self.get_mut().write(buf) |
188 | } |
189 | |
190 | fn flush(&mut self) -> io::Result<()> { |
191 | self.get_mut().flush() |
192 | } |
193 | } |
194 | |
195 | #[cfg (feature = "tokio" )] |
196 | impl<R: AsyncWrite + Read> AsyncWrite for XzDecoder<R> { |
197 | fn shutdown(&mut self) -> Poll<(), io::Error> { |
198 | self.get_mut().shutdown() |
199 | } |
200 | } |
201 | |
202 | #[cfg (test)] |
203 | mod tests { |
204 | use crate::read::{XzDecoder, XzEncoder}; |
205 | use rand::{thread_rng, Rng}; |
206 | use std::io::prelude::*; |
207 | use std::iter; |
208 | |
209 | #[test ] |
210 | fn smoke() { |
211 | let m: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8]; |
212 | let mut c = XzEncoder::new(m, 6); |
213 | let mut data = vec![]; |
214 | c.read_to_end(&mut data).unwrap(); |
215 | let mut d = XzDecoder::new(&data[..]); |
216 | let mut data2 = Vec::new(); |
217 | d.read_to_end(&mut data2).unwrap(); |
218 | assert_eq!(data2, m); |
219 | } |
220 | |
221 | #[test ] |
222 | fn smoke2() { |
223 | let m: &[u8] = &[1, 2, 3, 4, 5, 6, 7, 8]; |
224 | let c = XzEncoder::new(m, 6); |
225 | let mut d = XzDecoder::new(c); |
226 | let mut data = vec![]; |
227 | d.read_to_end(&mut data).unwrap(); |
228 | assert_eq!(data, [1, 2, 3, 4, 5, 6, 7, 8]); |
229 | } |
230 | |
231 | #[test ] |
232 | fn smoke3() { |
233 | let m = vec![3u8; 128 * 1024 + 1]; |
234 | let c = XzEncoder::new(&m[..], 6); |
235 | let mut d = XzDecoder::new(c); |
236 | let mut data = vec![]; |
237 | d.read_to_end(&mut data).unwrap(); |
238 | assert!(data == &m[..]); |
239 | } |
240 | |
241 | #[test ] |
242 | fn self_terminating() { |
243 | let m = vec![3u8; 128 * 1024 + 1]; |
244 | let mut c = XzEncoder::new(&m[..], 6); |
245 | |
246 | let mut result = Vec::new(); |
247 | c.read_to_end(&mut result).unwrap(); |
248 | |
249 | let mut rng = thread_rng(); |
250 | let v = iter::repeat_with(|| rng.gen::<u8>()) |
251 | .take(1024) |
252 | .collect::<Vec<_>>(); |
253 | for _ in 0..200 { |
254 | result.extend(v.iter().map(|x| *x)); |
255 | } |
256 | |
257 | let mut d = XzDecoder::new(&result[..]); |
258 | let mut data = Vec::with_capacity(m.len()); |
259 | unsafe { |
260 | data.set_len(m.len()); |
261 | } |
262 | assert!(d.read(&mut data).unwrap() == m.len()); |
263 | assert!(data == &m[..]); |
264 | } |
265 | |
266 | #[test ] |
267 | fn zero_length_read_at_eof() { |
268 | let m = Vec::new(); |
269 | let mut c = XzEncoder::new(&m[..], 6); |
270 | |
271 | let mut result = Vec::new(); |
272 | c.read_to_end(&mut result).unwrap(); |
273 | |
274 | let mut d = XzDecoder::new(&result[..]); |
275 | let mut data = Vec::new(); |
276 | assert!(d.read(&mut data).unwrap() == 0); |
277 | } |
278 | |
279 | #[test ] |
280 | fn zero_length_read_with_data() { |
281 | let m = vec![3u8; 128 * 1024 + 1]; |
282 | let mut c = XzEncoder::new(&m[..], 6); |
283 | |
284 | let mut result = Vec::new(); |
285 | c.read_to_end(&mut result).unwrap(); |
286 | |
287 | let mut d = XzDecoder::new(&result[..]); |
288 | let mut data = Vec::new(); |
289 | assert!(d.read(&mut data).unwrap() == 0); |
290 | } |
291 | |
292 | #[test ] |
293 | fn qc() { |
294 | ::quickcheck::quickcheck(test as fn(_) -> _); |
295 | |
296 | fn test(v: Vec<u8>) -> bool { |
297 | let r = XzEncoder::new(&v[..], 6); |
298 | let mut r = XzDecoder::new(r); |
299 | let mut v2 = Vec::new(); |
300 | r.read_to_end(&mut v2).unwrap(); |
301 | v == v2 |
302 | } |
303 | } |
304 | |
305 | #[test ] |
306 | fn two_streams() { |
307 | let mut input_stream1: Vec<u8> = Vec::new(); |
308 | let mut input_stream2: Vec<u8> = Vec::new(); |
309 | let mut all_input: Vec<u8> = Vec::new(); |
310 | |
311 | // Generate input data. |
312 | const STREAM1_SIZE: usize = 1024; |
313 | for num in 0..STREAM1_SIZE { |
314 | input_stream1.push(num as u8) |
315 | } |
316 | const STREAM2_SIZE: usize = 532; |
317 | for num in 0..STREAM2_SIZE { |
318 | input_stream2.push((num + 32) as u8) |
319 | } |
320 | all_input.extend(&input_stream1); |
321 | all_input.extend(&input_stream2); |
322 | |
323 | // Make a vector with compressed data |
324 | let mut decoder_input = Vec::new(); |
325 | { |
326 | let mut encoder = XzEncoder::new(&input_stream1[..], 6); |
327 | encoder.read_to_end(&mut decoder_input).unwrap(); |
328 | } |
329 | { |
330 | let mut encoder = XzEncoder::new(&input_stream2[..], 6); |
331 | encoder.read_to_end(&mut decoder_input).unwrap(); |
332 | } |
333 | |
334 | // Decoder must be able to read the 2 concatenated xz streams and get the same data as input. |
335 | let mut decoder_reader = &decoder_input[..]; |
336 | { |
337 | // using `XzDecoder::new` here would fail because only 1 xz stream would be processed. |
338 | let mut decoder = XzDecoder::new_multi_decoder(&mut decoder_reader); |
339 | let mut decompressed_data = vec![0u8; all_input.len()]; |
340 | |
341 | assert_eq!( |
342 | decoder.read(&mut decompressed_data).unwrap(), |
343 | all_input.len() |
344 | ); |
345 | assert_eq!(decompressed_data, &all_input[..]); |
346 | } |
347 | } |
348 | } |
349 | |