1 | use simd_adler32::Adler32; |
2 | use std::{ |
3 | convert::TryInto, |
4 | io::{self, Seek, SeekFrom, Write}, |
5 | }; |
6 | |
7 | use crate::tables::{ |
8 | BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL, |
9 | }; |
10 | |
/// Compressor that produces fdeflate compressed streams.
pub struct Compressor<W: Write> {
    /// Running Adler-32 checksum of all raw bytes passed to `write_data`;
    /// emitted as the zlib trailer by `finish`.
    checksum: Adler32,
    /// Pending output bits, packed LSB-first.
    buffer: u64,
    /// Number of valid bits in `buffer`; always < 64 between calls
    /// (see the debug_assert in `write_bits`).
    nbits: u8,
    /// Underlying sink that receives the compressed bytes.
    writer: W,
}
18 | impl<W: Write> Compressor<W> { |
    /// Append the low `nbits` bits of `bits` to the output stream, flushing a
    /// whole 64-bit word to the writer whenever the bit buffer fills up.
    ///
    /// Bits are packed LSB-first as DEFLATE requires. Callers must ensure the
    /// bits of `bits` above `nbits` are zero, or they would corrupt `buffer`.
    fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> {
        debug_assert!(nbits <= 64);

        // Splice the new bits above the ones already pending.
        self.buffer |= bits << self.nbits;
        self.nbits += nbits;

        if self.nbits >= 64 {
            // Buffer full: emit it as 8 little-endian bytes and keep whatever
            // overflowed past the 64-bit boundary.
            self.writer.write_all(&self.buffer.to_le_bytes())?;
            self.nbits -= 64;
            // checked_shr covers the shift-by-64 case (nothing overflowed),
            // which a plain `>>` would reject as an overflowing shift.
            self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0);
        }
        debug_assert!(self.nbits < 64);
        Ok(())
    }
33 | |
34 | fn flush(&mut self) -> io::Result<()> { |
35 | if self.nbits % 8 != 0 { |
36 | self.write_bits(0, 8 - self.nbits % 8)?; |
37 | } |
38 | if self.nbits > 0 { |
39 | self.writer |
40 | .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8]) |
41 | .unwrap(); |
42 | self.buffer = 0; |
43 | self.nbits = 0; |
44 | } |
45 | Ok(()) |
46 | } |
47 | |
    /// Emit a run of `run` zero bytes (`run >= 1`) as one literal zero
    /// followed by back-references.
    ///
    /// Each length symbol gets one extra low bit appended (the `+ 1` on the
    /// bit counts): the stream's single distance code, which has length 1 and
    /// code 0 (see `write_headers`).
    fn write_run(&mut self, mut run: u32) -> io::Result<()> {
        // The first zero is sent as a literal so the distance-1 matches below
        // have a prior byte to copy from.
        self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?;
        run -= 1;

        // Symbol 285 encodes the maximum match length 258 with no extra bits.
        while run >= 258 {
            self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?;
            run -= 258;
        }

        if run > 4 {
            // Encode the remaining 5..=257 zeros as one length symbol plus
            // its extra bits (tables are indexed by length - 3).
            let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize;
            self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?;

            let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3];
            let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64;
            // `+ 1` again appends the zero-valued distance-code bit.
            self.write_bits(extra, len_extra + 1)?;
        } else {
            // 0..=4 leftover zeros are cheaper as plain literals. This writes
            // them all at once, relying on the literal-0 code being all zero
            // bits, which the assert checks.
            debug_assert_eq!(HUFFMAN_CODES[0], 0);
            self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?;
        }

        Ok(())
    }
71 | |
72 | /// Create a new Compressor. |
73 | pub fn new(writer: W) -> io::Result<Self> { |
74 | let mut compressor = Self { |
75 | checksum: Adler32::new(), |
76 | buffer: 0, |
77 | nbits: 0, |
78 | writer, |
79 | }; |
80 | compressor.write_headers()?; |
81 | Ok(compressor) |
82 | } |
83 | |
84 | fn write_headers(&mut self) -> io::Result<()> { |
85 | self.write_bits(0x0178, 16)?; // zlib header |
86 | |
87 | self.write_bits(0b1, 1)?; // BFINAL |
88 | self.write_bits(0b10, 2)?; // Dynamic Huffman block |
89 | |
90 | self.write_bits((HUFFMAN_LENGTHS.len() - 257) as u64, 5)?; // # of length / literal codes |
91 | self.write_bits(0, 5)?; // 1 distance code |
92 | self.write_bits(15, 4)?; // 16 code length codes |
93 | |
94 | // Write code lengths for code length alphabet |
95 | for _ in 0..3 { |
96 | self.write_bits(0, 3)?; |
97 | } |
98 | for _ in 0..16 { |
99 | self.write_bits(4, 3)?; |
100 | } |
101 | |
102 | // Write code lengths for length/literal alphabet |
103 | for &len in &HUFFMAN_LENGTHS { |
104 | self.write_bits((len.reverse_bits() >> 4) as u64, 4)?; |
105 | } |
106 | |
107 | // Write code lengths for distance alphabet |
108 | for _ in 0..1 { |
109 | self.write_bits(0b1000, 4)?; |
110 | } |
111 | |
112 | Ok(()) |
113 | } |
114 | |
    /// Write data to the compressor.
    ///
    /// Zero bytes are run-length encoded via `write_run`; everything else is
    /// emitted as Huffman-coded literals. The data is scanned eight bytes at
    /// a time, with a packed fast path for chunks that neither start nor end
    /// a zero run.
    pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> {
        self.checksum.write(data);

        // Number of zero bytes seen but not yet emitted.
        let mut run = 0;
        let mut chunks = data.chunks_exact(8);
        for chunk in &mut chunks {
            // Little-endian load: chunk[0] is the least significant byte, so
            // trailing_zeros counts zero bytes at the *start* of the chunk
            // and leading_zeros counts zero bytes at the *end*.
            let ichunk = u64::from_le_bytes(chunk.try_into().unwrap());

            if ichunk == 0 {
                // Entirely zero: just extend the current run.
                run += 8;
                continue;
            } else if run > 0 {
                // The pending run continues through this chunk's first bytes;
                // flush it including those bytes.
                let run_extra = ichunk.trailing_zeros() / 8;
                self.write_run(run + run_extra)?;
                run = 0;

                if run_extra > 0 {
                    // The chunk may also end in zeros: begin a new run there
                    // and emit the non-zero middle as literals.
                    run = ichunk.leading_zeros() / 8;
                    for &b in &chunk[run_extra as usize..8 - run as usize] {
                        self.write_bits(
                            HUFFMAN_CODES[b as usize] as u64,
                            HUFFMAN_LENGTHS[b as usize],
                        )?;
                    }
                    continue;
                }
                // run_extra == 0: chunk[0] is non-zero, fall through.
            }

            // No pending run. If the chunk ends in zeros, emit the leading
            // bytes as literals and start a run with the trailing zeros.
            let run_start = ichunk.leading_zeros() / 8;
            if run_start > 0 {
                for &b in &chunk[..8 - run_start as usize] {
                    self.write_bits(
                        HUFFMAN_CODES[b as usize] as u64,
                        HUFFMAN_LENGTHS[b as usize],
                    )?;
                }
                run = run_start;
                continue;
            }

            // Fast path: no run pending and the last byte is non-zero (any
            // interior zeros are simply emitted as literal codes). Four codes
            // of at most 15 bits each — lengths are encoded in 4 bits in the
            // header — always fit in one 64-bit write; do that twice.
            let n0 = HUFFMAN_LENGTHS[chunk[0] as usize];
            let n1 = HUFFMAN_LENGTHS[chunk[1] as usize];
            let n2 = HUFFMAN_LENGTHS[chunk[2] as usize];
            let n3 = HUFFMAN_LENGTHS[chunk[3] as usize];
            let bits = HUFFMAN_CODES[chunk[0] as usize] as u64
                | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0)
                | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1))
                | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2));
            self.write_bits(bits, n0 + n1 + n2 + n3)?;

            let n4 = HUFFMAN_LENGTHS[chunk[4] as usize];
            let n5 = HUFFMAN_LENGTHS[chunk[5] as usize];
            let n6 = HUFFMAN_LENGTHS[chunk[6] as usize];
            let n7 = HUFFMAN_LENGTHS[chunk[7] as usize];
            let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64
                | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4)
                | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5))
                | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6));
            self.write_bits(bits2, n4 + n5 + n6 + n7)?;
        }

        // Flush a run that extends to the end of the chunked region.
        if run > 0 {
            self.write_run(run)?;
        }

        // Remaining 0..=7 tail bytes: emitted as plain literals (zeros
        // included — such short runs are not worth run-length encoding).
        for &b in chunks.remainder() {
            self.write_bits(
                HUFFMAN_CODES[b as usize] as u64,
                HUFFMAN_LENGTHS[b as usize],
            )?;
        }

        Ok(())
    }
190 | |
191 | /// Write the remainder of the stream and return the inner writer. |
192 | pub fn finish(mut self) -> io::Result<W> { |
193 | // Write end of block |
194 | self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?; |
195 | self.flush()?; |
196 | |
197 | // Write Adler32 checksum |
198 | let checksum: u32 = self.checksum.finish(); |
199 | self.writer |
200 | .write_all(checksum.to_be_bytes().as_ref()) |
201 | .unwrap(); |
202 | Ok(self.writer) |
203 | } |
204 | } |
205 | |
/// Compressor that only writes the stored blocks.
///
/// This is useful for writing files that are not compressed, but still need to be wrapped in a
/// zlib stream.
pub struct StoredOnlyCompressor<W> {
    /// Underlying sink; the `Write + Seek` impl requires seekability so block
    /// headers can be patched in place after their size is known.
    writer: W,
    /// Running Adler-32 checksum of all bytes passed to `write_data`.
    checksum: Adler32,
    /// Payload bytes written into the current stored block (at most u16::MAX).
    block_bytes: u16,
}
215 | impl<W: Write + Seek> StoredOnlyCompressor<W> { |
216 | /// Creates a new `StoredOnlyCompressor` that writes to the given writer. |
217 | pub fn new(mut writer: W) -> io::Result<Self> { |
218 | writer.write_all(&[0x78, 0x01])?; // zlib header |
219 | writer.write_all(&[0; 5])?; // placeholder stored block header |
220 | |
221 | Ok(Self { |
222 | writer, |
223 | checksum: Adler32::new(), |
224 | block_bytes: 0, |
225 | }) |
226 | } |
227 | |
    /// Fill in the 5-byte stored-block header that precedes the current
    /// block's `size` bytes of data, then restore the write position.
    ///
    /// Seeks back over the data plus the placeholder, writes the real header
    /// (BFINAL flag byte, then LEN and NLEN = !LEN, both little-endian, as
    /// stored DEFLATE blocks require), and seeks forward to the end again.
    fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> {
        self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?;
        self.writer.write_all(&[
            last as u8,                  // BFINAL (BTYPE = 00 for stored)
            (size & 0xFF) as u8,         // LEN, low byte
            ((size >> 8) & 0xFF) as u8,  // LEN, high byte
            (!size & 0xFF) as u8,        // NLEN, low byte
            ((!size >> 8) & 0xFF) as u8, // NLEN, high byte
        ])?;
        self.writer.seek(SeekFrom::Current(size as i64))?;

        Ok(())
    }
241 | |
242 | /// Writes the given data to the underlying writer. |
243 | pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> { |
244 | self.checksum.write(data); |
245 | while !data.is_empty() { |
246 | if self.block_bytes == u16::MAX { |
247 | self.set_block_header(u16::MAX, false)?; |
248 | self.writer.write_all(&[0; 5])?; // placeholder stored block header |
249 | self.block_bytes = 0; |
250 | } |
251 | |
252 | let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize); |
253 | self.writer.write_all(&data[..prefix_bytes])?; |
254 | self.block_bytes += prefix_bytes as u16; |
255 | data = &data[prefix_bytes..]; |
256 | } |
257 | |
258 | Ok(()) |
259 | } |
260 | |
261 | /// Finish writing the final block and return the underlying writer. |
262 | pub fn finish(mut self) -> io::Result<W> { |
263 | self.set_block_header(self.block_bytes, true)?; |
264 | |
265 | // Write Adler32 checksum |
266 | let checksum: u32 = self.checksum.finish(); |
267 | self.writer |
268 | .write_all(checksum.to_be_bytes().as_ref()) |
269 | .unwrap(); |
270 | |
271 | Ok(self.writer) |
272 | } |
273 | } |
274 | impl<W> StoredOnlyCompressor<W> { |
275 | /// Return the number of bytes that will be written to the output stream |
276 | /// for the given input size. Because this compressor only writes stored blocks, |
277 | /// the output size is always slightly *larger* than the input size. |
278 | pub fn compressed_size(raw_size: usize) -> usize { |
279 | (raw_size.saturating_sub(1) / u16::MAX as usize) * (u16::MAX as usize + 5) |
280 | + (raw_size % u16::MAX as usize + 5) |
281 | + 6 |
282 | } |
283 | } |
284 | |
285 | /// Compresses the given data. |
286 | pub fn compress_to_vec(input: &[u8]) -> Vec<u8> { |
287 | let mut compressor: Compressor> = Compressor::new(writer:Vec::with_capacity(input.len() / 4)).unwrap(); |
288 | compressor.write_data(input).unwrap(); |
289 | compressor.finish().unwrap() |
290 | } |
291 | |
#[cfg(test)]
mod tests {
    use super::*;
    use rand::Rng;

    /// Compress `data`, decompress it with miniz_oxide, and require that the
    /// round trip reproduces the original bytes.
    fn roundtrip(data: &[u8]) {
        let compressed = compress_to_vec(data);
        let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap();
        assert_eq!(&decompressed, data);
    }

    #[test]
    fn it_works() {
        roundtrip(b"Hello world!");
    }

    #[test]
    fn constant() {
        // Exercise the zero-run path (0) and several literal-only fills.
        for value in [0u8, 5, 128, 254] {
            roundtrip(&vec![value; 2048]);
        }
    }

    #[test]
    fn random() {
        let mut rng = rand::thread_rng();
        let mut data = vec![0; 2048];
        for _ in 0..10 {
            for byte in data.iter_mut() {
                *byte = rng.gen();
            }
            roundtrip(&data);
        }
    }
}
328 | |