use simd_adler32::Adler32;
use std::{
    convert::TryInto,
    io::{self, Seek, SeekFrom, Write},
};

use crate::tables::{
    BITMASKS, HUFFMAN_CODES, HUFFMAN_LENGTHS, LENGTH_TO_LEN_EXTRA, LENGTH_TO_SYMBOL,
};

/// Compressor that produces fdeflate compressed streams.
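///
/// # Example
///
/// A minimal usage sketch (assuming `Compressor` is exported at the crate
/// root; the exact output bytes depend on the crate's built-in Huffman
/// tables):
///
/// ```
/// let mut compressor = fdeflate::Compressor::new(Vec::new()).unwrap();
/// compressor.write_data(b"Hello world!").unwrap();
/// let compressed = compressor.finish().unwrap();
/// assert!(!compressed.is_empty());
/// ```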
pub struct Compressor<W: Write> {
    checksum: Adler32,
    buffer: u64,
    nbits: u8,
    writer: W,
}
impl<W: Write> Compressor<W> {
    fn write_bits(&mut self, bits: u64, nbits: u8) -> io::Result<()> {
        debug_assert!(nbits <= 64);

        // Append the new bits above any bits already buffered.
        self.buffer |= bits << self.nbits;
        self.nbits += nbits;

        if self.nbits >= 64 {
            self.writer.write_all(&self.buffer.to_le_bytes())?;
            self.nbits -= 64;
            // Keep the bits that didn't fit; the shift may be a full 64 bits,
            // which is why checked_shr is needed.
            self.buffer = bits.checked_shr((nbits - self.nbits) as u32).unwrap_or(0);
        }
        debug_assert!(self.nbits < 64);
        Ok(())
    }

    fn flush(&mut self) -> io::Result<()> {
        // Pad to a byte boundary, then write out any buffered whole bytes.
        if self.nbits % 8 != 0 {
            self.write_bits(0, 8 - self.nbits % 8)?;
        }
        if self.nbits > 0 {
            self.writer
                .write_all(&self.buffer.to_le_bytes()[..self.nbits as usize / 8])?;
            self.buffer = 0;
            self.nbits = 0;
        }
        Ok(())
    }

    fn write_run(&mut self, mut run: u32) -> io::Result<()> {
        // Emit the first zero as a literal, then cover the rest of the run
        // with length/distance pairs. The single distance code is one bit
        // long (a zero bit), hence the `+ 1`s below.
        self.write_bits(HUFFMAN_CODES[0] as u64, HUFFMAN_LENGTHS[0])?;
        run -= 1;

        while run >= 258 {
            self.write_bits(HUFFMAN_CODES[285] as u64, HUFFMAN_LENGTHS[285] + 1)?;
            run -= 258;
        }

        if run > 4 {
            let sym = LENGTH_TO_SYMBOL[run as usize - 3] as usize;
            self.write_bits(HUFFMAN_CODES[sym] as u64, HUFFMAN_LENGTHS[sym])?;

            let len_extra = LENGTH_TO_LEN_EXTRA[run as usize - 3];
            let extra = ((run - 3) & BITMASKS[len_extra as usize]) as u64;
            self.write_bits(extra, len_extra + 1)?;
        } else {
            // Runs this short are cheaper to emit as literal zeros.
            debug_assert_eq!(HUFFMAN_CODES[0], 0);
            self.write_bits(0, run as u8 * HUFFMAN_LENGTHS[0])?;
        }

        Ok(())
    }

    /// Create a new Compressor.
    pub fn new(writer: W) -> io::Result<Self> {
        let mut compressor = Self {
            checksum: Adler32::new(),
            buffer: 0,
            nbits: 0,
            writer,
        };
        compressor.write_headers()?;
        Ok(compressor)
    }

    fn write_headers(&mut self) -> io::Result<()> {
        self.write_bits(0x0178, 16)?; // zlib header

        self.write_bits(0b1, 1)?; // BFINAL
        self.write_bits(0b10, 2)?; // Dynamic Huffman block

        self.write_bits((HUFFMAN_LENGTHS.len() - 257) as u64, 5)?; // # of length / literal codes
        self.write_bits(0, 5)?; // 1 distance code
        self.write_bits(15, 4)?; // 19 code length codes (HCLEN = 15)

        // Write code lengths for the code length alphabet: symbols 16..=18
        // are unused (length 0) and the remaining 16 symbols get length 4.
        for _ in 0..3 {
            self.write_bits(0, 3)?;
        }
        for _ in 0..16 {
            self.write_bits(4, 3)?;
        }

        // Write code lengths for the length/literal alphabet. Since every
        // code length code is 4 bits long, the canonical code for a value is
        // the value itself, bit-reversed.
        for &len in &HUFFMAN_LENGTHS {
            self.write_bits((len.reverse_bits() >> 4) as u64, 4)?;
        }

        // Write the code length (1, bit-reversed) for the single distance code.
        self.write_bits(0b1000, 4)?;

        Ok(())
    }

    /// Write data to the compressor.
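    ///
    /// Runs of zero bytes are run-length encoded. Any run still pending at
    /// the end of a call is flushed, so runs do not carry across calls to
    /// this method.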
    pub fn write_data(&mut self, data: &[u8]) -> io::Result<()> {
        self.checksum.write(data);

        let mut run = 0;
        let mut chunks = data.chunks_exact(8);
        for chunk in &mut chunks {
            // Little-endian load: trailing zeros of `ichunk` correspond to
            // zero bytes at the start of the chunk, leading zeros to zero
            // bytes at the end.
            let ichunk = u64::from_le_bytes(chunk.try_into().unwrap());

            if ichunk == 0 {
                run += 8;
                continue;
            } else if run > 0 {
                // The pending run extends into this chunk's first bytes.
                let run_extra = ichunk.trailing_zeros() / 8;
                self.write_run(run + run_extra)?;
                run = 0;

                if run_extra > 0 {
                    // Zero bytes at the end of the chunk start the next run.
                    run = ichunk.leading_zeros() / 8;
                    for &b in &chunk[run_extra as usize..8 - run as usize] {
                        self.write_bits(
                            HUFFMAN_CODES[b as usize] as u64,
                            HUFFMAN_LENGTHS[b as usize],
                        )?;
                    }
                    continue;
                }
            }

            let run_start = ichunk.leading_zeros() / 8;
            if run_start > 0 {
                for &b in &chunk[..8 - run_start as usize] {
                    self.write_bits(
                        HUFFMAN_CODES[b as usize] as u64,
                        HUFFMAN_LENGTHS[b as usize],
                    )?;
                }
                run = run_start;
                continue;
            }

            // No zero bytes in this chunk: pack the Huffman codes for all
            // eight literals, four at a time so the shifts stay below 64 bits.
            let n0 = HUFFMAN_LENGTHS[chunk[0] as usize];
            let n1 = HUFFMAN_LENGTHS[chunk[1] as usize];
            let n2 = HUFFMAN_LENGTHS[chunk[2] as usize];
            let n3 = HUFFMAN_LENGTHS[chunk[3] as usize];
            let bits = HUFFMAN_CODES[chunk[0] as usize] as u64
                | ((HUFFMAN_CODES[chunk[1] as usize] as u64) << n0)
                | ((HUFFMAN_CODES[chunk[2] as usize] as u64) << (n0 + n1))
                | ((HUFFMAN_CODES[chunk[3] as usize] as u64) << (n0 + n1 + n2));
            self.write_bits(bits, n0 + n1 + n2 + n3)?;

            let n4 = HUFFMAN_LENGTHS[chunk[4] as usize];
            let n5 = HUFFMAN_LENGTHS[chunk[5] as usize];
            let n6 = HUFFMAN_LENGTHS[chunk[6] as usize];
            let n7 = HUFFMAN_LENGTHS[chunk[7] as usize];
            let bits2 = HUFFMAN_CODES[chunk[4] as usize] as u64
                | ((HUFFMAN_CODES[chunk[5] as usize] as u64) << n4)
                | ((HUFFMAN_CODES[chunk[6] as usize] as u64) << (n4 + n5))
                | ((HUFFMAN_CODES[chunk[7] as usize] as u64) << (n4 + n5 + n6));
            self.write_bits(bits2, n4 + n5 + n6 + n7)?;
        }

        if run > 0 {
            self.write_run(run)?;
        }

        for &b in chunks.remainder() {
            self.write_bits(
                HUFFMAN_CODES[b as usize] as u64,
                HUFFMAN_LENGTHS[b as usize],
            )?;
        }

        Ok(())
    }

    /// Write the remainder of the stream and return the inner writer.
    pub fn finish(mut self) -> io::Result<W> {
        // Write end of block
        self.write_bits(HUFFMAN_CODES[256] as u64, HUFFMAN_LENGTHS[256])?;
        self.flush()?;

        // Write Adler32 checksum
        let checksum: u32 = self.checksum.finish();
        self.writer.write_all(checksum.to_be_bytes().as_ref())?;
        Ok(self.writer)
    }
}

/// Compressor that only writes the stored blocks.
///
/// This is useful for writing files that are not compressed, but still need to be wrapped in a
/// zlib stream.
pub struct StoredOnlyCompressor<W> {
    writer: W,
    checksum: Adler32,
    block_bytes: u16,
}
impl<W: Write + Seek> StoredOnlyCompressor<W> {
    /// Creates a new `StoredOnlyCompressor` that writes to the given writer.
    pub fn new(mut writer: W) -> io::Result<Self> {
        writer.write_all(&[0x78, 0x01])?; // zlib header
        writer.write_all(&[0; 5])?; // placeholder stored block header

        Ok(Self {
            writer,
            checksum: Adler32::new(),
            block_bytes: 0,
        })
    }

    fn set_block_header(&mut self, size: u16, last: bool) -> io::Result<()> {
        // Seek back over the block and its placeholder header, fill in the
        // real header (BFINAL, LEN, NLEN), then return to the current position.
        self.writer.seek(SeekFrom::Current(-(size as i64 + 5)))?;
        self.writer.write_all(&[
            last as u8,
            (size & 0xFF) as u8,
            ((size >> 8) & 0xFF) as u8,
            (!size & 0xFF) as u8,
            ((!size >> 8) & 0xFF) as u8,
        ])?;
        self.writer.seek(SeekFrom::Current(size as i64))?;

        Ok(())
    }

    /// Writes the given data to the underlying writer.
    pub fn write_data(&mut self, mut data: &[u8]) -> io::Result<()> {
        self.checksum.write(data);
        while !data.is_empty() {
            if self.block_bytes == u16::MAX {
                self.set_block_header(u16::MAX, false)?;
                self.writer.write_all(&[0; 5])?; // placeholder stored block header
                self.block_bytes = 0;
            }

            let prefix_bytes = data.len().min((u16::MAX - self.block_bytes) as usize);
            self.writer.write_all(&data[..prefix_bytes])?;
            self.block_bytes += prefix_bytes as u16;
            data = &data[prefix_bytes..];
        }

        Ok(())
    }

    /// Finish writing the final block and return the underlying writer.
    pub fn finish(mut self) -> io::Result<W> {
        self.set_block_header(self.block_bytes, true)?;

        // Write Adler32 checksum
        let checksum: u32 = self.checksum.finish();
        self.writer.write_all(checksum.to_be_bytes().as_ref())?;

        Ok(self.writer)
    }
}
impl<W> StoredOnlyCompressor<W> {
    /// Return the number of bytes that will be written to the output stream
    /// for the given input size. Because this compressor only writes stored blocks,
    /// the output size is always slightly *larger* than the input size.
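    ///
    /// For example, each stored block holds at most `u16::MAX` payload bytes
    /// and costs 5 header bytes, and the zlib header plus Adler32 checksum
    /// add 6 more:
    ///
    /// ```
    /// # use fdeflate::StoredOnlyCompressor;
    /// assert_eq!(StoredOnlyCompressor::<Vec<u8>>::compressed_size(0), 11);
    /// assert_eq!(StoredOnlyCompressor::<Vec<u8>>::compressed_size(100), 100 + 5 + 6);
    /// assert_eq!(StoredOnlyCompressor::<Vec<u8>>::compressed_size(65536), 65536 + 10 + 6);
    /// ```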
    pub fn compressed_size(raw_size: usize) -> usize {
        // At least one block is always written; every u16::MAX bytes of
        // payload after the first byte starts another block.
        let num_blocks = raw_size.saturating_sub(1) / u16::MAX as usize + 1;
        raw_size + num_blocks * 5 + 6
    }
}

/// Compresses the given data.
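///
/// # Example
///
/// A quick sketch (assuming `compress_to_vec` is exported at the crate root):
///
/// ```
/// let compressed = fdeflate::compress_to_vec(b"Hello world!");
/// assert!(compressed.len() > 6); // zlib header + checksum alone take 6 bytes
/// ```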
pub fn compress_to_vec(input: &[u8]) -> Vec<u8> {
    let mut compressor = Compressor::new(Vec::with_capacity(input.len() / 4)).unwrap();
    compressor.write_data(input).unwrap();
    compressor.finish().unwrap()
}

#[cfg(test)]
mod tests {
    use super::*;
    use rand::Rng;

    fn roundtrip(data: &[u8]) {
        let compressed = compress_to_vec(data);
        let decompressed = miniz_oxide::inflate::decompress_to_vec_zlib(&compressed).unwrap();
        assert_eq!(&decompressed, data);
    }

    #[test]
    fn it_works() {
        roundtrip(b"Hello world!");
    }

    #[test]
    fn constant() {
        roundtrip(&vec![0; 2048]);
        roundtrip(&vec![5; 2048]);
        roundtrip(&vec![128; 2048]);
        roundtrip(&vec![254; 2048]);
    }

    #[test]
    fn random() {
        let mut rng = rand::thread_rng();
        let mut data = vec![0; 2048];
        for _ in 0..10 {
            for byte in &mut data {
                *byte = rng.gen();
            }
            roundtrip(&data);
        }
    }
}