1 | //! Streaming compression functionality. |
2 | |
3 | use alloc::boxed::Box; |
4 | use core::convert::TryInto; |
5 | use core::{cmp, mem}; |
6 | |
7 | use super::super::*; |
8 | use super::deflate_flags::*; |
9 | use super::CompressionLevel; |
10 | use crate::deflate::buffer::{ |
11 | update_hash, HashBuffers, LocalBuf, LZ_CODE_BUF_SIZE, LZ_DICT_FULL_SIZE, LZ_HASH_BITS, |
12 | LZ_HASH_SHIFT, LZ_HASH_SIZE, OUT_BUF_SIZE, |
13 | }; |
14 | use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER, MZ_ADLER32_INIT}; |
15 | use crate::DataFormat; |
16 | |
17 | // Currently not bubbled up outside this module, so can fill in with more |
18 | // context eventually if needed. |
19 | type Result<T, E = Error> = core::result::Result<T, E>; |
20 | struct Error {} |
21 | |
22 | const MAX_PROBES_MASK: i32 = 0xFFF; |
23 | |
24 | const MAX_SUPPORTED_HUFF_CODESIZE: usize = 32; |
25 | |
26 | /// Length code for length values. |
27 | #[rustfmt::skip] |
28 | const LEN_SYM: [u16; 256] = [ |
29 | 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, |
30 | 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, |
31 | 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, |
32 | 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, |
33 | 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, |
34 | 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, |
35 | 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, |
36 | 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, |
37 | 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, |
38 | 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, |
39 | 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, |
40 | 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, |
41 | 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, |
42 | 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, |
43 | 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, |
44 | 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 |
45 | ]; |
46 | |
47 | /// Number of extra bits for length values. |
48 | #[rustfmt::skip] |
49 | const LEN_EXTRA: [u8; 256] = [ |
50 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
51 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
52 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
53 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
54 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
55 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
56 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
57 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
58 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
59 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
60 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
61 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
62 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
63 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
64 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
65 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 |
66 | ]; |
67 | |
68 | /// Distance codes for distances smaller than 512. |
69 | #[rustfmt::skip] |
70 | const SMALL_DIST_SYM: [u8; 512] = [ |
71 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, |
72 | 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, |
73 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, |
74 | 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, |
75 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
76 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
77 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
78 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
79 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
80 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
81 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
82 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
83 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
84 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
85 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
86 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
87 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
88 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
89 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
90 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
91 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
92 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
93 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
94 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
95 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
96 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
97 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
98 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
99 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
100 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
101 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
102 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 |
103 | ]; |
104 | |
105 | /// Number of extra bits for distances smaller than 512. |
106 | #[rustfmt::skip] |
107 | const SMALL_DIST_EXTRA: [u8; 512] = [ |
108 | 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
109 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
110 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
111 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
112 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
113 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
114 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
115 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
116 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
117 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
118 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
119 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
120 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
121 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
122 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
123 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 |
124 | ]; |
125 | |
126 | /// Base values to calculate distances above 512. |
127 | #[rustfmt::skip] |
128 | const LARGE_DIST_SYM: [u8; 128] = [ |
129 | 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, |
130 | 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, |
131 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, |
132 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, |
133 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, |
134 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, |
135 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, |
136 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 |
137 | ]; |
138 | |
139 | /// Number of extra bits distances above 512. |
140 | #[rustfmt::skip] |
141 | const LARGE_DIST_EXTRA: [u8; 128] = [ |
142 | 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, |
143 | 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, |
144 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
145 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
146 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
147 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
148 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
149 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 |
150 | ]; |
151 | |
152 | #[rustfmt::skip] |
153 | const BITMASKS: [u32; 17] = [ |
154 | 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, |
155 | 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF |
156 | ]; |
157 | |
158 | /// The maximum number of checks for matches in the hash table the compressor will make for each |
159 | /// compression level. |
160 | const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500]; |
161 | |
162 | #[derive (Copy, Clone)] |
163 | struct SymFreq { |
164 | key: u16, |
165 | sym_index: u16, |
166 | } |
167 | |
168 | pub mod deflate_flags { |
169 | /// Whether to use a zlib wrapper. |
170 | pub const TDEFL_WRITE_ZLIB_HEADER: u32 = 0x0000_1000; |
171 | /// Should we compute the adler32 checksum. |
172 | pub const TDEFL_COMPUTE_ADLER32: u32 = 0x0000_2000; |
173 | /// Should we use greedy parsing (as opposed to lazy parsing where look ahead one or more |
174 | /// bytes to check for better matches.) |
175 | pub const TDEFL_GREEDY_PARSING_FLAG: u32 = 0x0000_4000; |
176 | /// Used in miniz to skip zero-initializing hash and dict. We don't do this here, so |
177 | /// this flag is ignored. |
178 | pub const TDEFL_NONDETERMINISTIC_PARSING_FLAG: u32 = 0x0000_8000; |
179 | /// Only look for matches with a distance of 0. |
180 | pub const TDEFL_RLE_MATCHES: u32 = 0x0001_0000; |
181 | /// Only use matches that are at least 6 bytes long. |
182 | pub const TDEFL_FILTER_MATCHES: u32 = 0x0002_0000; |
183 | /// Force the compressor to only output static blocks. (Blocks using the default huffman codes |
184 | /// specified in the deflate specification.) |
185 | pub const TDEFL_FORCE_ALL_STATIC_BLOCKS: u32 = 0x0004_0000; |
186 | /// Force the compressor to only output raw/uncompressed blocks. |
187 | pub const TDEFL_FORCE_ALL_RAW_BLOCKS: u32 = 0x0008_0000; |
188 | } |
189 | |
190 | /// Strategy setting for compression. |
191 | /// |
192 | /// The non-default settings offer some special-case compression variants. |
193 | #[repr (i32)] |
194 | #[derive (Debug, Copy, Clone, PartialEq, Eq, Hash)] |
195 | pub enum CompressionStrategy { |
196 | /// Don't use any of the special strategies. |
197 | Default = 0, |
198 | /// Only use matches that are at least 5 bytes long. |
199 | Filtered = 1, |
200 | /// Don't look for matches, only huffman encode the literals. |
201 | HuffmanOnly = 2, |
202 | /// Only look for matches with a distance of 1, i.e do run-length encoding only. |
203 | RLE = 3, |
204 | /// Only use static/fixed blocks. (Blocks using the default huffman codes |
205 | /// specified in the deflate specification.) |
206 | Fixed = 4, |
207 | } |
208 | |
209 | /// A list of deflate flush types. |
210 | #[derive (Debug, Copy, Clone, PartialEq, Eq, Hash)] |
211 | pub enum TDEFLFlush { |
212 | /// Normal operation. |
213 | /// |
214 | /// Compress as much as there is space for, and then return waiting for more input. |
215 | None = 0, |
216 | |
217 | /// Try to flush all the current data and output an empty raw block. |
218 | Sync = 2, |
219 | |
220 | /// Same as [`Sync`][Self::Sync], but reset the dictionary so that the following data does not |
221 | /// depend on previous data. |
222 | Full = 3, |
223 | |
224 | /// Try to flush everything and end the deflate stream. |
225 | /// |
226 | /// On success this will yield a [`TDEFLStatus::Done`] return status. |
227 | Finish = 4, |
228 | } |
229 | |
230 | impl From<MZFlush> for TDEFLFlush { |
231 | fn from(flush: MZFlush) -> Self { |
232 | match flush { |
233 | MZFlush::None => TDEFLFlush::None, |
234 | MZFlush::Sync => TDEFLFlush::Sync, |
235 | MZFlush::Full => TDEFLFlush::Full, |
236 | MZFlush::Finish => TDEFLFlush::Finish, |
237 | _ => TDEFLFlush::None, // TODO: ??? What to do ??? |
238 | } |
239 | } |
240 | } |
241 | |
242 | impl TDEFLFlush { |
243 | pub fn new(flush: i32) -> Result<Self, MZError> { |
244 | match flush { |
245 | 0 => Ok(TDEFLFlush::None), |
246 | 2 => Ok(TDEFLFlush::Sync), |
247 | 3 => Ok(TDEFLFlush::Full), |
248 | 4 => Ok(TDEFLFlush::Finish), |
249 | _ => Err(MZError::Param), |
250 | } |
251 | } |
252 | } |
253 | |
254 | /// Return status of compression. |
255 | #[repr (i32)] |
256 | #[derive (Debug, Copy, Clone, PartialEq, Eq, Hash)] |
257 | pub enum TDEFLStatus { |
258 | /// Usage error. |
259 | /// |
260 | /// This indicates that either the [`CompressorOxide`] experienced a previous error, or the |
261 | /// stream has already been [`TDEFLFlush::Finish`]'d. |
262 | BadParam = -2, |
263 | |
264 | /// Error putting data into output buffer. |
265 | /// |
266 | /// This usually indicates a too-small buffer. |
267 | PutBufFailed = -1, |
268 | |
269 | /// Compression succeeded normally. |
270 | Okay = 0, |
271 | |
272 | /// Compression succeeded and the deflate stream was ended. |
273 | /// |
274 | /// This is the result of calling compression with [`TDEFLFlush::Finish`]. |
275 | Done = 1, |
276 | } |
277 | |
278 | const MAX_HUFF_SYMBOLS: usize = 288; |
279 | /// Size of hash chain for fast compression mode. |
280 | const LEVEL1_HASH_SIZE_MASK: u32 = 4095; |
281 | /// The number of huffman tables used by the compressor. |
282 | /// Literal/length, Distances and Length of the huffman codes for the other two tables. |
283 | const MAX_HUFF_TABLES: usize = 3; |
284 | /// Literal/length codes |
285 | const MAX_HUFF_SYMBOLS_0: usize = 288; |
286 | /// Distance codes. |
287 | const MAX_HUFF_SYMBOLS_1: usize = 32; |
288 | /// Huffman length values. |
289 | const MAX_HUFF_SYMBOLS_2: usize = 19; |
290 | /// Size of the chained hash table. |
291 | pub(crate) const LZ_DICT_SIZE: usize = 32_768; |
292 | /// Mask used when stepping through the hash chains. |
293 | const LZ_DICT_SIZE_MASK: usize = (LZ_DICT_SIZE as u32 - 1) as usize; |
294 | /// The minimum length of a match. |
295 | const MIN_MATCH_LEN: u8 = 3; |
296 | /// The maximum length of a match. |
297 | pub(crate) const MAX_MATCH_LEN: usize = 258; |
298 | |
299 | const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER; |
300 | |
301 | mod zlib { |
302 | const DEFAULT_CM: u8 = 8; |
303 | const DEFAULT_CINFO: u8 = 7 << 4; |
304 | const _DEFAULT_FDICT: u8 = 0; |
305 | const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; |
306 | /// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid. |
307 | const FCHECK_DIVISOR: u8 = 31; |
308 | |
309 | /// Generate FCHECK from CMF and FLG (without FCKECH )so that they are correct according to the |
310 | /// specification, i.e (CMF*256 + FCHK) % 31 = 0. |
311 | /// Returns flg with the FCHKECK bits added (any existing FCHECK bits are ignored). |
312 | fn add_fcheck(cmf: u8, flg: u8) -> u8 { |
313 | let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR); |
314 | |
315 | // Clear existing FCHECK if any |
316 | let flg = flg & 0b11100000; |
317 | |
318 | // Casting is safe as rem can't overflow since it is a value mod 31 |
319 | // We can simply add the value to flg as (31 - rem) will never be above 2^5 |
320 | flg + (FCHECK_DIVISOR - rem as u8) |
321 | } |
322 | |
323 | fn zlib_level_from_flags(flags: u32) -> u8 { |
324 | use super::NUM_PROBES; |
325 | |
326 | let num_probes = flags & (super::MAX_PROBES_MASK as u32); |
327 | if flags & super::TDEFL_GREEDY_PARSING_FLAG != 0 { |
328 | if num_probes <= 1 { |
329 | 0 |
330 | } else { |
331 | 1 |
332 | } |
333 | } else if num_probes >= NUM_PROBES[9] { |
334 | 3 |
335 | } else { |
336 | 2 |
337 | } |
338 | } |
339 | |
340 | /// Get the zlib header for the level using the default window size and no |
341 | /// dictionary. |
342 | fn header_from_level(level: u8) -> [u8; 2] { |
343 | let cmf = DEFAULT_CMF; |
344 | [cmf, add_fcheck(cmf, (level as u8) << 6)] |
345 | } |
346 | |
347 | /// Create a zlib header from the given compression flags. |
348 | /// Only level is considered. |
349 | pub fn header_from_flags(flags: u32) -> [u8; 2] { |
350 | let level = zlib_level_from_flags(flags); |
351 | header_from_level(level) |
352 | } |
353 | |
354 | #[cfg (test)] |
355 | mod test { |
356 | #[test ] |
357 | fn zlib() { |
358 | use super::super::*; |
359 | use super::*; |
360 | |
361 | let test_level = |level, expected| { |
362 | let flags = create_comp_flags_from_zip_params( |
363 | level, |
364 | MZ_DEFAULT_WINDOW_BITS, |
365 | CompressionStrategy::Default as i32, |
366 | ); |
367 | assert_eq!(zlib_level_from_flags(flags), expected); |
368 | }; |
369 | |
370 | assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2); |
371 | test_level(0, 0); |
372 | test_level(1, 0); |
373 | test_level(2, 1); |
374 | test_level(3, 1); |
375 | for i in 4..=8 { |
376 | test_level(i, 2) |
377 | } |
378 | test_level(9, 3); |
379 | test_level(10, 3); |
380 | } |
381 | |
382 | #[test ] |
383 | fn test_header() { |
384 | let header = super::header_from_level(3); |
385 | assert_eq!( |
386 | ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, |
387 | 0 |
388 | ); |
389 | } |
390 | } |
391 | } |
392 | |
393 | fn memset<T: Copy>(slice: &mut [T], val: T) { |
394 | for x: &mut T in slice { |
395 | *x = val |
396 | } |
397 | } |
398 | |
399 | #[cfg (test)] |
400 | #[inline ] |
401 | fn write_u16_le(val: u16, slice: &mut [u8], pos: usize) { |
402 | slice[pos] = val as u8; |
403 | slice[pos + 1] = (val >> 8) as u8; |
404 | } |
405 | |
406 | // Read the two bytes starting at pos and interpret them as an u16. |
407 | #[inline ] |
408 | const fn read_u16_le(slice: &[u8], pos: usize) -> u16 { |
409 | // The compiler is smart enough to optimize this into an unaligned load. |
410 | slice[pos] as u16 | ((slice[pos + 1] as u16) << 8) |
411 | } |
412 | |
413 | /// Main compression struct. |
414 | pub struct CompressorOxide { |
415 | lz: LZOxide, |
416 | params: ParamsOxide, |
417 | huff: Box<HuffmanOxide>, |
418 | dict: DictOxide, |
419 | } |
420 | |
421 | impl CompressorOxide { |
422 | /// Create a new `CompressorOxide` with the given flags. |
423 | /// |
424 | /// # Notes |
425 | /// This function may be changed to take different parameters in the future. |
426 | pub fn new(flags: u32) -> Self { |
427 | CompressorOxide { |
428 | lz: LZOxide::new(), |
429 | params: ParamsOxide::new(flags), |
430 | /// Put HuffmanOxide on the heap with default trick to avoid |
431 | /// excessive stack copies. |
432 | huff: Box::default(), |
433 | dict: DictOxide::new(flags), |
434 | } |
435 | } |
436 | |
437 | /// Get the adler32 checksum of the currently encoded data. |
438 | pub const fn adler32(&self) -> u32 { |
439 | self.params.adler32 |
440 | } |
441 | |
442 | /// Get the return status of the previous [`compress`](fn.compress.html) |
443 | /// call with this compressor. |
444 | pub const fn prev_return_status(&self) -> TDEFLStatus { |
445 | self.params.prev_return_status |
446 | } |
447 | |
448 | /// Get the raw compressor flags. |
449 | /// |
450 | /// # Notes |
451 | /// This function may be deprecated or changed in the future to use more rust-style flags. |
452 | pub const fn flags(&self) -> i32 { |
453 | self.params.flags as i32 |
454 | } |
455 | |
456 | /// Returns whether the compressor is wrapping the data in a zlib format or not. |
457 | pub fn data_format(&self) -> DataFormat { |
458 | if (self.params.flags & TDEFL_WRITE_ZLIB_HEADER) != 0 { |
459 | DataFormat::Zlib |
460 | } else { |
461 | DataFormat::Raw |
462 | } |
463 | } |
464 | |
465 | /// Reset the state of the compressor, keeping the same parameters. |
466 | /// |
467 | /// This avoids re-allocating data. |
468 | pub fn reset(&mut self) { |
469 | // LZ buf and huffman has no settings or dynamic memory |
470 | // that needs to be saved, so we simply replace them. |
471 | self.lz = LZOxide::new(); |
472 | self.params.reset(); |
473 | *self.huff = HuffmanOxide::default(); |
474 | self.dict.reset(); |
475 | } |
476 | |
477 | /// Set the compression level of the compressor. |
478 | /// |
479 | /// Using this to change level after compression has started is supported. |
480 | /// # Notes |
481 | /// The compression strategy will be reset to the default one when this is called. |
482 | pub fn set_compression_level(&mut self, level: CompressionLevel) { |
483 | let format = self.data_format(); |
484 | self.set_format_and_level(format, level as u8); |
485 | } |
486 | |
487 | /// Set the compression level of the compressor using an integer value. |
488 | /// |
489 | /// Using this to change level after compression has started is supported. |
490 | /// # Notes |
491 | /// The compression strategy will be reset to the default one when this is called. |
492 | pub fn set_compression_level_raw(&mut self, level: u8) { |
493 | let format = self.data_format(); |
494 | self.set_format_and_level(format, level); |
495 | } |
496 | |
497 | /// Update the compression settings of the compressor. |
498 | /// |
499 | /// Changing the `DataFormat` after compression has started will result in |
500 | /// a corrupted stream. |
501 | /// |
502 | /// # Notes |
503 | /// This function mainly intended for setting the initial settings after e.g creating with |
504 | /// `default` or after calling `CompressorOxide::reset()`, and behaviour may be changed |
505 | /// to disallow calling it after starting compression in the future. |
506 | pub fn set_format_and_level(&mut self, data_format: DataFormat, level: u8) { |
507 | let flags = create_comp_flags_from_zip_params( |
508 | level.into(), |
509 | data_format.to_window_bits(), |
510 | CompressionStrategy::Default as i32, |
511 | ); |
512 | self.params.update_flags(flags); |
513 | self.dict.update_flags(flags); |
514 | } |
515 | } |
516 | |
517 | impl Default for CompressorOxide { |
518 | /// Initialize the compressor with a level of 4, zlib wrapper and |
519 | /// the default strategy. |
520 | fn default() -> Self { |
521 | CompressorOxide { |
522 | lz: LZOxide::new(), |
523 | params: ParamsOxide::new(DEFAULT_FLAGS), |
524 | /// Put HuffmanOxide on the heap with default trick to avoid |
525 | /// excessive stack copies. |
526 | huff: Box::default(), |
527 | dict: DictOxide::new(DEFAULT_FLAGS), |
528 | } |
529 | } |
530 | } |
531 | |
532 | /// Callback function and user used in `compress_to_output`. |
533 | pub struct CallbackFunc<'a> { |
534 | pub put_buf_func: &'a mut dyn FnMut(&[u8]) -> bool, |
535 | } |
536 | |
537 | impl<'a> CallbackFunc<'a> { |
538 | fn flush_output( |
539 | &mut self, |
540 | saved_output: SavedOutputBufferOxide, |
541 | params: &mut ParamsOxide, |
542 | ) -> i32 { |
543 | // TODO: As this could be unsafe since |
544 | // we can't verify the function pointer |
545 | // this whole function should maybe be unsafe as well. |
546 | let call_success: bool = (self.put_buf_func)(¶ms.local_buf.b[0..saved_output.pos as usize]); |
547 | |
548 | if !call_success { |
549 | params.prev_return_status = TDEFLStatus::PutBufFailed; |
550 | return params.prev_return_status as i32; |
551 | } |
552 | |
553 | params.flush_remaining as i32 |
554 | } |
555 | } |
556 | |
557 | struct CallbackBuf<'a> { |
558 | pub out_buf: &'a mut [u8], |
559 | } |
560 | |
561 | impl<'a> CallbackBuf<'a> { |
562 | fn flush_output( |
563 | &mut self, |
564 | saved_output: SavedOutputBufferOxide, |
565 | params: &mut ParamsOxide, |
566 | ) -> i32 { |
567 | if saved_output.local { |
568 | let n = cmp::min( |
569 | saved_output.pos as usize, |
570 | self.out_buf.len() - params.out_buf_ofs, |
571 | ); |
572 | (&mut self.out_buf[params.out_buf_ofs..params.out_buf_ofs + n]) |
573 | .copy_from_slice(¶ms.local_buf.b[..n]); |
574 | |
575 | params.out_buf_ofs += n; |
576 | if saved_output.pos != n { |
577 | params.flush_ofs = n as u32; |
578 | params.flush_remaining = (saved_output.pos - n) as u32; |
579 | } |
580 | } else { |
581 | params.out_buf_ofs += saved_output.pos; |
582 | } |
583 | |
584 | params.flush_remaining as i32 |
585 | } |
586 | } |
587 | |
588 | enum CallbackOut<'a> { |
589 | Func(CallbackFunc<'a>), |
590 | Buf(CallbackBuf<'a>), |
591 | } |
592 | |
593 | impl<'a> CallbackOut<'a> { |
594 | fn new_output_buffer<'b>( |
595 | &'b mut self, |
596 | local_buf: &'b mut [u8], |
597 | out_buf_ofs: usize, |
598 | ) -> OutputBufferOxide<'b> { |
599 | let is_local; |
600 | let buf_len = OUT_BUF_SIZE - 16; |
601 | let chosen_buffer = match *self { |
602 | CallbackOut::Buf(ref mut cb) if cb.out_buf.len() - out_buf_ofs >= OUT_BUF_SIZE => { |
603 | is_local = false; |
604 | &mut cb.out_buf[out_buf_ofs..out_buf_ofs + buf_len] |
605 | } |
606 | _ => { |
607 | is_local = true; |
608 | &mut local_buf[..buf_len] |
609 | } |
610 | }; |
611 | |
612 | OutputBufferOxide { |
613 | inner: chosen_buffer, |
614 | inner_pos: 0, |
615 | local: is_local, |
616 | bit_buffer: 0, |
617 | bits_in: 0, |
618 | } |
619 | } |
620 | } |
621 | |
622 | struct CallbackOxide<'a> { |
623 | in_buf: Option<&'a [u8]>, |
624 | in_buf_size: Option<&'a mut usize>, |
625 | out_buf_size: Option<&'a mut usize>, |
626 | out: CallbackOut<'a>, |
627 | } |
628 | |
629 | impl<'a> CallbackOxide<'a> { |
630 | fn new_callback_buf(in_buf: &'a [u8], out_buf: &'a mut [u8]) -> Self { |
631 | CallbackOxide { |
632 | in_buf: Some(in_buf), |
633 | in_buf_size: None, |
634 | out_buf_size: None, |
635 | out: CallbackOut::Buf(CallbackBuf { out_buf }), |
636 | } |
637 | } |
638 | |
639 | fn new_callback_func(in_buf: &'a [u8], callback_func: CallbackFunc<'a>) -> Self { |
640 | CallbackOxide { |
641 | in_buf: Some(in_buf), |
642 | in_buf_size: None, |
643 | out_buf_size: None, |
644 | out: CallbackOut::Func(callback_func), |
645 | } |
646 | } |
647 | |
648 | fn update_size(&mut self, in_size: Option<usize>, out_size: Option<usize>) { |
649 | if let (Some(in_size), Some(size)) = (in_size, self.in_buf_size.as_mut()) { |
650 | **size = in_size; |
651 | } |
652 | |
653 | if let (Some(out_size), Some(size)) = (out_size, self.out_buf_size.as_mut()) { |
654 | **size = out_size |
655 | } |
656 | } |
657 | |
658 | fn flush_output( |
659 | &mut self, |
660 | saved_output: SavedOutputBufferOxide, |
661 | params: &mut ParamsOxide, |
662 | ) -> i32 { |
663 | if saved_output.pos == 0 { |
664 | return params.flush_remaining as i32; |
665 | } |
666 | |
667 | self.update_size(Some(params.src_pos), None); |
668 | match self.out { |
669 | CallbackOut::Func(ref mut cf) => cf.flush_output(saved_output, params), |
670 | CallbackOut::Buf(ref mut cb) => cb.flush_output(saved_output, params), |
671 | } |
672 | } |
673 | } |
674 | |
675 | struct OutputBufferOxide<'a> { |
676 | pub inner: &'a mut [u8], |
677 | pub inner_pos: usize, |
678 | pub local: bool, |
679 | |
680 | pub bit_buffer: u32, |
681 | pub bits_in: u32, |
682 | } |
683 | |
684 | impl<'a> OutputBufferOxide<'a> { |
685 | fn put_bits(&mut self, bits: u32, len: u32) { |
686 | assert!(bits <= ((1u32 << len) - 1u32)); |
687 | self.bit_buffer |= bits << self.bits_in; |
688 | self.bits_in += len; |
689 | while self.bits_in >= 8 { |
690 | self.inner[self.inner_pos] = self.bit_buffer as u8; |
691 | self.inner_pos += 1; |
692 | self.bit_buffer >>= 8; |
693 | self.bits_in -= 8; |
694 | } |
695 | } |
696 | |
697 | const fn save(&self) -> SavedOutputBufferOxide { |
698 | SavedOutputBufferOxide { |
699 | pos: self.inner_pos, |
700 | bit_buffer: self.bit_buffer, |
701 | bits_in: self.bits_in, |
702 | local: self.local, |
703 | } |
704 | } |
705 | |
706 | fn load(&mut self, saved: SavedOutputBufferOxide) { |
707 | self.inner_pos = saved.pos; |
708 | self.bit_buffer = saved.bit_buffer; |
709 | self.bits_in = saved.bits_in; |
710 | self.local = saved.local; |
711 | } |
712 | |
713 | fn pad_to_bytes(&mut self) { |
714 | if self.bits_in != 0 { |
715 | let len = 8 - self.bits_in; |
716 | self.put_bits(0, len); |
717 | } |
718 | } |
719 | } |
720 | |
721 | struct SavedOutputBufferOxide { |
722 | pub pos: usize, |
723 | pub bit_buffer: u32, |
724 | pub bits_in: u32, |
725 | pub local: bool, |
726 | } |
727 | |
728 | struct BitBuffer { |
729 | pub bit_buffer: u64, |
730 | pub bits_in: u32, |
731 | } |
732 | |
733 | impl BitBuffer { |
734 | fn put_fast(&mut self, bits: u64, len: u32) { |
735 | self.bit_buffer |= bits << self.bits_in; |
736 | self.bits_in += len; |
737 | } |
738 | |
739 | fn flush(&mut self, output: &mut OutputBufferOxide) -> Result<()> { |
740 | let pos: usize = output.inner_pos; |
741 | { |
742 | // isolation to please borrow checker |
743 | let inner: &mut [u8] = &mut output.inner[pos..pos + 8]; |
744 | let bytes: [u8; 8] = u64::to_le_bytes(self.bit_buffer); |
745 | inner.copy_from_slice(&bytes); |
746 | } |
747 | match output.inner_pos.checked_add((self.bits_in >> 3) as usize) { |
748 | Some(n: usize) if n <= output.inner.len() => output.inner_pos = n, |
749 | _ => return Err(Error {}), |
750 | } |
751 | self.bit_buffer >>= self.bits_in & !7; |
752 | self.bits_in &= 7; |
753 | Ok(()) |
754 | } |
755 | } |
756 | |
757 | /// A struct containing data about huffman codes and symbol frequencies. |
758 | /// |
759 | /// NOTE: Only the literal/lengths have enough symbols to actually use |
760 | /// the full array. It's unclear why it's defined like this in miniz, |
761 | /// it could be for cache/alignment reasons. |
762 | struct HuffmanOxide { |
763 | /// Number of occurrences of each symbol. |
764 | pub count: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
765 | /// The bits of the huffman code assigned to the symbol |
766 | pub codes: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
767 | /// The length of the huffman code assigned to the symbol. |
768 | pub code_sizes: [[u8; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
769 | } |
770 | |
771 | /// Tables used for literal/lengths in `HuffmanOxide`. |
772 | const LITLEN_TABLE: usize = 0; |
773 | /// Tables for distances. |
774 | const DIST_TABLE: usize = 1; |
775 | /// Tables for the run-length encoded huffman lengths for literals/lengths/distances. |
776 | const HUFF_CODES_TABLE: usize = 2; |
777 | |
778 | /// Status of RLE encoding of huffman code lengths. |
779 | struct Rle { |
780 | pub z_count: u32, |
781 | pub repeat_count: u32, |
782 | pub prev_code_size: u8, |
783 | } |
784 | |
785 | impl Rle { |
786 | fn prev_code_size( |
787 | &mut self, |
788 | packed_code_sizes: &mut [u8], |
789 | packed_pos: &mut usize, |
790 | h: &mut HuffmanOxide, |
791 | ) -> Result<()> { |
792 | let mut write = |buf| write(buf, packed_code_sizes, packed_pos); |
793 | let counts = &mut h.count[HUFF_CODES_TABLE]; |
794 | if self.repeat_count != 0 { |
795 | if self.repeat_count < 3 { |
796 | counts[self.prev_code_size as usize] = |
797 | counts[self.prev_code_size as usize].wrapping_add(self.repeat_count as u16); |
798 | let code = self.prev_code_size; |
799 | write(&[code, code, code][..self.repeat_count as usize])?; |
800 | } else { |
801 | counts[16] = counts[16].wrapping_add(1); |
802 | write(&[16, (self.repeat_count - 3) as u8][..])?; |
803 | } |
804 | self.repeat_count = 0; |
805 | } |
806 | |
807 | Ok(()) |
808 | } |
809 | |
810 | fn zero_code_size( |
811 | &mut self, |
812 | packed_code_sizes: &mut [u8], |
813 | packed_pos: &mut usize, |
814 | h: &mut HuffmanOxide, |
815 | ) -> Result<()> { |
816 | let mut write = |buf| write(buf, packed_code_sizes, packed_pos); |
817 | let counts = &mut h.count[HUFF_CODES_TABLE]; |
818 | if self.z_count != 0 { |
819 | if self.z_count < 3 { |
820 | counts[0] = counts[0].wrapping_add(self.z_count as u16); |
821 | write(&[0, 0, 0][..self.z_count as usize])?; |
822 | } else if self.z_count <= 10 { |
823 | counts[17] = counts[17].wrapping_add(1); |
824 | write(&[17, (self.z_count - 3) as u8][..])?; |
825 | } else { |
826 | counts[18] = counts[18].wrapping_add(1); |
827 | write(&[18, (self.z_count - 11) as u8][..])?; |
828 | } |
829 | self.z_count = 0; |
830 | } |
831 | |
832 | Ok(()) |
833 | } |
834 | } |
835 | |
836 | fn write(src: &[u8], dst: &mut [u8], dst_pos: &mut usize) -> Result<()> { |
837 | match dst.get_mut(*dst_pos..*dst_pos + src.len()) { |
838 | Some(s: &mut [u8]) => s.copy_from_slice(src), |
839 | None => return Err(Error {}), |
840 | } |
841 | *dst_pos += src.len(); |
842 | Ok(()) |
843 | } |
844 | |
845 | impl Default for HuffmanOxide { |
846 | fn default() -> Self { |
847 | HuffmanOxide { |
848 | count: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
849 | codes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
850 | code_sizes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
851 | } |
852 | } |
853 | } |
854 | |
855 | impl HuffmanOxide { |
856 | fn radix_sort_symbols<'a>( |
857 | symbols0: &'a mut [SymFreq], |
858 | symbols1: &'a mut [SymFreq], |
859 | ) -> &'a mut [SymFreq] { |
860 | let mut hist = [[0; 256]; 2]; |
861 | |
862 | for freq in symbols0.iter() { |
863 | hist[0][(freq.key & 0xFF) as usize] += 1; |
864 | hist[1][((freq.key >> 8) & 0xFF) as usize] += 1; |
865 | } |
866 | |
867 | let mut n_passes = 2; |
868 | if symbols0.len() == hist[1][0] { |
869 | n_passes -= 1; |
870 | } |
871 | |
872 | let mut current_symbols = symbols0; |
873 | let mut new_symbols = symbols1; |
874 | |
875 | for (pass, hist_item) in hist.iter().enumerate().take(n_passes) { |
876 | let mut offsets = [0; 256]; |
877 | let mut offset = 0; |
878 | for i in 0..256 { |
879 | offsets[i] = offset; |
880 | offset += hist_item[i]; |
881 | } |
882 | |
883 | for sym in current_symbols.iter() { |
884 | let j = ((sym.key >> (pass * 8)) & 0xFF) as usize; |
885 | new_symbols[offsets[j]] = *sym; |
886 | offsets[j] += 1; |
887 | } |
888 | |
889 | mem::swap(&mut current_symbols, &mut new_symbols); |
890 | } |
891 | |
892 | current_symbols |
893 | } |
894 | |
895 | fn calculate_minimum_redundancy(symbols: &mut [SymFreq]) { |
896 | match symbols.len() { |
897 | 0 => (), |
898 | 1 => symbols[0].key = 1, |
899 | n => { |
900 | symbols[0].key += symbols[1].key; |
901 | let mut root = 0; |
902 | let mut leaf = 2; |
903 | for next in 1..n - 1 { |
904 | if (leaf >= n) || (symbols[root].key < symbols[leaf].key) { |
905 | symbols[next].key = symbols[root].key; |
906 | symbols[root].key = next as u16; |
907 | root += 1; |
908 | } else { |
909 | symbols[next].key = symbols[leaf].key; |
910 | leaf += 1; |
911 | } |
912 | |
913 | if (leaf >= n) || (root < next && symbols[root].key < symbols[leaf].key) { |
914 | symbols[next].key = symbols[next].key.wrapping_add(symbols[root].key); |
915 | symbols[root].key = next as u16; |
916 | root += 1; |
917 | } else { |
918 | symbols[next].key = symbols[next].key.wrapping_add(symbols[leaf].key); |
919 | leaf += 1; |
920 | } |
921 | } |
922 | |
923 | symbols[n - 2].key = 0; |
924 | for next in (0..n - 2).rev() { |
925 | symbols[next].key = symbols[symbols[next].key as usize].key + 1; |
926 | } |
927 | |
928 | let mut avbl = 1; |
929 | let mut used = 0; |
930 | let mut dpth = 0; |
931 | let mut root = (n - 2) as i32; |
932 | let mut next = (n - 1) as i32; |
933 | while avbl > 0 { |
934 | while (root >= 0) && (symbols[root as usize].key == dpth) { |
935 | used += 1; |
936 | root -= 1; |
937 | } |
938 | while avbl > used { |
939 | symbols[next as usize].key = dpth; |
940 | next -= 1; |
941 | avbl -= 1; |
942 | } |
943 | avbl = 2 * used; |
944 | dpth += 1; |
945 | used = 0; |
946 | } |
947 | } |
948 | } |
949 | } |
950 | |
951 | fn enforce_max_code_size(num_codes: &mut [i32], code_list_len: usize, max_code_size: usize) { |
952 | if code_list_len <= 1 { |
953 | return; |
954 | } |
955 | |
956 | num_codes[max_code_size] += num_codes[max_code_size + 1..].iter().sum::<i32>(); |
957 | let total = num_codes[1..=max_code_size] |
958 | .iter() |
959 | .rev() |
960 | .enumerate() |
961 | .fold(0u32, |total, (i, &x)| total + ((x as u32) << i)); |
962 | |
963 | for _ in (1 << max_code_size)..total { |
964 | num_codes[max_code_size] -= 1; |
965 | for i in (1..max_code_size).rev() { |
966 | if num_codes[i] != 0 { |
967 | num_codes[i] -= 1; |
968 | num_codes[i + 1] += 2; |
969 | break; |
970 | } |
971 | } |
972 | } |
973 | } |
974 | |
975 | fn optimize_table( |
976 | &mut self, |
977 | table_num: usize, |
978 | table_len: usize, |
979 | code_size_limit: usize, |
980 | static_table: bool, |
981 | ) { |
982 | let mut num_codes = [0i32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; |
983 | let mut next_code = [0u32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; |
984 | |
985 | if static_table { |
986 | for &code_size in &self.code_sizes[table_num][..table_len] { |
987 | num_codes[code_size as usize] += 1; |
988 | } |
989 | } else { |
990 | let mut symbols0 = [SymFreq { |
991 | key: 0, |
992 | sym_index: 0, |
993 | }; MAX_HUFF_SYMBOLS]; |
994 | let mut symbols1 = [SymFreq { |
995 | key: 0, |
996 | sym_index: 0, |
997 | }; MAX_HUFF_SYMBOLS]; |
998 | |
999 | let mut num_used_symbols = 0; |
1000 | for i in 0..table_len { |
1001 | if self.count[table_num][i] != 0 { |
1002 | symbols0[num_used_symbols] = SymFreq { |
1003 | key: self.count[table_num][i], |
1004 | sym_index: i as u16, |
1005 | }; |
1006 | num_used_symbols += 1; |
1007 | } |
1008 | } |
1009 | |
1010 | let symbols = Self::radix_sort_symbols( |
1011 | &mut symbols0[..num_used_symbols], |
1012 | &mut symbols1[..num_used_symbols], |
1013 | ); |
1014 | Self::calculate_minimum_redundancy(symbols); |
1015 | |
1016 | for symbol in symbols.iter() { |
1017 | num_codes[symbol.key as usize] += 1; |
1018 | } |
1019 | |
1020 | Self::enforce_max_code_size(&mut num_codes, num_used_symbols, code_size_limit); |
1021 | |
1022 | memset(&mut self.code_sizes[table_num][..], 0); |
1023 | memset(&mut self.codes[table_num][..], 0); |
1024 | |
1025 | let mut last = num_used_symbols; |
1026 | for (i, &num_item) in num_codes |
1027 | .iter() |
1028 | .enumerate() |
1029 | .take(code_size_limit + 1) |
1030 | .skip(1) |
1031 | { |
1032 | let first = last - num_item as usize; |
1033 | for symbol in &symbols[first..last] { |
1034 | self.code_sizes[table_num][symbol.sym_index as usize] = i as u8; |
1035 | } |
1036 | last = first; |
1037 | } |
1038 | } |
1039 | |
1040 | let mut j = 0; |
1041 | next_code[1] = 0; |
1042 | for i in 2..=code_size_limit { |
1043 | j = (j + num_codes[i - 1]) << 1; |
1044 | next_code[i] = j as u32; |
1045 | } |
1046 | |
1047 | for (&code_size, huff_code) in self.code_sizes[table_num] |
1048 | .iter() |
1049 | .take(table_len) |
1050 | .zip(self.codes[table_num].iter_mut().take(table_len)) |
1051 | { |
1052 | if code_size == 0 { |
1053 | continue; |
1054 | } |
1055 | |
1056 | let mut code = next_code[code_size as usize]; |
1057 | next_code[code_size as usize] += 1; |
1058 | |
1059 | let mut rev_code = 0; |
1060 | for _ in 0..code_size { |
1061 | rev_code = (rev_code << 1) | (code & 1); |
1062 | code >>= 1; |
1063 | } |
1064 | *huff_code = rev_code as u16; |
1065 | } |
1066 | } |
1067 | |
1068 | fn start_static_block(&mut self, output: &mut OutputBufferOxide) { |
1069 | memset(&mut self.code_sizes[LITLEN_TABLE][0..144], 8); |
1070 | memset(&mut self.code_sizes[LITLEN_TABLE][144..256], 9); |
1071 | memset(&mut self.code_sizes[LITLEN_TABLE][256..280], 7); |
1072 | memset(&mut self.code_sizes[LITLEN_TABLE][280..288], 8); |
1073 | |
1074 | memset(&mut self.code_sizes[DIST_TABLE][..32], 5); |
1075 | |
1076 | self.optimize_table(LITLEN_TABLE, 288, 15, true); |
1077 | self.optimize_table(DIST_TABLE, 32, 15, true); |
1078 | |
1079 | output.put_bits(0b01, 2) |
1080 | } |
1081 | |
1082 | fn start_dynamic_block(&mut self, output: &mut OutputBufferOxide) -> Result<()> { |
1083 | // There will always be one, and only one end of block code. |
1084 | self.count[0][256] = 1; |
1085 | |
1086 | self.optimize_table(0, MAX_HUFF_SYMBOLS_0, 15, false); |
1087 | self.optimize_table(1, MAX_HUFF_SYMBOLS_1, 15, false); |
1088 | |
1089 | let num_lit_codes = 286 |
1090 | - &self.code_sizes[0][257..286] |
1091 | .iter() |
1092 | .rev() |
1093 | .take_while(|&x| *x == 0) |
1094 | .count(); |
1095 | |
1096 | let num_dist_codes = 30 |
1097 | - &self.code_sizes[1][1..30] |
1098 | .iter() |
1099 | .rev() |
1100 | .take_while(|&x| *x == 0) |
1101 | .count(); |
1102 | |
1103 | let mut code_sizes_to_pack = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; |
1104 | let mut packed_code_sizes = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; |
1105 | |
1106 | let total_code_sizes_to_pack = num_lit_codes + num_dist_codes; |
1107 | |
1108 | code_sizes_to_pack[..num_lit_codes].copy_from_slice(&self.code_sizes[0][..num_lit_codes]); |
1109 | |
1110 | code_sizes_to_pack[num_lit_codes..total_code_sizes_to_pack] |
1111 | .copy_from_slice(&self.code_sizes[1][..num_dist_codes]); |
1112 | |
1113 | let mut rle = Rle { |
1114 | z_count: 0, |
1115 | repeat_count: 0, |
1116 | prev_code_size: 0xFF, |
1117 | }; |
1118 | |
1119 | memset(&mut self.count[HUFF_CODES_TABLE][..MAX_HUFF_SYMBOLS_2], 0); |
1120 | |
1121 | let mut packed_pos = 0; |
1122 | for &code_size in &code_sizes_to_pack[..total_code_sizes_to_pack] { |
1123 | if code_size == 0 { |
1124 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1125 | rle.z_count += 1; |
1126 | if rle.z_count == 138 { |
1127 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1128 | } |
1129 | } else { |
1130 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1131 | if code_size != rle.prev_code_size { |
1132 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1133 | self.count[HUFF_CODES_TABLE][code_size as usize] = |
1134 | self.count[HUFF_CODES_TABLE][code_size as usize].wrapping_add(1); |
1135 | write(&[code_size], &mut packed_code_sizes, &mut packed_pos)?; |
1136 | } else { |
1137 | rle.repeat_count += 1; |
1138 | if rle.repeat_count == 6 { |
1139 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1140 | } |
1141 | } |
1142 | } |
1143 | rle.prev_code_size = code_size; |
1144 | } |
1145 | |
1146 | if rle.repeat_count != 0 { |
1147 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1148 | } else { |
1149 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
1150 | } |
1151 | |
1152 | self.optimize_table(2, MAX_HUFF_SYMBOLS_2, 7, false); |
1153 | |
1154 | output.put_bits(2, 2); |
1155 | |
1156 | output.put_bits((num_lit_codes - 257) as u32, 5); |
1157 | output.put_bits((num_dist_codes - 1) as u32, 5); |
1158 | |
1159 | let mut num_bit_lengths = 18 |
1160 | - HUFFMAN_LENGTH_ORDER |
1161 | .iter() |
1162 | .rev() |
1163 | .take_while(|&swizzle| self.code_sizes[HUFF_CODES_TABLE][*swizzle as usize] == 0) |
1164 | .count(); |
1165 | |
1166 | num_bit_lengths = cmp::max(4, num_bit_lengths + 1); |
1167 | output.put_bits(num_bit_lengths as u32 - 4, 4); |
1168 | for &swizzle in &HUFFMAN_LENGTH_ORDER[..num_bit_lengths] { |
1169 | output.put_bits( |
1170 | u32::from(self.code_sizes[HUFF_CODES_TABLE][swizzle as usize]), |
1171 | 3, |
1172 | ); |
1173 | } |
1174 | |
1175 | let mut packed_code_size_index = 0; |
1176 | while packed_code_size_index < packed_pos { |
1177 | let code = packed_code_sizes[packed_code_size_index] as usize; |
1178 | packed_code_size_index += 1; |
1179 | assert!(code < MAX_HUFF_SYMBOLS_2); |
1180 | output.put_bits( |
1181 | u32::from(self.codes[HUFF_CODES_TABLE][code]), |
1182 | u32::from(self.code_sizes[HUFF_CODES_TABLE][code]), |
1183 | ); |
1184 | if code >= 16 { |
1185 | output.put_bits( |
1186 | u32::from(packed_code_sizes[packed_code_size_index]), |
1187 | [2, 3, 7][code - 16], |
1188 | ); |
1189 | packed_code_size_index += 1; |
1190 | } |
1191 | } |
1192 | |
1193 | Ok(()) |
1194 | } |
1195 | } |
1196 | |
1197 | struct DictOxide { |
1198 | /// The maximum number of checks in the hash chain, for the initial, |
1199 | /// and the lazy match respectively. |
1200 | pub max_probes: [u32; 2], |
1201 | /// Buffer of input data. |
1202 | /// Padded with 1 byte to simplify matching code in `compress_fast`. |
1203 | pub b: Box<HashBuffers>, |
1204 | |
1205 | pub code_buf_dict_pos: usize, |
1206 | pub lookahead_size: usize, |
1207 | pub lookahead_pos: usize, |
1208 | pub size: usize, |
1209 | } |
1210 | |
1211 | const fn probes_from_flags(flags: u32) -> [u32; 2] { |
1212 | [ |
1213 | 1 + ((flags & 0xFFF) + 2) / 3, |
1214 | 1 + (((flags & 0xFFF) >> 2) + 2) / 3, |
1215 | ] |
1216 | } |
1217 | |
1218 | impl DictOxide { |
1219 | fn new(flags: u32) -> Self { |
1220 | DictOxide { |
1221 | max_probes: probes_from_flags(flags), |
1222 | b: Box::default(), |
1223 | code_buf_dict_pos: 0, |
1224 | lookahead_size: 0, |
1225 | lookahead_pos: 0, |
1226 | size: 0, |
1227 | } |
1228 | } |
1229 | |
1230 | fn update_flags(&mut self, flags: u32) { |
1231 | self.max_probes = probes_from_flags(flags); |
1232 | } |
1233 | |
1234 | fn reset(&mut self) { |
1235 | self.b.reset(); |
1236 | self.code_buf_dict_pos = 0; |
1237 | self.lookahead_size = 0; |
1238 | self.lookahead_pos = 0; |
1239 | self.size = 0; |
1240 | } |
1241 | |
1242 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
1243 | /// type T. |
1244 | #[inline ] |
1245 | fn read_unaligned_u32(&self, pos: usize) -> u32 { |
1246 | // Masking the value here helps avoid bounds checks. |
1247 | let pos = (pos & LZ_DICT_SIZE_MASK) as usize; |
1248 | let end = pos + 4; |
1249 | // Somehow this assertion makes things faster. |
1250 | assert!(end < LZ_DICT_FULL_SIZE); |
1251 | |
1252 | let bytes: [u8; 4] = self.b.dict[pos..end].try_into().unwrap(); |
1253 | u32::from_le_bytes(bytes) |
1254 | } |
1255 | |
1256 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
1257 | /// type T. |
1258 | #[inline ] |
1259 | fn read_unaligned_u64(&self, pos: usize) -> u64 { |
1260 | let pos = pos as usize; |
1261 | let bytes: [u8; 8] = self.b.dict[pos..pos + 8].try_into().unwrap(); |
1262 | u64::from_le_bytes(bytes) |
1263 | } |
1264 | |
1265 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
1266 | /// type T. |
1267 | #[inline ] |
1268 | fn read_as_u16(&self, pos: usize) -> u16 { |
1269 | read_u16_le(&self.b.dict[..], pos) |
1270 | } |
1271 | |
1272 | /// Try to find a match for the data at lookahead_pos in the dictionary that is |
1273 | /// longer than `match_len`. |
1274 | /// Returns a tuple containing (match_distance, match_length). Will be equal to the input |
1275 | /// values if no better matches were found. |
1276 | fn find_match( |
1277 | &self, |
1278 | lookahead_pos: usize, |
1279 | max_dist: usize, |
1280 | max_match_len: u32, |
1281 | mut match_dist: u32, |
1282 | mut match_len: u32, |
1283 | ) -> (u32, u32) { |
1284 | // Clamp the match len and max_match_len to be valid. (It should be when this is called, but |
1285 | // do it for now just in case for safety reasons.) |
1286 | // This should normally end up as at worst conditional moves, |
1287 | // so it shouldn't slow us down much. |
1288 | // TODO: Statically verify these so we don't need to do this. |
1289 | let max_match_len = cmp::min(MAX_MATCH_LEN as u32, max_match_len); |
1290 | match_len = cmp::max(match_len, 1); |
1291 | |
1292 | let pos = lookahead_pos as usize & LZ_DICT_SIZE_MASK; |
1293 | let mut probe_pos = pos; |
1294 | // Number of probes into the hash chains. |
1295 | let mut num_probes_left = self.max_probes[(match_len >= 32) as usize]; |
1296 | |
1297 | // If we already have a match of the full length don't bother searching for another one. |
1298 | if max_match_len <= match_len { |
1299 | return (match_dist, match_len); |
1300 | } |
1301 | |
1302 | // Read the last byte of the current match, and the next one, used to compare matches. |
1303 | let mut c01: u16 = self.read_as_u16(pos as usize + match_len as usize - 1); |
1304 | // Read the two bytes at the end position of the current match. |
1305 | let s01: u16 = self.read_as_u16(pos as usize); |
1306 | |
1307 | 'outer: loop { |
1308 | let mut dist; |
1309 | 'found: loop { |
1310 | num_probes_left -= 1; |
1311 | if num_probes_left == 0 { |
1312 | // We have done as many probes in the hash chain as the current compression |
1313 | // settings allow, so return the best match we found, if any. |
1314 | return (match_dist, match_len); |
1315 | } |
1316 | |
1317 | for _ in 0..3 { |
1318 | let next_probe_pos = self.b.next[probe_pos as usize] as usize; |
1319 | |
1320 | dist = (lookahead_pos - next_probe_pos) & 0xFFFF; |
1321 | if next_probe_pos == 0 || dist > max_dist { |
1322 | // We reached the end of the hash chain, or the next value is further away |
1323 | // than the maximum allowed distance, so return the best match we found, if |
1324 | // any. |
1325 | return (match_dist, match_len); |
1326 | } |
1327 | |
1328 | // Mask the position value to get the position in the hash chain of the next |
1329 | // position to match against. |
1330 | probe_pos = next_probe_pos & LZ_DICT_SIZE_MASK; |
1331 | |
1332 | if self.read_as_u16((probe_pos + match_len as usize - 1) as usize) == c01 { |
1333 | break 'found; |
1334 | } |
1335 | } |
1336 | } |
1337 | |
1338 | if dist == 0 { |
1339 | // We've looked through the whole match range, so return the best match we |
1340 | // found. |
1341 | return (match_dist, match_len); |
1342 | } |
1343 | |
1344 | // Check if the two first bytes match. |
1345 | if self.read_as_u16(probe_pos as usize) != s01 { |
1346 | continue; |
1347 | } |
1348 | |
1349 | let mut p = pos + 2; |
1350 | let mut q = probe_pos + 2; |
1351 | // The first two bytes matched, so check the full length of the match. |
1352 | for _ in 0..32 { |
1353 | let p_data: u64 = self.read_unaligned_u64(p); |
1354 | let q_data: u64 = self.read_unaligned_u64(q); |
1355 | // Compare of 8 bytes at a time by using unaligned loads of 64-bit integers. |
1356 | let xor_data = p_data ^ q_data; |
1357 | if xor_data == 0 { |
1358 | p += 8; |
1359 | q += 8; |
1360 | } else { |
1361 | // If not all of the last 8 bytes matched, check how may of them did. |
1362 | let trailing = xor_data.trailing_zeros(); |
1363 | |
1364 | let probe_len = p - pos + (trailing as usize >> 3); |
1365 | if probe_len > match_len as usize { |
1366 | match_dist = dist as u32; |
1367 | match_len = cmp::min(max_match_len, probe_len as u32); |
1368 | if match_len == max_match_len { |
1369 | // We found a match that had the maximum allowed length, |
1370 | // so there is now point searching further. |
1371 | return (match_dist, match_len); |
1372 | } |
1373 | // We found a better match, so save the last two bytes for further match |
1374 | // comparisons. |
1375 | c01 = self.read_as_u16(pos + match_len as usize - 1) |
1376 | } |
1377 | continue 'outer; |
1378 | } |
1379 | } |
1380 | |
1381 | return (dist as u32, cmp::min(max_match_len, MAX_MATCH_LEN as u32)); |
1382 | } |
1383 | } |
1384 | } |
1385 | |
1386 | struct ParamsOxide { |
1387 | pub flags: u32, |
1388 | pub greedy_parsing: bool, |
1389 | pub block_index: u32, |
1390 | |
1391 | pub saved_match_dist: u32, |
1392 | pub saved_match_len: u32, |
1393 | pub saved_lit: u8, |
1394 | |
1395 | pub flush: TDEFLFlush, |
1396 | pub flush_ofs: u32, |
1397 | pub flush_remaining: u32, |
1398 | pub finished: bool, |
1399 | |
1400 | pub adler32: u32, |
1401 | |
1402 | pub src_pos: usize, |
1403 | |
1404 | pub out_buf_ofs: usize, |
1405 | pub prev_return_status: TDEFLStatus, |
1406 | |
1407 | pub saved_bit_buffer: u32, |
1408 | pub saved_bits_in: u32, |
1409 | |
1410 | pub local_buf: Box<LocalBuf>, |
1411 | } |
1412 | |
1413 | impl ParamsOxide { |
1414 | fn new(flags: u32) -> Self { |
1415 | ParamsOxide { |
1416 | flags, |
1417 | greedy_parsing: flags & TDEFL_GREEDY_PARSING_FLAG != 0, |
1418 | block_index: 0, |
1419 | saved_match_dist: 0, |
1420 | saved_match_len: 0, |
1421 | saved_lit: 0, |
1422 | flush: TDEFLFlush::None, |
1423 | flush_ofs: 0, |
1424 | flush_remaining: 0, |
1425 | finished: false, |
1426 | adler32: MZ_ADLER32_INIT, |
1427 | src_pos: 0, |
1428 | out_buf_ofs: 0, |
1429 | prev_return_status: TDEFLStatus::Okay, |
1430 | saved_bit_buffer: 0, |
1431 | saved_bits_in: 0, |
1432 | local_buf: Box::default(), |
1433 | } |
1434 | } |
1435 | |
1436 | fn update_flags(&mut self, flags: u32) { |
1437 | self.flags = flags; |
1438 | self.greedy_parsing = self.flags & TDEFL_GREEDY_PARSING_FLAG != 0; |
1439 | } |
1440 | |
1441 | /// Reset state, saving settings. |
1442 | fn reset(&mut self) { |
1443 | self.block_index = 0; |
1444 | self.saved_match_len = 0; |
1445 | self.saved_match_dist = 0; |
1446 | self.saved_lit = 0; |
1447 | self.flush = TDEFLFlush::None; |
1448 | self.flush_ofs = 0; |
1449 | self.flush_remaining = 0; |
1450 | self.finished = false; |
1451 | self.adler32 = MZ_ADLER32_INIT; |
1452 | self.src_pos = 0; |
1453 | self.out_buf_ofs = 0; |
1454 | self.prev_return_status = TDEFLStatus::Okay; |
1455 | self.saved_bit_buffer = 0; |
1456 | self.saved_bits_in = 0; |
1457 | self.local_buf.b = [0; OUT_BUF_SIZE]; |
1458 | } |
1459 | } |
1460 | |
1461 | struct LZOxide { |
1462 | pub codes: [u8; LZ_CODE_BUF_SIZE], |
1463 | pub code_position: usize, |
1464 | pub flag_position: usize, |
1465 | |
1466 | // The total number of bytes in the current block. |
1467 | // (Could maybe use usize, but it's not possible to exceed a block size of ) |
1468 | pub total_bytes: u32, |
1469 | pub num_flags_left: u32, |
1470 | } |
1471 | |
1472 | impl LZOxide { |
1473 | const fn new() -> Self { |
1474 | LZOxide { |
1475 | codes: [0; LZ_CODE_BUF_SIZE], |
1476 | code_position: 1, |
1477 | flag_position: 0, |
1478 | total_bytes: 0, |
1479 | num_flags_left: 8, |
1480 | } |
1481 | } |
1482 | |
1483 | fn write_code(&mut self, val: u8) { |
1484 | self.codes[self.code_position] = val; |
1485 | self.code_position += 1; |
1486 | } |
1487 | |
1488 | fn init_flag(&mut self) { |
1489 | if self.num_flags_left == 8 { |
1490 | *self.get_flag() = 0; |
1491 | self.code_position -= 1; |
1492 | } else { |
1493 | *self.get_flag() >>= self.num_flags_left; |
1494 | } |
1495 | } |
1496 | |
1497 | fn get_flag(&mut self) -> &mut u8 { |
1498 | &mut self.codes[self.flag_position] |
1499 | } |
1500 | |
1501 | fn plant_flag(&mut self) { |
1502 | self.flag_position = self.code_position; |
1503 | self.code_position += 1; |
1504 | } |
1505 | |
1506 | fn consume_flag(&mut self) { |
1507 | self.num_flags_left -= 1; |
1508 | if self.num_flags_left == 0 { |
1509 | self.num_flags_left = 8; |
1510 | self.plant_flag(); |
1511 | } |
1512 | } |
1513 | } |
1514 | |
1515 | fn compress_lz_codes( |
1516 | huff: &HuffmanOxide, |
1517 | output: &mut OutputBufferOxide, |
1518 | lz_code_buf: &[u8], |
1519 | ) -> Result<bool> { |
1520 | let mut flags = 1; |
1521 | let mut bb = BitBuffer { |
1522 | bit_buffer: u64::from(output.bit_buffer), |
1523 | bits_in: output.bits_in, |
1524 | }; |
1525 | |
1526 | let mut i: usize = 0; |
1527 | while i < lz_code_buf.len() { |
1528 | if flags == 1 { |
1529 | flags = u32::from(lz_code_buf[i]) | 0x100; |
1530 | i += 1; |
1531 | } |
1532 | |
1533 | // The lz code was a length code |
1534 | if flags & 1 == 1 { |
1535 | flags >>= 1; |
1536 | |
1537 | let sym; |
1538 | let num_extra_bits; |
1539 | |
1540 | let match_len = lz_code_buf[i] as usize; |
1541 | |
1542 | let match_dist = read_u16_le(lz_code_buf, i + 1); |
1543 | |
1544 | i += 3; |
1545 | |
1546 | debug_assert!(huff.code_sizes[0][LEN_SYM[match_len] as usize] != 0); |
1547 | bb.put_fast( |
1548 | u64::from(huff.codes[0][LEN_SYM[match_len] as usize]), |
1549 | u32::from(huff.code_sizes[0][LEN_SYM[match_len] as usize]), |
1550 | ); |
1551 | bb.put_fast( |
1552 | match_len as u64 & u64::from(BITMASKS[LEN_EXTRA[match_len] as usize]), |
1553 | u32::from(LEN_EXTRA[match_len]), |
1554 | ); |
1555 | |
1556 | if match_dist < 512 { |
1557 | sym = SMALL_DIST_SYM[match_dist as usize] as usize; |
1558 | num_extra_bits = SMALL_DIST_EXTRA[match_dist as usize] as usize; |
1559 | } else { |
1560 | sym = LARGE_DIST_SYM[(match_dist >> 8) as usize] as usize; |
1561 | num_extra_bits = LARGE_DIST_EXTRA[(match_dist >> 8) as usize] as usize; |
1562 | } |
1563 | |
1564 | debug_assert!(huff.code_sizes[1][sym] != 0); |
1565 | bb.put_fast( |
1566 | u64::from(huff.codes[1][sym]), |
1567 | u32::from(huff.code_sizes[1][sym]), |
1568 | ); |
1569 | bb.put_fast( |
1570 | u64::from(match_dist) & u64::from(BITMASKS[num_extra_bits as usize]), |
1571 | num_extra_bits as u32, |
1572 | ); |
1573 | } else { |
1574 | // The lz code was a literal |
1575 | for _ in 0..3 { |
1576 | flags >>= 1; |
1577 | let lit = lz_code_buf[i]; |
1578 | i += 1; |
1579 | |
1580 | debug_assert!(huff.code_sizes[0][lit as usize] != 0); |
1581 | bb.put_fast( |
1582 | u64::from(huff.codes[0][lit as usize]), |
1583 | u32::from(huff.code_sizes[0][lit as usize]), |
1584 | ); |
1585 | |
1586 | if flags & 1 == 1 || i >= lz_code_buf.len() { |
1587 | break; |
1588 | } |
1589 | } |
1590 | } |
1591 | |
1592 | bb.flush(output)?; |
1593 | } |
1594 | |
1595 | output.bits_in = 0; |
1596 | output.bit_buffer = 0; |
1597 | while bb.bits_in != 0 { |
1598 | let n = cmp::min(bb.bits_in, 16); |
1599 | output.put_bits(bb.bit_buffer as u32 & BITMASKS[n as usize], n); |
1600 | bb.bit_buffer >>= n; |
1601 | bb.bits_in -= n; |
1602 | } |
1603 | |
1604 | // Output the end of block symbol. |
1605 | output.put_bits( |
1606 | u32::from(huff.codes[0][256]), |
1607 | u32::from(huff.code_sizes[0][256]), |
1608 | ); |
1609 | |
1610 | Ok(true) |
1611 | } |
1612 | |
1613 | fn compress_block( |
1614 | huff: &mut HuffmanOxide, |
1615 | output: &mut OutputBufferOxide, |
1616 | lz: &LZOxide, |
1617 | static_block: bool, |
1618 | ) -> Result<bool> { |
1619 | if static_block { |
1620 | huff.start_static_block(output); |
1621 | } else { |
1622 | huff.start_dynamic_block(output)?; |
1623 | } |
1624 | |
1625 | compress_lz_codes(huff, output, &lz.codes[..lz.code_position]) |
1626 | } |
1627 | |
1628 | fn flush_block( |
1629 | d: &mut CompressorOxide, |
1630 | callback: &mut CallbackOxide, |
1631 | flush: TDEFLFlush, |
1632 | ) -> Result<i32> { |
1633 | let mut saved_buffer; |
1634 | { |
1635 | let mut output = callback |
1636 | .out |
1637 | .new_output_buffer(&mut d.params.local_buf.b, d.params.out_buf_ofs); |
1638 | output.bit_buffer = d.params.saved_bit_buffer; |
1639 | output.bits_in = d.params.saved_bits_in; |
1640 | |
1641 | let use_raw_block = (d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0) |
1642 | && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos) <= d.dict.size; |
1643 | |
1644 | assert!(d.params.flush_remaining == 0); |
1645 | d.params.flush_ofs = 0; |
1646 | d.params.flush_remaining = 0; |
1647 | |
1648 | d.lz.init_flag(); |
1649 | |
1650 | // If we are at the start of the stream, write the zlib header if requested. |
1651 | if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 && d.params.block_index == 0 { |
1652 | let header = zlib::header_from_flags(d.params.flags as u32); |
1653 | output.put_bits(header[0].into(), 8); |
1654 | output.put_bits(header[1].into(), 8); |
1655 | } |
1656 | |
1657 | // Output the block header. |
1658 | output.put_bits((flush == TDEFLFlush::Finish) as u32, 1); |
1659 | |
1660 | saved_buffer = output.save(); |
1661 | |
1662 | let comp_success = if !use_raw_block { |
1663 | let use_static = |
1664 | (d.params.flags & TDEFL_FORCE_ALL_STATIC_BLOCKS != 0) || (d.lz.total_bytes < 48); |
1665 | compress_block(&mut d.huff, &mut output, &d.lz, use_static)? |
1666 | } else { |
1667 | false |
1668 | }; |
1669 | |
1670 | // If we failed to compress anything and the output would take up more space than the output |
1671 | // data, output a stored block instead, which has at most 5 bytes of overhead. |
1672 | // We only use some simple heuristics for now. |
1673 | // A stored block will have an overhead of at least 4 bytes containing the block length |
1674 | // but usually more due to the length parameters having to start at a byte boundary and thus |
1675 | // requiring up to 5 bytes of padding. |
1676 | // As a static block will have an overhead of at most 1 bit per byte |
1677 | // (as literals are either 8 or 9 bytes), a raw block will |
1678 | // never take up less space if the number of input bytes are less than 32. |
1679 | let expanded = (d.lz.total_bytes > 32) |
1680 | && (output.inner_pos - saved_buffer.pos + 1 >= (d.lz.total_bytes as usize)) |
1681 | && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos <= d.dict.size); |
1682 | |
1683 | if use_raw_block || expanded { |
1684 | output.load(saved_buffer); |
1685 | |
1686 | // Block header. |
1687 | output.put_bits(0, 2); |
1688 | |
1689 | // Block length has to start on a byte boundary, s opad. |
1690 | output.pad_to_bytes(); |
1691 | |
1692 | // Block length and ones complement of block length. |
1693 | output.put_bits(d.lz.total_bytes & 0xFFFF, 16); |
1694 | output.put_bits(!d.lz.total_bytes & 0xFFFF, 16); |
1695 | |
1696 | // Write the actual bytes. |
1697 | for i in 0..d.lz.total_bytes { |
1698 | let pos = (d.dict.code_buf_dict_pos + i as usize) & LZ_DICT_SIZE_MASK; |
1699 | output.put_bits(u32::from(d.dict.b.dict[pos as usize]), 8); |
1700 | } |
1701 | } else if !comp_success { |
1702 | output.load(saved_buffer); |
1703 | compress_block(&mut d.huff, &mut output, &d.lz, true)?; |
1704 | } |
1705 | |
1706 | if flush != TDEFLFlush::None { |
1707 | if flush == TDEFLFlush::Finish { |
1708 | output.pad_to_bytes(); |
1709 | if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 { |
1710 | let mut adler = d.params.adler32; |
1711 | for _ in 0..4 { |
1712 | output.put_bits((adler >> 24) & 0xFF, 8); |
1713 | adler <<= 8; |
1714 | } |
1715 | } |
1716 | } else { |
1717 | // Sync or Full flush. |
1718 | // Output an empty raw block. |
1719 | output.put_bits(0, 3); |
1720 | output.pad_to_bytes(); |
1721 | output.put_bits(0, 16); |
1722 | output.put_bits(0xFFFF, 16); |
1723 | } |
1724 | } |
1725 | |
1726 | memset(&mut d.huff.count[0][..MAX_HUFF_SYMBOLS_0], 0); |
1727 | memset(&mut d.huff.count[1][..MAX_HUFF_SYMBOLS_1], 0); |
1728 | |
1729 | d.lz.code_position = 1; |
1730 | d.lz.flag_position = 0; |
1731 | d.lz.num_flags_left = 8; |
1732 | d.dict.code_buf_dict_pos += d.lz.total_bytes as usize; |
1733 | d.lz.total_bytes = 0; |
1734 | d.params.block_index += 1; |
1735 | |
1736 | saved_buffer = output.save(); |
1737 | |
1738 | d.params.saved_bit_buffer = saved_buffer.bit_buffer; |
1739 | d.params.saved_bits_in = saved_buffer.bits_in; |
1740 | } |
1741 | |
1742 | Ok(callback.flush_output(saved_buffer, &mut d.params)) |
1743 | } |
1744 | |
1745 | fn record_literal(h: &mut HuffmanOxide, lz: &mut LZOxide, lit: u8) { |
1746 | lz.total_bytes += 1; |
1747 | lz.write_code(val:lit); |
1748 | |
1749 | *lz.get_flag() >>= 1; |
1750 | lz.consume_flag(); |
1751 | |
1752 | h.count[0][lit as usize] += 1; |
1753 | } |
1754 | |
1755 | fn record_match(h: &mut HuffmanOxide, lz: &mut LZOxide, mut match_len: u32, mut match_dist: u32) { |
1756 | assert!(match_len >= MIN_MATCH_LEN.into()); |
1757 | assert!(match_dist >= 1); |
1758 | assert!(match_dist as usize <= LZ_DICT_SIZE); |
1759 | |
1760 | lz.total_bytes += match_len; |
1761 | match_dist -= 1; |
1762 | match_len -= u32::from(MIN_MATCH_LEN); |
1763 | lz.write_code(val:match_len as u8); |
1764 | lz.write_code(val:match_dist as u8); |
1765 | lz.write_code((match_dist >> 8) as u8); |
1766 | |
1767 | *lz.get_flag() >>= 1; |
1768 | *lz.get_flag() |= 0x80; |
1769 | lz.consume_flag(); |
1770 | |
1771 | let symbol: usize = if match_dist < 512 { |
1772 | SMALL_DIST_SYM[match_dist as usize] |
1773 | } else { |
1774 | LARGE_DIST_SYM[((match_dist >> 8) & 127) as usize] |
1775 | } as usize; |
1776 | h.count[1][symbol] += 1; |
1777 | h.count[0][LEN_SYM[match_len as usize] as usize] += 1; |
1778 | } |
1779 | |
1780 | fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { |
1781 | let mut src_pos = d.params.src_pos; |
1782 | let in_buf = match callback.in_buf { |
1783 | None => return true, |
1784 | Some(in_buf) => in_buf, |
1785 | }; |
1786 | |
1787 | let mut lookahead_size = d.dict.lookahead_size; |
1788 | let mut lookahead_pos = d.dict.lookahead_pos; |
1789 | let mut saved_lit = d.params.saved_lit; |
1790 | let mut saved_match_dist = d.params.saved_match_dist; |
1791 | let mut saved_match_len = d.params.saved_match_len; |
1792 | |
1793 | while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) { |
1794 | let src_buf_left = in_buf.len() - src_pos; |
1795 | let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size as usize); |
1796 | |
1797 | if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1 |
1798 | && num_bytes_to_process > 0 |
1799 | { |
1800 | let dictb = &mut d.dict.b; |
1801 | |
1802 | let mut dst_pos = (lookahead_pos + lookahead_size as usize) & LZ_DICT_SIZE_MASK; |
1803 | let mut ins_pos = lookahead_pos + lookahead_size as usize - 2; |
1804 | // Start the hash value from the first two bytes |
1805 | let mut hash = update_hash( |
1806 | u16::from(dictb.dict[(ins_pos & LZ_DICT_SIZE_MASK) as usize]), |
1807 | dictb.dict[((ins_pos + 1) & LZ_DICT_SIZE_MASK) as usize], |
1808 | ); |
1809 | |
1810 | lookahead_size += num_bytes_to_process; |
1811 | |
1812 | for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { |
1813 | // Add byte to input buffer. |
1814 | dictb.dict[dst_pos as usize] = c; |
1815 | if (dst_pos as usize) < MAX_MATCH_LEN - 1 { |
1816 | dictb.dict[LZ_DICT_SIZE + dst_pos as usize] = c; |
1817 | } |
1818 | |
1819 | // Generate hash from the current byte, |
1820 | hash = update_hash(hash, c); |
1821 | dictb.next[(ins_pos & LZ_DICT_SIZE_MASK) as usize] = dictb.hash[hash as usize]; |
1822 | // and insert it into the hash chain. |
1823 | dictb.hash[hash as usize] = ins_pos as u16; |
1824 | dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK; |
1825 | ins_pos += 1; |
1826 | } |
1827 | src_pos += num_bytes_to_process; |
1828 | } else { |
1829 | let dictb = &mut d.dict.b; |
1830 | for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { |
1831 | let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; |
1832 | dictb.dict[dst_pos as usize] = c; |
1833 | if (dst_pos as usize) < MAX_MATCH_LEN - 1 { |
1834 | dictb.dict[LZ_DICT_SIZE + dst_pos as usize] = c; |
1835 | } |
1836 | |
1837 | lookahead_size += 1; |
1838 | if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() { |
1839 | let ins_pos = lookahead_pos + lookahead_size - 3; |
1840 | let hash = ((u32::from(dictb.dict[(ins_pos & LZ_DICT_SIZE_MASK) as usize]) |
1841 | << (LZ_HASH_SHIFT * 2)) |
1842 | ^ ((u32::from(dictb.dict[((ins_pos + 1) & LZ_DICT_SIZE_MASK) as usize]) |
1843 | << LZ_HASH_SHIFT) |
1844 | ^ u32::from(c))) |
1845 | & (LZ_HASH_SIZE as u32 - 1); |
1846 | |
1847 | dictb.next[(ins_pos & LZ_DICT_SIZE_MASK) as usize] = dictb.hash[hash as usize]; |
1848 | dictb.hash[hash as usize] = ins_pos as u16; |
1849 | } |
1850 | } |
1851 | |
1852 | src_pos += num_bytes_to_process; |
1853 | } |
1854 | |
1855 | d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); |
1856 | if d.params.flush == TDEFLFlush::None && (lookahead_size as usize) < MAX_MATCH_LEN { |
1857 | break; |
1858 | } |
1859 | |
1860 | let mut len_to_move = 1; |
1861 | let mut cur_match_dist = 0; |
1862 | let mut cur_match_len = if saved_match_len != 0 { |
1863 | saved_match_len |
1864 | } else { |
1865 | u32::from(MIN_MATCH_LEN) - 1 |
1866 | }; |
1867 | let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; |
1868 | if d.params.flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS) != 0 { |
1869 | // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte. |
1870 | if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 { |
1871 | let c = d.dict.b.dict[((cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK) as usize]; |
1872 | cur_match_len = d.dict.b.dict[cur_pos as usize..(cur_pos + lookahead_size) as usize] |
1873 | .iter() |
1874 | .take_while(|&x| *x == c) |
1875 | .count() as u32; |
1876 | if cur_match_len < MIN_MATCH_LEN.into() { |
1877 | cur_match_len = 0 |
1878 | } else { |
1879 | cur_match_dist = 1 |
1880 | } |
1881 | } |
1882 | } else { |
1883 | // Try to find a match for the bytes at the current position. |
1884 | let dist_len = d.dict.find_match( |
1885 | lookahead_pos, |
1886 | d.dict.size, |
1887 | lookahead_size as u32, |
1888 | cur_match_dist, |
1889 | cur_match_len, |
1890 | ); |
1891 | cur_match_dist = dist_len.0; |
1892 | cur_match_len = dist_len.1; |
1893 | } |
1894 | |
1895 | let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024; |
1896 | let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5; |
1897 | if far_and_small || filter_small || cur_pos == cur_match_dist as usize { |
1898 | cur_match_dist = 0; |
1899 | cur_match_len = 0; |
1900 | } |
1901 | |
1902 | if saved_match_len != 0 { |
1903 | if cur_match_len > saved_match_len { |
1904 | record_literal(&mut d.huff, &mut d.lz, saved_lit); |
1905 | if cur_match_len >= 128 { |
1906 | record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); |
1907 | saved_match_len = 0; |
1908 | len_to_move = cur_match_len as usize; |
1909 | } else { |
1910 | saved_lit = d.dict.b.dict[cur_pos as usize]; |
1911 | saved_match_dist = cur_match_dist; |
1912 | saved_match_len = cur_match_len; |
1913 | } |
1914 | } else { |
1915 | record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist); |
1916 | len_to_move = (saved_match_len - 1) as usize; |
1917 | saved_match_len = 0; |
1918 | } |
1919 | } else if cur_match_dist == 0 { |
1920 | record_literal( |
1921 | &mut d.huff, |
1922 | &mut d.lz, |
1923 | d.dict.b.dict[cmp::min(cur_pos as usize, d.dict.b.dict.len() - 1)], |
1924 | ); |
1925 | } else if d.params.greedy_parsing |
1926 | || (d.params.flags & TDEFL_RLE_MATCHES != 0) |
1927 | || cur_match_len >= 128 |
1928 | { |
1929 | // If we are using lazy matching, check for matches at the next byte if the current |
1930 | // match was shorter than 128 bytes. |
1931 | record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); |
1932 | len_to_move = cur_match_len as usize; |
1933 | } else { |
1934 | saved_lit = d.dict.b.dict[cmp::min(cur_pos as usize, d.dict.b.dict.len() - 1)]; |
1935 | saved_match_dist = cur_match_dist; |
1936 | saved_match_len = cur_match_len; |
1937 | } |
1938 | |
1939 | lookahead_pos += len_to_move; |
1940 | assert!(lookahead_size >= len_to_move); |
1941 | lookahead_size -= len_to_move; |
1942 | d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); |
1943 | |
1944 | let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8; |
1945 | let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; |
1946 | let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize; |
1947 | let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw); |
1948 | |
1949 | if lz_buf_tight || fat_or_raw { |
1950 | d.params.src_pos = src_pos; |
1951 | // These values are used in flush_block, so we need to write them back here. |
1952 | d.dict.lookahead_size = lookahead_size; |
1953 | d.dict.lookahead_pos = lookahead_pos; |
1954 | |
1955 | let n = flush_block(d, callback, TDEFLFlush::None) |
1956 | .unwrap_or(TDEFLStatus::PutBufFailed as i32); |
1957 | if n != 0 { |
1958 | d.params.saved_lit = saved_lit; |
1959 | d.params.saved_match_dist = saved_match_dist; |
1960 | d.params.saved_match_len = saved_match_len; |
1961 | return n > 0; |
1962 | } |
1963 | } |
1964 | } |
1965 | |
1966 | d.params.src_pos = src_pos; |
1967 | d.dict.lookahead_size = lookahead_size; |
1968 | d.dict.lookahead_pos = lookahead_pos; |
1969 | d.params.saved_lit = saved_lit; |
1970 | d.params.saved_match_dist = saved_match_dist; |
1971 | d.params.saved_match_len = saved_match_len; |
1972 | true |
1973 | } |
1974 | |
1975 | const COMP_FAST_LOOKAHEAD_SIZE: usize = 4096; |
1976 | |
1977 | fn compress_fast(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { |
1978 | let mut src_pos = d.params.src_pos; |
1979 | let mut lookahead_size = d.dict.lookahead_size; |
1980 | let mut lookahead_pos = d.dict.lookahead_pos; |
1981 | |
1982 | let mut cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; |
1983 | let in_buf = match callback.in_buf { |
1984 | None => return true, |
1985 | Some(in_buf) => in_buf, |
1986 | }; |
1987 | |
1988 | debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); |
1989 | |
1990 | while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size > 0) { |
1991 | let mut dst_pos = ((lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK) as usize; |
1992 | let mut num_bytes_to_process = cmp::min( |
1993 | in_buf.len() - src_pos, |
1994 | (COMP_FAST_LOOKAHEAD_SIZE - lookahead_size) as usize, |
1995 | ); |
1996 | lookahead_size += num_bytes_to_process; |
1997 | |
1998 | while num_bytes_to_process != 0 { |
1999 | let n = cmp::min(LZ_DICT_SIZE - dst_pos, num_bytes_to_process); |
2000 | d.dict.b.dict[dst_pos..dst_pos + n].copy_from_slice(&in_buf[src_pos..src_pos + n]); |
2001 | |
2002 | if dst_pos < MAX_MATCH_LEN - 1 { |
2003 | let m = cmp::min(n, MAX_MATCH_LEN - 1 - dst_pos); |
2004 | d.dict.b.dict[dst_pos + LZ_DICT_SIZE..dst_pos + LZ_DICT_SIZE + m] |
2005 | .copy_from_slice(&in_buf[src_pos..src_pos + m]); |
2006 | } |
2007 | |
2008 | src_pos += n; |
2009 | dst_pos = (dst_pos + n) & LZ_DICT_SIZE_MASK as usize; |
2010 | num_bytes_to_process -= n; |
2011 | } |
2012 | |
2013 | d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); |
2014 | if d.params.flush == TDEFLFlush::None && lookahead_size < COMP_FAST_LOOKAHEAD_SIZE { |
2015 | break; |
2016 | } |
2017 | |
2018 | while lookahead_size >= 4 { |
2019 | let mut cur_match_len = 1; |
2020 | |
2021 | let first_trigram = d.dict.read_unaligned_u32(cur_pos) & 0xFF_FFFF; |
2022 | |
2023 | let hash = (first_trigram ^ (first_trigram >> (24 - (LZ_HASH_BITS - 8)))) |
2024 | & LEVEL1_HASH_SIZE_MASK; |
2025 | |
2026 | let mut probe_pos = usize::from(d.dict.b.hash[hash as usize]); |
2027 | d.dict.b.hash[hash as usize] = lookahead_pos as u16; |
2028 | |
2029 | let mut cur_match_dist = (lookahead_pos - probe_pos as usize) as u16; |
2030 | if cur_match_dist as usize <= d.dict.size { |
2031 | probe_pos &= LZ_DICT_SIZE_MASK; |
2032 | |
2033 | let trigram = d.dict.read_unaligned_u32(probe_pos) & 0xFF_FFFF; |
2034 | |
2035 | if first_trigram == trigram { |
2036 | // Trigram was tested, so we can start with "+ 3" displacement. |
2037 | let mut p = cur_pos + 3; |
2038 | let mut q = probe_pos + 3; |
2039 | cur_match_len = (|| { |
2040 | for _ in 0..32 { |
2041 | let p_data: u64 = d.dict.read_unaligned_u64(p); |
2042 | let q_data: u64 = d.dict.read_unaligned_u64(q); |
2043 | let xor_data = p_data ^ q_data; |
2044 | if xor_data == 0 { |
2045 | p += 8; |
2046 | q += 8; |
2047 | } else { |
2048 | let trailing = xor_data.trailing_zeros(); |
2049 | return p as u32 - cur_pos as u32 + (trailing >> 3); |
2050 | } |
2051 | } |
2052 | |
2053 | if cur_match_dist == 0 { |
2054 | 0 |
2055 | } else { |
2056 | MAX_MATCH_LEN as u32 |
2057 | } |
2058 | })(); |
2059 | |
2060 | if cur_match_len < MIN_MATCH_LEN.into() |
2061 | || (cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024) |
2062 | { |
2063 | let lit = first_trigram as u8; |
2064 | cur_match_len = 1; |
2065 | d.lz.write_code(lit); |
2066 | *d.lz.get_flag() >>= 1; |
2067 | d.huff.count[0][lit as usize] += 1; |
2068 | } else { |
2069 | // Limit the match to the length of the lookahead so we don't create a match |
2070 | // that ends after the end of the input data. |
2071 | cur_match_len = cmp::min(cur_match_len, lookahead_size as u32); |
2072 | debug_assert!(cur_match_len >= MIN_MATCH_LEN.into()); |
2073 | debug_assert!(cur_match_dist >= 1); |
2074 | debug_assert!(cur_match_dist as usize <= LZ_DICT_SIZE); |
2075 | cur_match_dist -= 1; |
2076 | |
2077 | d.lz.write_code((cur_match_len - u32::from(MIN_MATCH_LEN)) as u8); |
2078 | d.lz.write_code(cur_match_dist as u8); |
2079 | d.lz.write_code((cur_match_dist >> 8) as u8); |
2080 | |
2081 | *d.lz.get_flag() >>= 1; |
2082 | *d.lz.get_flag() |= 0x80; |
2083 | if cur_match_dist < 512 { |
2084 | d.huff.count[1][SMALL_DIST_SYM[cur_match_dist as usize] as usize] += 1; |
2085 | } else { |
2086 | d.huff.count[1] |
2087 | [LARGE_DIST_SYM[(cur_match_dist >> 8) as usize] as usize] += 1; |
2088 | } |
2089 | |
2090 | d.huff.count[0][LEN_SYM[(cur_match_len - u32::from(MIN_MATCH_LEN)) as usize] |
2091 | as usize] += 1; |
2092 | } |
2093 | } else { |
2094 | d.lz.write_code(first_trigram as u8); |
2095 | *d.lz.get_flag() >>= 1; |
2096 | d.huff.count[0][first_trigram as u8 as usize] += 1; |
2097 | } |
2098 | |
2099 | d.lz.consume_flag(); |
2100 | d.lz.total_bytes += cur_match_len; |
2101 | lookahead_pos += cur_match_len as usize; |
2102 | d.dict.size = cmp::min(d.dict.size + cur_match_len as usize, LZ_DICT_SIZE); |
2103 | cur_pos = (cur_pos + cur_match_len as usize) & LZ_DICT_SIZE_MASK; |
2104 | lookahead_size -= cur_match_len as usize; |
2105 | |
2106 | if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { |
2107 | // These values are used in flush_block, so we need to write them back here. |
2108 | d.dict.lookahead_size = lookahead_size; |
2109 | d.dict.lookahead_pos = lookahead_pos; |
2110 | |
2111 | let n = match flush_block(d, callback, TDEFLFlush::None) { |
2112 | Err(_) => { |
2113 | d.params.src_pos = src_pos; |
2114 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
2115 | return false; |
2116 | } |
2117 | Ok(status) => status, |
2118 | }; |
2119 | if n != 0 { |
2120 | d.params.src_pos = src_pos; |
2121 | return n > 0; |
2122 | } |
2123 | debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); |
2124 | |
2125 | lookahead_size = d.dict.lookahead_size; |
2126 | lookahead_pos = d.dict.lookahead_pos; |
2127 | } |
2128 | } |
2129 | } |
2130 | |
2131 | while lookahead_size != 0 { |
2132 | let lit = d.dict.b.dict[cur_pos as usize]; |
2133 | d.lz.total_bytes += 1; |
2134 | d.lz.write_code(lit); |
2135 | *d.lz.get_flag() >>= 1; |
2136 | d.lz.consume_flag(); |
2137 | |
2138 | d.huff.count[0][lit as usize] += 1; |
2139 | lookahead_pos += 1; |
2140 | d.dict.size = cmp::min(d.dict.size + 1, LZ_DICT_SIZE); |
2141 | cur_pos = (cur_pos + 1) & LZ_DICT_SIZE_MASK; |
2142 | lookahead_size -= 1; |
2143 | |
2144 | if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { |
2145 | // These values are used in flush_block, so we need to write them back here. |
2146 | d.dict.lookahead_size = lookahead_size; |
2147 | d.dict.lookahead_pos = lookahead_pos; |
2148 | |
2149 | let n = match flush_block(d, callback, TDEFLFlush::None) { |
2150 | Err(_) => { |
2151 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
2152 | d.params.src_pos = src_pos; |
2153 | return false; |
2154 | } |
2155 | Ok(status) => status, |
2156 | }; |
2157 | if n != 0 { |
2158 | d.params.src_pos = src_pos; |
2159 | return n > 0; |
2160 | } |
2161 | |
2162 | lookahead_size = d.dict.lookahead_size; |
2163 | lookahead_pos = d.dict.lookahead_pos; |
2164 | } |
2165 | } |
2166 | } |
2167 | |
2168 | d.params.src_pos = src_pos; |
2169 | d.dict.lookahead_size = lookahead_size; |
2170 | d.dict.lookahead_pos = lookahead_pos; |
2171 | true |
2172 | } |
2173 | |
2174 | fn flush_output_buffer(c: &mut CallbackOxide, p: &mut ParamsOxide) -> (TDEFLStatus, usize, usize) { |
2175 | let mut res: (TDEFLStatus, usize, usize) = (TDEFLStatus::Okay, p.src_pos, 0); |
2176 | if let CallbackOut::Buf(ref mut cb: &mut CallbackBuf<'_>) = c.out { |
2177 | let n: usize = cmp::min(v1:cb.out_buf.len() - p.out_buf_ofs, v2:p.flush_remaining as usize); |
2178 | if n != 0 { |
2179 | (&mut cb.out_buf[p.out_buf_ofs..p.out_buf_ofs + n]) |
2180 | .copy_from_slice(&p.local_buf.b[p.flush_ofs as usize..p.flush_ofs as usize + n]); |
2181 | } |
2182 | p.flush_ofs += n as u32; |
2183 | p.flush_remaining -= n as u32; |
2184 | p.out_buf_ofs += n; |
2185 | res.2 = p.out_buf_ofs; |
2186 | } |
2187 | |
2188 | if p.finished && p.flush_remaining == 0 { |
2189 | res.0 = TDEFLStatus::Done |
2190 | } |
2191 | res |
2192 | } |
2193 | |
2194 | /// Main compression function. Tries to compress as much as possible from `in_buf` and |
2195 | /// puts compressed output into `out_buf`. |
2196 | /// |
2197 | /// The value of `flush` determines if the compressor should attempt to flush all output |
2198 | /// and alternatively try to finish the stream. |
2199 | /// |
2200 | /// Use [`TDEFLFlush::Finish`] on the final call to signal that the stream is finishing. |
2201 | /// |
2202 | /// Note that this function does not keep track of whether a flush marker has been output, so |
2203 | /// if called using [`TDEFLFlush::Sync`], the caller needs to ensure there is enough space in the |
2204 | /// output buffer if they want to avoid repeated flush markers. |
2205 | /// See #105 for details. |
2206 | /// |
2207 | /// # Returns |
2208 | /// Returns a tuple containing the current status of the compressor, the current position |
2209 | /// in the input buffer and the current position in the output buffer. |
2210 | pub fn compress( |
2211 | d: &mut CompressorOxide, |
2212 | in_buf: &[u8], |
2213 | out_buf: &mut [u8], |
2214 | flush: TDEFLFlush, |
2215 | ) -> (TDEFLStatus, usize, usize) { |
2216 | compress_inner( |
2217 | d, |
2218 | &mut CallbackOxide::new_callback_buf(in_buf, out_buf), |
2219 | flush, |
2220 | ) |
2221 | } |
2222 | |
2223 | /// Main compression function. Callbacks output. |
2224 | /// |
2225 | /// # Returns |
2226 | /// Returns a tuple containing the current status of the compressor, the current position |
2227 | /// in the input buffer. |
2228 | /// |
2229 | /// The caller is responsible for ensuring the `CallbackFunc` struct will not cause undefined |
2230 | /// behaviour. |
2231 | pub fn compress_to_output( |
2232 | d: &mut CompressorOxide, |
2233 | in_buf: &[u8], |
2234 | flush: TDEFLFlush, |
2235 | mut callback_func: impl FnMut(&[u8]) -> bool, |
2236 | ) -> (TDEFLStatus, usize) { |
2237 | let res: (TDEFLStatus, usize, usize) = compress_inner( |
2238 | d, |
2239 | &mut CallbackOxide::new_callback_func( |
2240 | in_buf, |
2241 | CallbackFunc { |
2242 | put_buf_func: &mut callback_func, |
2243 | }, |
2244 | ), |
2245 | flush, |
2246 | ); |
2247 | |
2248 | (res.0, res.1) |
2249 | } |
2250 | |
2251 | fn compress_inner( |
2252 | d: &mut CompressorOxide, |
2253 | callback: &mut CallbackOxide, |
2254 | flush: TDEFLFlush, |
2255 | ) -> (TDEFLStatus, usize, usize) { |
2256 | d.params.out_buf_ofs = 0; |
2257 | d.params.src_pos = 0; |
2258 | |
2259 | let prev_ok = d.params.prev_return_status == TDEFLStatus::Okay; |
2260 | let flush_finish_once = d.params.flush != TDEFLFlush::Finish || flush == TDEFLFlush::Finish; |
2261 | |
2262 | d.params.flush = flush; |
2263 | if !prev_ok || !flush_finish_once { |
2264 | d.params.prev_return_status = TDEFLStatus::BadParam; |
2265 | return (d.params.prev_return_status, 0, 0); |
2266 | } |
2267 | |
2268 | if d.params.flush_remaining != 0 || d.params.finished { |
2269 | let res = flush_output_buffer(callback, &mut d.params); |
2270 | d.params.prev_return_status = res.0; |
2271 | return res; |
2272 | } |
2273 | |
2274 | let one_probe = d.params.flags & MAX_PROBES_MASK as u32 == 1; |
2275 | let greedy = d.params.flags & TDEFL_GREEDY_PARSING_FLAG != 0; |
2276 | let filter_or_rle_or_raw = d.params.flags |
2277 | & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES) |
2278 | != 0; |
2279 | |
2280 | let compress_success = if one_probe && greedy && !filter_or_rle_or_raw { |
2281 | compress_fast(d, callback) |
2282 | } else { |
2283 | compress_normal(d, callback) |
2284 | }; |
2285 | |
2286 | if !compress_success { |
2287 | return ( |
2288 | d.params.prev_return_status, |
2289 | d.params.src_pos, |
2290 | d.params.out_buf_ofs, |
2291 | ); |
2292 | } |
2293 | |
2294 | if let Some(in_buf) = callback.in_buf { |
2295 | if d.params.flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32) != 0 { |
2296 | d.params.adler32 = update_adler32(d.params.adler32, &in_buf[..d.params.src_pos]); |
2297 | } |
2298 | } |
2299 | |
2300 | let flush_none = d.params.flush == TDEFLFlush::None; |
2301 | let in_left = callback.in_buf.map_or(0, |buf| buf.len()) - d.params.src_pos; |
2302 | let remaining = in_left != 0 || d.params.flush_remaining != 0; |
2303 | if !flush_none && d.dict.lookahead_size == 0 && !remaining { |
2304 | let flush = d.params.flush; |
2305 | match flush_block(d, callback, flush) { |
2306 | Err(_) => { |
2307 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
2308 | return ( |
2309 | d.params.prev_return_status, |
2310 | d.params.src_pos, |
2311 | d.params.out_buf_ofs, |
2312 | ); |
2313 | } |
2314 | Ok(x) if x < 0 => { |
2315 | return ( |
2316 | d.params.prev_return_status, |
2317 | d.params.src_pos, |
2318 | d.params.out_buf_ofs, |
2319 | ) |
2320 | } |
2321 | _ => { |
2322 | d.params.finished = d.params.flush == TDEFLFlush::Finish; |
2323 | if d.params.flush == TDEFLFlush::Full { |
2324 | memset(&mut d.dict.b.hash[..], 0); |
2325 | memset(&mut d.dict.b.next[..], 0); |
2326 | d.dict.size = 0; |
2327 | } |
2328 | } |
2329 | } |
2330 | } |
2331 | |
2332 | let res = flush_output_buffer(callback, &mut d.params); |
2333 | d.params.prev_return_status = res.0; |
2334 | |
2335 | res |
2336 | } |
2337 | |
2338 | /// Create a set of compression flags using parameters used by zlib and other compressors. |
2339 | /// Mainly intended for use with transition from c libraries as it deals with raw integers. |
2340 | /// |
2341 | /// # Parameters |
2342 | /// `level` determines compression level. Clamped to maximum of 10. Negative values result in |
2343 | /// `CompressionLevel::DefaultLevel`. |
2344 | /// `window_bits`: Above 0, wraps the stream in a zlib wrapper, 0 or negative for a raw deflate |
2345 | /// stream. |
2346 | /// `strategy`: Sets the strategy if this conforms to any of the values in `CompressionStrategy`. |
2347 | /// |
2348 | /// # Notes |
2349 | /// This function may be removed or moved to the `miniz_oxide_c_api` in the future. |
2350 | pub fn create_comp_flags_from_zip_params(level: i32, window_bits: i32, strategy: i32) -> u32 { |
2351 | let num_probes = (if level >= 0 { |
2352 | cmp::min(10, level) |
2353 | } else { |
2354 | CompressionLevel::DefaultLevel as i32 |
2355 | }) as usize; |
2356 | let greedy = if level <= 3 { |
2357 | TDEFL_GREEDY_PARSING_FLAG |
2358 | } else { |
2359 | 0 |
2360 | }; |
2361 | let mut comp_flags = NUM_PROBES[num_probes] | greedy; |
2362 | |
2363 | if window_bits > 0 { |
2364 | comp_flags |= TDEFL_WRITE_ZLIB_HEADER; |
2365 | } |
2366 | |
2367 | if level == 0 { |
2368 | comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; |
2369 | } else if strategy == CompressionStrategy::Filtered as i32 { |
2370 | comp_flags |= TDEFL_FILTER_MATCHES; |
2371 | } else if strategy == CompressionStrategy::HuffmanOnly as i32 { |
2372 | comp_flags &= !MAX_PROBES_MASK as u32; |
2373 | } else if strategy == CompressionStrategy::Fixed as i32 { |
2374 | comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; |
2375 | } else if strategy == CompressionStrategy::RLE as i32 { |
2376 | comp_flags |= TDEFL_RLE_MATCHES; |
2377 | } |
2378 | |
2379 | comp_flags |
2380 | } |
2381 | |
2382 | #[cfg (test)] |
2383 | mod test { |
2384 | use super::{ |
2385 | compress_to_output, create_comp_flags_from_zip_params, read_u16_le, write_u16_le, |
2386 | CompressionStrategy, CompressorOxide, TDEFLFlush, TDEFLStatus, DEFAULT_FLAGS, |
2387 | MZ_DEFAULT_WINDOW_BITS, |
2388 | }; |
2389 | use crate::inflate::decompress_to_vec; |
2390 | use alloc::vec; |
2391 | |
2392 | #[test ] |
2393 | fn u16_to_slice() { |
2394 | let mut slice = [0, 0]; |
2395 | write_u16_le(2000, &mut slice, 0); |
2396 | assert_eq!(slice, [208, 7]); |
2397 | } |
2398 | |
2399 | #[test ] |
2400 | fn u16_from_slice() { |
2401 | let mut slice = [208, 7]; |
2402 | assert_eq!(read_u16_le(&mut slice, 0), 2000); |
2403 | } |
2404 | |
2405 | #[test ] |
2406 | fn compress_output() { |
2407 | assert_eq!( |
2408 | DEFAULT_FLAGS, |
2409 | create_comp_flags_from_zip_params( |
2410 | 4, |
2411 | MZ_DEFAULT_WINDOW_BITS, |
2412 | CompressionStrategy::Default as i32 |
2413 | ) |
2414 | ); |
2415 | |
2416 | let slice = [ |
2417 | 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, |
2418 | ]; |
2419 | let mut encoded = vec![]; |
2420 | let flags = create_comp_flags_from_zip_params(6, 0, 0); |
2421 | let mut d = CompressorOxide::new(flags); |
2422 | let (status, in_consumed) = |
2423 | compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { |
2424 | encoded.extend_from_slice(out); |
2425 | true |
2426 | }); |
2427 | |
2428 | assert_eq!(status, TDEFLStatus::Done); |
2429 | assert_eq!(in_consumed, slice.len()); |
2430 | |
2431 | let decoded = decompress_to_vec(&encoded[..]).unwrap(); |
2432 | assert_eq!(&decoded[..], &slice[..]); |
2433 | } |
2434 | |
2435 | #[test ] |
2436 | /// Check fast compress mode |
2437 | fn compress_fast() { |
2438 | let slice = [ |
2439 | 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, |
2440 | ]; |
2441 | let mut encoded = vec![]; |
2442 | let flags = create_comp_flags_from_zip_params(1, 0, 0); |
2443 | let mut d = CompressorOxide::new(flags); |
2444 | let (status, in_consumed) = |
2445 | compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { |
2446 | encoded.extend_from_slice(out); |
2447 | true |
2448 | }); |
2449 | |
2450 | assert_eq!(status, TDEFLStatus::Done); |
2451 | assert_eq!(in_consumed, slice.len()); |
2452 | |
2453 | // Needs to be altered if algorithm improves. |
2454 | assert_eq!( |
2455 | &encoded[..], |
2456 | [99, 100, 98, 102, 1, 98, 48, 98, 3, 147, 204, 76, 204, 140, 76, 204, 0] |
2457 | ); |
2458 | |
2459 | let decoded = decompress_to_vec(&encoded[..]).unwrap(); |
2460 | assert_eq!(&decoded[..], &slice[..]); |
2461 | } |
2462 | } |
2463 | |