| 1 | //! Streaming compression functionality. |
| 2 | |
| 3 | use alloc::boxed::Box; |
| 4 | use core::convert::TryInto; |
| 5 | use core::{cmp, mem}; |
| 6 | |
| 7 | use super::super::*; |
| 8 | use super::deflate_flags::*; |
| 9 | use super::CompressionLevel; |
| 10 | use crate::deflate::buffer::{ |
| 11 | update_hash, HashBuffers, LocalBuf, LZ_CODE_BUF_SIZE, LZ_DICT_FULL_SIZE, LZ_HASH_BITS, |
| 12 | LZ_HASH_SHIFT, LZ_HASH_SIZE, OUT_BUF_SIZE, |
| 13 | }; |
| 14 | use crate::shared::{update_adler32, HUFFMAN_LENGTH_ORDER, MZ_ADLER32_INIT}; |
| 15 | use crate::DataFormat; |
| 16 | |
| 17 | // Currently not bubbled up outside this module, so can fill in with more |
| 18 | // context eventually if needed. |
| 19 | type Result<T, E = Error> = core::result::Result<T, E>; |
| 20 | struct Error {} |
| 21 | |
| 22 | const MAX_PROBES_MASK: i32 = 0xFFF; |
| 23 | |
| 24 | const MAX_SUPPORTED_HUFF_CODESIZE: usize = 32; |
| 25 | |
| 26 | /// Length code for length values. |
| 27 | #[rustfmt::skip] |
| 28 | const LEN_SYM: [u16; 256] = [ |
| 29 | 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, |
| 30 | 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272, |
| 31 | 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, |
| 32 | 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276, |
| 33 | 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, |
| 34 | 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, |
| 35 | 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, |
| 36 | 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, |
| 37 | 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, |
| 38 | 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, |
| 39 | 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, |
| 40 | 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, |
| 41 | 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, |
| 42 | 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, |
| 43 | 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, |
| 44 | 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285 |
| 45 | ]; |
| 46 | |
| 47 | /// Number of extra bits for length values. |
| 48 | #[rustfmt::skip] |
| 49 | const LEN_EXTRA: [u8; 256] = [ |
| 50 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, |
| 51 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
| 52 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
| 53 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
| 54 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 55 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 56 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 57 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 58 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 59 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 60 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 61 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 62 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 63 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 64 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 65 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0 |
| 66 | ]; |
| 67 | |
| 68 | /// Distance codes for distances smaller than 512. |
| 69 | #[rustfmt::skip] |
| 70 | const SMALL_DIST_SYM: [u8; 512] = [ |
| 71 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, |
| 72 | 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, |
| 73 | 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, |
| 74 | 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, |
| 75 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
| 76 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
| 77 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
| 78 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
| 79 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
| 80 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
| 81 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
| 82 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, |
| 83 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
| 84 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
| 85 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
| 86 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, |
| 87 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 88 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 89 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 90 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 91 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 92 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 93 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 94 | 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, |
| 95 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 96 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 97 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 98 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 99 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 100 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 101 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, |
| 102 | 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17 |
| 103 | ]; |
| 104 | |
| 105 | /// Number of extra bits for distances smaller than 512. |
| 106 | #[rustfmt::skip] |
| 107 | const SMALL_DIST_EXTRA: [u8; 512] = [ |
| 108 | 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
| 109 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, |
| 110 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 111 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, |
| 112 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 113 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 114 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 115 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
| 116 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 117 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 118 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 119 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 120 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 121 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 122 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, |
| 123 | 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 |
| 124 | ]; |
| 125 | |
/// Distance codes for distances of 512 and above.
| 127 | #[rustfmt::skip] |
| 128 | const LARGE_DIST_SYM: [u8; 128] = [ |
| 129 | 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, |
| 130 | 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, |
| 131 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, |
| 132 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, |
| 133 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, |
| 134 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, |
| 135 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, |
| 136 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 |
| 137 | ]; |
| 138 | |
/// Number of extra bits for distances of 512 and above.
| 140 | #[rustfmt::skip] |
| 141 | const LARGE_DIST_EXTRA: [u8; 128] = [ |
| 142 | 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, |
| 143 | 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, |
| 144 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
| 145 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, |
| 146 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
| 147 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
| 148 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, |
| 149 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 |
| 150 | ]; |
| 151 | |
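/// Bit masks for the lowest n bits: BITMASKS[n] == (1 << n) - 1.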
| 152 | #[rustfmt::skip] |
| 153 | const BITMASKS: [u32; 17] = [ |
| 154 | 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, |
| 155 | 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF |
| 156 | ]; |
| 157 | |
| 158 | /// The maximum number of checks for matches in the hash table the compressor will make for each |
| 159 | /// compression level. |
| 160 | const NUM_PROBES: [u32; 11] = [0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500]; |
| 161 | |
#[derive(Copy, Clone)]
| 163 | struct SymFreq { |
| 164 | key: u16, |
| 165 | sym_index: u16, |
| 166 | } |
| 167 | |
| 168 | pub mod deflate_flags { |
| 169 | /// Whether to use a zlib wrapper. |
| 170 | pub const TDEFL_WRITE_ZLIB_HEADER: u32 = 0x0000_1000; |
| 171 | /// Should we compute the adler32 checksum. |
| 172 | pub const TDEFL_COMPUTE_ADLER32: u32 = 0x0000_2000; |
    /// Should we use greedy parsing (as opposed to lazy parsing, where we look ahead one or more
    /// bytes to check for better matches).
| 175 | pub const TDEFL_GREEDY_PARSING_FLAG: u32 = 0x0000_4000; |
| 176 | /// Used in miniz to skip zero-initializing hash and dict. We don't do this here, so |
| 177 | /// this flag is ignored. |
| 178 | pub const TDEFL_NONDETERMINISTIC_PARSING_FLAG: u32 = 0x0000_8000; |
    /// Only look for matches with a distance of 1.
| 180 | pub const TDEFL_RLE_MATCHES: u32 = 0x0001_0000; |
| 181 | /// Only use matches that are at least 6 bytes long. |
| 182 | pub const TDEFL_FILTER_MATCHES: u32 = 0x0002_0000; |
| 183 | /// Force the compressor to only output static blocks. (Blocks using the default huffman codes |
| 184 | /// specified in the deflate specification.) |
| 185 | pub const TDEFL_FORCE_ALL_STATIC_BLOCKS: u32 = 0x0004_0000; |
| 186 | /// Force the compressor to only output raw/uncompressed blocks. |
| 187 | pub const TDEFL_FORCE_ALL_RAW_BLOCKS: u32 = 0x0008_0000; |
| 188 | } |
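
// The low 12 bits of the flags hold the match probe count, so a full flag set
// ors a probe count together with the flags above, e.g. (illustrative only):
// 16 | TDEFL_WRITE_ZLIB_HEADER | TDEFL_GREEDY_PARSING_FLAG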
| 189 | |
| 190 | /// Strategy setting for compression. |
| 191 | /// |
| 192 | /// The non-default settings offer some special-case compression variants. |
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
| 195 | pub enum CompressionStrategy { |
| 196 | /// Don't use any of the special strategies. |
| 197 | Default = 0, |
| 198 | /// Only use matches that are at least 5 bytes long. |
| 199 | Filtered = 1, |
| 200 | /// Don't look for matches, only huffman encode the literals. |
| 201 | HuffmanOnly = 2, |
    /// Only look for matches with a distance of 1, i.e. do run-length encoding only.
| 203 | RLE = 3, |
| 204 | /// Only use static/fixed blocks. (Blocks using the default huffman codes |
| 205 | /// specified in the deflate specification.) |
| 206 | Fixed = 4, |
| 207 | } |
| 208 | |
| 209 | /// A list of deflate flush types. |
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
| 211 | pub enum TDEFLFlush { |
| 212 | /// Normal operation. |
| 213 | /// |
| 214 | /// Compress as much as there is space for, and then return waiting for more input. |
| 215 | None = 0, |
| 216 | |
| 217 | /// Try to flush all the current data and output an empty raw block. |
| 218 | Sync = 2, |
| 219 | |
| 220 | /// Same as [`Sync`][Self::Sync], but reset the dictionary so that the following data does not |
| 221 | /// depend on previous data. |
| 222 | Full = 3, |
| 223 | |
| 224 | /// Try to flush everything and end the deflate stream. |
| 225 | /// |
| 226 | /// On success this will yield a [`TDEFLStatus::Done`] return status. |
| 227 | Finish = 4, |
| 228 | } |
| 229 | |
| 230 | impl From<MZFlush> for TDEFLFlush { |
| 231 | fn from(flush: MZFlush) -> Self { |
| 232 | match flush { |
| 233 | MZFlush::None => TDEFLFlush::None, |
| 234 | MZFlush::Sync => TDEFLFlush::Sync, |
| 235 | MZFlush::Full => TDEFLFlush::Full, |
| 236 | MZFlush::Finish => TDEFLFlush::Finish, |
| 237 | _ => TDEFLFlush::None, // TODO: ??? What to do ??? |
| 238 | } |
| 239 | } |
| 240 | } |
| 241 | |
| 242 | impl TDEFLFlush { |
| 243 | pub fn new(flush: i32) -> Result<Self, MZError> { |
| 244 | match flush { |
| 245 | 0 => Ok(TDEFLFlush::None), |
| 246 | 2 => Ok(TDEFLFlush::Sync), |
| 247 | 3 => Ok(TDEFLFlush::Full), |
| 248 | 4 => Ok(TDEFLFlush::Finish), |
| 249 | _ => Err(MZError::Param), |
| 250 | } |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | /// Return status of compression. |
#[repr(i32)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
| 257 | pub enum TDEFLStatus { |
| 258 | /// Usage error. |
| 259 | /// |
| 260 | /// This indicates that either the [`CompressorOxide`] experienced a previous error, or the |
| 261 | /// stream has already been [`TDEFLFlush::Finish`]'d. |
| 262 | BadParam = -2, |
| 263 | |
| 264 | /// Error putting data into output buffer. |
| 265 | /// |
| 266 | /// This usually indicates a too-small buffer. |
| 267 | PutBufFailed = -1, |
| 268 | |
| 269 | /// Compression succeeded normally. |
| 270 | Okay = 0, |
| 271 | |
| 272 | /// Compression succeeded and the deflate stream was ended. |
| 273 | /// |
| 274 | /// This is the result of calling compression with [`TDEFLFlush::Finish`]. |
| 275 | Done = 1, |
| 276 | } |
| 277 | |
| 278 | const MAX_HUFF_SYMBOLS: usize = 288; |
/// Mask for the smaller hash table used in fast compression mode (4096 entries).
const LEVEL1_HASH_SIZE_MASK: u32 = 4095;
| 281 | /// The number of huffman tables used by the compressor. |
/// Literal/length codes, distance codes, and the code lengths describing the other two tables.
| 283 | const MAX_HUFF_TABLES: usize = 3; |
| 284 | /// Literal/length codes |
| 285 | const MAX_HUFF_SYMBOLS_0: usize = 288; |
| 286 | /// Distance codes. |
| 287 | const MAX_HUFF_SYMBOLS_1: usize = 32; |
| 288 | /// Huffman length values. |
| 289 | const MAX_HUFF_SYMBOLS_2: usize = 19; |
| 290 | /// Size of the chained hash table. |
| 291 | pub(crate) const LZ_DICT_SIZE: usize = 32_768; |
| 292 | /// Mask used when stepping through the hash chains. |
| 293 | const LZ_DICT_SIZE_MASK: usize = (LZ_DICT_SIZE as u32 - 1) as usize; |
| 294 | /// The minimum length of a match. |
| 295 | const MIN_MATCH_LEN: u8 = 3; |
| 296 | /// The maximum length of a match. |
| 297 | pub(crate) const MAX_MATCH_LEN: usize = 258; |
| 298 | |
| 299 | const DEFAULT_FLAGS: u32 = NUM_PROBES[4] | TDEFL_WRITE_ZLIB_HEADER; |
| 300 | |
| 301 | mod zlib { |
| 302 | const DEFAULT_CM: u8 = 8; |
| 303 | const DEFAULT_CINFO: u8 = 7 << 4; |
| 304 | const _DEFAULT_FDICT: u8 = 0; |
| 305 | const DEFAULT_CMF: u8 = DEFAULT_CM | DEFAULT_CINFO; |
| 306 | /// The 16-bit value consisting of CMF and FLG must be divisible by this to be valid. |
| 307 | const FCHECK_DIVISOR: u8 = 31; |
| 308 | |
    /// Generate FCHECK from CMF and FLG (without FCHECK) so that they are correct according to the
    /// specification, i.e. (CMF*256 + FLG) % 31 == 0.
    /// Returns flg with the FCHECK bits added (any existing FCHECK bits are ignored).
| 312 | fn add_fcheck(cmf: u8, flg: u8) -> u8 { |
| 313 | let rem = ((usize::from(cmf) * 256) + usize::from(flg)) % usize::from(FCHECK_DIVISOR); |
| 314 | |
| 315 | // Clear existing FCHECK if any |
| 316 | let flg = flg & 0b11100000; |
| 317 | |
| 318 | // Casting is safe as rem can't overflow since it is a value mod 31 |
| 319 | // We can simply add the value to flg as (31 - rem) will never be above 2^5 |
| 320 | flg + (FCHECK_DIVISOR - rem as u8) |
| 321 | } |
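
    // Worked example (values chosen for illustration): with cmf = 0x78 and
    // flg = 0x40 (FLEVEL = 1), (0x78 * 256 + 0x40) % 31 == 1, so FCHECK is
    // 31 - 1 == 30 and the header becomes [0x78, 0x5E]; 0x785E % 31 == 0.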
| 322 | |
| 323 | fn zlib_level_from_flags(flags: u32) -> u8 { |
| 324 | use super::NUM_PROBES; |
| 325 | |
| 326 | let num_probes = flags & (super::MAX_PROBES_MASK as u32); |
| 327 | if flags & super::TDEFL_GREEDY_PARSING_FLAG != 0 { |
| 328 | if num_probes <= 1 { |
| 329 | 0 |
| 330 | } else { |
| 331 | 1 |
| 332 | } |
| 333 | } else if num_probes >= NUM_PROBES[9] { |
| 334 | 3 |
| 335 | } else { |
| 336 | 2 |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | /// Get the zlib header for the level using the default window size and no |
| 341 | /// dictionary. |
| 342 | fn header_from_level(level: u8) -> [u8; 2] { |
| 343 | let cmf = DEFAULT_CMF; |
| 344 | [cmf, add_fcheck(cmf, level << 6)] |
| 345 | } |
| 346 | |
| 347 | /// Create a zlib header from the given compression flags. |
| 348 | /// Only level is considered. |
| 349 | pub fn header_from_flags(flags: u32) -> [u8; 2] { |
| 350 | let level = zlib_level_from_flags(flags); |
| 351 | header_from_level(level) |
| 352 | } |
| 353 | |
    #[cfg(test)]
    mod test {
        #[test]
| 357 | fn zlib() { |
| 358 | use super::super::*; |
| 359 | use super::*; |
| 360 | |
| 361 | let test_level = |level, expected| { |
| 362 | let flags = create_comp_flags_from_zip_params( |
| 363 | level, |
| 364 | MZ_DEFAULT_WINDOW_BITS, |
| 365 | CompressionStrategy::Default as i32, |
| 366 | ); |
| 367 | assert_eq!(zlib_level_from_flags(flags), expected); |
| 368 | }; |
| 369 | |
| 370 | assert_eq!(zlib_level_from_flags(DEFAULT_FLAGS), 2); |
| 371 | test_level(0, 0); |
| 372 | test_level(1, 0); |
| 373 | test_level(2, 1); |
| 374 | test_level(3, 1); |
| 375 | for i in 4..=8 { |
| 376 | test_level(i, 2) |
| 377 | } |
| 378 | test_level(9, 3); |
| 379 | test_level(10, 3); |
| 380 | } |
| 381 | |
        #[test]
| 383 | fn test_header() { |
| 384 | let header = super::header_from_level(3); |
| 385 | assert_eq!( |
| 386 | ((usize::from(header[0]) * 256) + usize::from(header[1])) % 31, |
| 387 | 0 |
| 388 | ); |
| 389 | } |
| 390 | } |
| 391 | } |
| 392 | |
fn memset<T: Copy>(slice: &mut [T], val: T) {
    for x in slice {
        *x = val
    }
}
| 398 | |
#[cfg(test)]
#[inline]
| 401 | fn write_u16_le(val: u16, slice: &mut [u8], pos: usize) { |
| 402 | slice[pos] = val as u8; |
| 403 | slice[pos + 1] = (val >> 8) as u8; |
| 404 | } |
| 405 | |
// Read the two bytes starting at pos and interpret them as a u16.
#[inline]
| 408 | const fn read_u16_le(slice: &[u8], pos: usize) -> u16 { |
| 409 | // The compiler is smart enough to optimize this into an unaligned load. |
| 410 | slice[pos] as u16 | ((slice[pos + 1] as u16) << 8) |
| 411 | } |
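
// e.g. read_u16_le(&[0x34, 0x12], 0) == 0x1234.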
| 412 | |
| 413 | /// Main compression struct. |
| 414 | pub struct CompressorOxide { |
| 415 | lz: LZOxide, |
| 416 | params: ParamsOxide, |
    /// Put HuffmanOxide on the heap via `Box::default` to avoid
    /// excessive stack copies.
| 419 | huff: Box<HuffmanOxide>, |
| 420 | dict: DictOxide, |
| 421 | } |
| 422 | |
| 423 | impl CompressorOxide { |
| 424 | /// Create a new `CompressorOxide` with the given flags. |
| 425 | /// |
| 426 | /// # Notes |
| 427 | /// This function may be changed to take different parameters in the future. |
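    ///
    /// # Example
    ///
    /// ```
    /// // Minimal usage sketch; the public module path is assumed to be
    /// // `miniz_oxide::deflate::core`.
    /// use miniz_oxide::deflate::core::CompressorOxide;
    /// let compressor = CompressorOxide::new(0);
    /// // A fresh compressor starts with the initial adler32 value (1).
    /// assert_eq!(compressor.adler32(), 1);
    /// ```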
| 428 | pub fn new(flags: u32) -> Self { |
| 429 | CompressorOxide { |
| 430 | lz: LZOxide::new(), |
| 431 | params: ParamsOxide::new(flags), |
| 432 | huff: Box::default(), |
| 433 | dict: DictOxide::new(flags), |
| 434 | } |
| 435 | } |
| 436 | |
| 437 | /// Get the adler32 checksum of the currently encoded data. |
| 438 | pub const fn adler32(&self) -> u32 { |
| 439 | self.params.adler32 |
| 440 | } |
| 441 | |
| 442 | /// Get the return status of the previous [`compress`](fn.compress.html) |
| 443 | /// call with this compressor. |
| 444 | pub const fn prev_return_status(&self) -> TDEFLStatus { |
| 445 | self.params.prev_return_status |
| 446 | } |
| 447 | |
| 448 | /// Get the raw compressor flags. |
| 449 | /// |
| 450 | /// # Notes |
| 451 | /// This function may be deprecated or changed in the future to use more rust-style flags. |
| 452 | pub const fn flags(&self) -> i32 { |
| 453 | self.params.flags as i32 |
| 454 | } |
| 455 | |
| 456 | /// Returns whether the compressor is wrapping the data in a zlib format or not. |
| 457 | pub fn data_format(&self) -> DataFormat { |
| 458 | if (self.params.flags & TDEFL_WRITE_ZLIB_HEADER) != 0 { |
| 459 | DataFormat::Zlib |
| 460 | } else { |
| 461 | DataFormat::Raw |
| 462 | } |
| 463 | } |
| 464 | |
| 465 | /// Reset the state of the compressor, keeping the same parameters. |
| 466 | /// |
| 467 | /// This avoids re-allocating data. |
| 468 | pub fn reset(&mut self) { |
        // The LZ buffer and huffman state have no settings or dynamic memory
        // that need to be saved, so we simply replace them.
| 471 | self.lz = LZOxide::new(); |
| 472 | self.params.reset(); |
| 473 | *self.huff = HuffmanOxide::default(); |
| 474 | self.dict.reset(); |
| 475 | } |
| 476 | |
| 477 | /// Set the compression level of the compressor. |
| 478 | /// |
| 479 | /// Using this to change level after compression has started is supported. |
| 480 | /// # Notes |
| 481 | /// The compression strategy will be reset to the default one when this is called. |
| 482 | pub fn set_compression_level(&mut self, level: CompressionLevel) { |
| 483 | let format = self.data_format(); |
| 484 | self.set_format_and_level(format, level as u8); |
| 485 | } |
| 486 | |
| 487 | /// Set the compression level of the compressor using an integer value. |
| 488 | /// |
| 489 | /// Using this to change level after compression has started is supported. |
| 490 | /// # Notes |
| 491 | /// The compression strategy will be reset to the default one when this is called. |
| 492 | pub fn set_compression_level_raw(&mut self, level: u8) { |
| 493 | let format = self.data_format(); |
| 494 | self.set_format_and_level(format, level); |
| 495 | } |
| 496 | |
| 497 | /// Update the compression settings of the compressor. |
| 498 | /// |
| 499 | /// Changing the `DataFormat` after compression has started will result in |
| 500 | /// a corrupted stream. |
| 501 | /// |
| 502 | /// # Notes |
    /// This function is mainly intended for setting the initial settings after e.g. creating with
    /// `default` or after calling `CompressorOxide::reset()`, and behaviour may be changed
    /// to disallow calling it after starting compression in the future.
| 506 | pub fn set_format_and_level(&mut self, data_format: DataFormat, level: u8) { |
| 507 | let flags = create_comp_flags_from_zip_params( |
| 508 | level.into(), |
| 509 | data_format.to_window_bits(), |
| 510 | CompressionStrategy::Default as i32, |
| 511 | ); |
| 512 | self.params.update_flags(flags); |
| 513 | self.dict.update_flags(flags); |
| 514 | } |
| 515 | } |
| 516 | |
| 517 | impl Default for CompressorOxide { |
| 518 | /// Initialize the compressor with a level of 4, zlib wrapper and |
| 519 | /// the default strategy. |
| 520 | fn default() -> Self { |
| 521 | CompressorOxide { |
| 522 | lz: LZOxide::new(), |
| 523 | params: ParamsOxide::new(DEFAULT_FLAGS), |
| 524 | huff: Box::default(), |
| 525 | dict: DictOxide::new(DEFAULT_FLAGS), |
| 526 | } |
| 527 | } |
| 528 | } |
| 529 | |
/// Callback function used in `compress_to_output`.
| 531 | pub struct CallbackFunc<'a> { |
| 532 | pub put_buf_func: &'a mut dyn FnMut(&[u8]) -> bool, |
| 533 | } |
| 534 | |
| 535 | impl<'a> CallbackFunc<'a> { |
| 536 | fn flush_output( |
| 537 | &mut self, |
| 538 | saved_output: SavedOutputBufferOxide, |
| 539 | params: &mut ParamsOxide, |
| 540 | ) -> i32 { |
        // TODO: Since we can't verify the behaviour of the provided function,
        // calling it could be unsafe, so this whole function should
        // perhaps be marked unsafe as well.
        let call_success = (self.put_buf_func)(&params.local_buf.b[0..saved_output.pos]);
| 545 | |
| 546 | if !call_success { |
| 547 | params.prev_return_status = TDEFLStatus::PutBufFailed; |
| 548 | return params.prev_return_status as i32; |
| 549 | } |
| 550 | |
| 551 | params.flush_remaining as i32 |
| 552 | } |
| 553 | } |
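
// A minimal sketch of building a `CallbackFunc` from a closure that collects
// the compressed output into a `Vec` (illustrative, not part of this module):
//
// let mut out = alloc::vec::Vec::new();
// let callback = CallbackFunc {
//     put_buf_func: &mut |data: &[u8]| {
//         out.extend_from_slice(data);
//         true // returning false signals a failed write
//     },
// };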
| 554 | |
| 555 | struct CallbackBuf<'a> { |
| 556 | pub out_buf: &'a mut [u8], |
| 557 | } |
| 558 | |
| 559 | impl<'a> CallbackBuf<'a> { |
| 560 | fn flush_output( |
| 561 | &mut self, |
| 562 | saved_output: SavedOutputBufferOxide, |
| 563 | params: &mut ParamsOxide, |
| 564 | ) -> i32 { |
| 565 | if saved_output.local { |
            let n = cmp::min(saved_output.pos, self.out_buf.len() - params.out_buf_ofs);
            (self.out_buf[params.out_buf_ofs..params.out_buf_ofs + n])
                .copy_from_slice(&params.local_buf.b[..n]);
| 569 | |
| 570 | params.out_buf_ofs += n; |
| 571 | if saved_output.pos != n { |
| 572 | params.flush_ofs = n as u32; |
| 573 | params.flush_remaining = (saved_output.pos - n) as u32; |
| 574 | } |
| 575 | } else { |
| 576 | params.out_buf_ofs += saved_output.pos; |
| 577 | } |
| 578 | |
| 579 | params.flush_remaining as i32 |
| 580 | } |
| 581 | } |
| 582 | |
| 583 | enum CallbackOut<'a> { |
| 584 | Func(CallbackFunc<'a>), |
| 585 | Buf(CallbackBuf<'a>), |
| 586 | } |
| 587 | |
| 588 | impl<'a> CallbackOut<'a> { |
| 589 | fn new_output_buffer<'b>( |
| 590 | &'b mut self, |
| 591 | local_buf: &'b mut [u8], |
| 592 | out_buf_ofs: usize, |
| 593 | ) -> OutputBufferOxide<'b> { |
| 594 | let is_local; |
| 595 | let buf_len = OUT_BUF_SIZE - 16; |
| 596 | let chosen_buffer = match *self { |
| 597 | CallbackOut::Buf(ref mut cb) if cb.out_buf.len() - out_buf_ofs >= OUT_BUF_SIZE => { |
| 598 | is_local = false; |
| 599 | &mut cb.out_buf[out_buf_ofs..out_buf_ofs + buf_len] |
| 600 | } |
| 601 | _ => { |
| 602 | is_local = true; |
| 603 | &mut local_buf[..buf_len] |
| 604 | } |
| 605 | }; |
| 606 | |
| 607 | OutputBufferOxide { |
| 608 | inner: chosen_buffer, |
| 609 | inner_pos: 0, |
| 610 | local: is_local, |
| 611 | bit_buffer: 0, |
| 612 | bits_in: 0, |
| 613 | } |
| 614 | } |
| 615 | } |
| 616 | |
| 617 | struct CallbackOxide<'a> { |
| 618 | in_buf: Option<&'a [u8]>, |
| 619 | in_buf_size: Option<&'a mut usize>, |
| 620 | out_buf_size: Option<&'a mut usize>, |
| 621 | out: CallbackOut<'a>, |
| 622 | } |
| 623 | |
| 624 | impl<'a> CallbackOxide<'a> { |
| 625 | fn new_callback_buf(in_buf: &'a [u8], out_buf: &'a mut [u8]) -> Self { |
| 626 | CallbackOxide { |
| 627 | in_buf: Some(in_buf), |
| 628 | in_buf_size: None, |
| 629 | out_buf_size: None, |
| 630 | out: CallbackOut::Buf(CallbackBuf { out_buf }), |
| 631 | } |
| 632 | } |
| 633 | |
| 634 | fn new_callback_func(in_buf: &'a [u8], callback_func: CallbackFunc<'a>) -> Self { |
| 635 | CallbackOxide { |
| 636 | in_buf: Some(in_buf), |
| 637 | in_buf_size: None, |
| 638 | out_buf_size: None, |
| 639 | out: CallbackOut::Func(callback_func), |
| 640 | } |
| 641 | } |
| 642 | |
| 643 | fn update_size(&mut self, in_size: Option<usize>, out_size: Option<usize>) { |
| 644 | if let (Some(in_size), Some(size)) = (in_size, self.in_buf_size.as_mut()) { |
| 645 | **size = in_size; |
| 646 | } |
| 647 | |
| 648 | if let (Some(out_size), Some(size)) = (out_size, self.out_buf_size.as_mut()) { |
| 649 | **size = out_size |
| 650 | } |
| 651 | } |
| 652 | |
| 653 | fn flush_output( |
| 654 | &mut self, |
| 655 | saved_output: SavedOutputBufferOxide, |
| 656 | params: &mut ParamsOxide, |
| 657 | ) -> i32 { |
| 658 | if saved_output.pos == 0 { |
| 659 | return params.flush_remaining as i32; |
| 660 | } |
| 661 | |
| 662 | self.update_size(Some(params.src_pos), None); |
| 663 | match self.out { |
| 664 | CallbackOut::Func(ref mut cf) => cf.flush_output(saved_output, params), |
| 665 | CallbackOut::Buf(ref mut cb) => cb.flush_output(saved_output, params), |
| 666 | } |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | struct OutputBufferOxide<'a> { |
| 671 | pub inner: &'a mut [u8], |
| 672 | pub inner_pos: usize, |
| 673 | pub local: bool, |
| 674 | |
| 675 | pub bit_buffer: u32, |
| 676 | pub bits_in: u32, |
| 677 | } |
| 678 | |
| 679 | impl<'a> OutputBufferOxide<'a> { |
| 680 | fn put_bits(&mut self, bits: u32, len: u32) { |
| 681 | // TODO: Removing this assertion worsens performance |
| 682 | // Need to figure out why |
| 683 | assert!(bits <= ((1u32 << len) - 1u32)); |
| 684 | self.bit_buffer |= bits << self.bits_in; |
| 685 | self.bits_in += len; |
| 686 | |
| 687 | while self.bits_in >= 8 { |
| 688 | self.inner[self.inner_pos] = self.bit_buffer as u8; |
| 689 | self.inner_pos += 1; |
| 690 | self.bit_buffer >>= 8; |
| 691 | self.bits_in -= 8; |
| 692 | } |
| 693 | } |
| 694 | |
| 695 | const fn save(&self) -> SavedOutputBufferOxide { |
| 696 | SavedOutputBufferOxide { |
| 697 | pos: self.inner_pos, |
| 698 | bit_buffer: self.bit_buffer, |
| 699 | bits_in: self.bits_in, |
| 700 | local: self.local, |
| 701 | } |
| 702 | } |
| 703 | |
| 704 | fn load(&mut self, saved: SavedOutputBufferOxide) { |
| 705 | self.inner_pos = saved.pos; |
| 706 | self.bit_buffer = saved.bit_buffer; |
| 707 | self.bits_in = saved.bits_in; |
| 708 | self.local = saved.local; |
| 709 | } |
| 710 | |
| 711 | fn pad_to_bytes(&mut self) { |
| 712 | if self.bits_in != 0 { |
| 713 | let len = 8 - self.bits_in; |
| 714 | self.put_bits(0, len); |
| 715 | } |
| 716 | } |
| 717 | } |
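
// Bits are packed LSB-first as DEFLATE requires: for example, `put_bits(1, 1)`
// followed by `put_bits(0b01, 2)` sets bit 0 of the pending byte to 1 and
// bits 1..3 to 0b01; whole bytes are written to `inner` as 8 bits accumulate.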
| 718 | |
| 719 | struct SavedOutputBufferOxide { |
| 720 | pub pos: usize, |
| 721 | pub bit_buffer: u32, |
| 722 | pub bits_in: u32, |
| 723 | pub local: bool, |
| 724 | } |
| 725 | |
| 726 | struct BitBuffer { |
| 727 | pub bit_buffer: u64, |
| 728 | pub bits_in: u32, |
| 729 | } |
| 730 | |
| 731 | impl BitBuffer { |
| 732 | fn put_fast(&mut self, bits: u64, len: u32) { |
| 733 | self.bit_buffer |= bits << self.bits_in; |
| 734 | self.bits_in += len; |
| 735 | } |
| 736 | |
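    /// Write the whole 64-bit bit buffer to `output`, but only advance the
    /// output position by the number of complete bytes buffered; the remaining
    /// 0-7 bits stay in `bit_buffer` for the next call.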
| 737 | fn flush(&mut self, output: &mut OutputBufferOxide) -> Result<()> { |
        let pos = output.inner_pos;
        {
            // Isolation to please the borrow checker.
            let inner = &mut output.inner[pos..pos + 8];
            let bytes = u64::to_le_bytes(self.bit_buffer);
            inner.copy_from_slice(&bytes);
        }
        match output.inner_pos.checked_add((self.bits_in >> 3) as usize) {
            Some(n) if n <= output.inner.len() => output.inner_pos = n,
            _ => return Err(Error {}),
        }
| 749 | self.bit_buffer >>= self.bits_in & !7; |
| 750 | self.bits_in &= 7; |
| 751 | Ok(()) |
| 752 | } |
| 753 | } |
| 754 | |
| 755 | /// A struct containing data about huffman codes and symbol frequencies. |
| 756 | /// |
| 757 | /// NOTE: Only the literal/lengths have enough symbols to actually use |
| 758 | /// the full array. It's unclear why it's defined like this in miniz, |
| 759 | /// it could be for cache/alignment reasons. |
| 760 | struct HuffmanOxide { |
| 761 | /// Number of occurrences of each symbol. |
| 762 | pub count: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 763 | /// The bits of the huffman code assigned to the symbol |
| 764 | pub codes: [[u16; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 765 | /// The length of the huffman code assigned to the symbol. |
| 766 | pub code_sizes: [[u8; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 767 | } |
| 768 | |
| 769 | /// Tables used for literal/lengths in `HuffmanOxide`. |
| 770 | const LITLEN_TABLE: usize = 0; |
| 771 | /// Tables for distances. |
| 772 | const DIST_TABLE: usize = 1; |
| 773 | /// Tables for the run-length encoded huffman lengths for literals/lengths/distances. |
| 774 | const HUFF_CODES_TABLE: usize = 2; |
| 775 | |
| 776 | /// Status of RLE encoding of huffman code lengths. |
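///
/// DEFLATE (RFC 1951) run-length encodes the code length sequence using three
/// special symbols: 16 repeats the previous length 3-6 times (2 extra bits),
/// 17 repeats a zero length 3-10 times (3 extra bits), and 18 repeats a zero
/// length 11-138 times (7 extra bits).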
| 777 | struct Rle { |
| 778 | pub z_count: u32, |
| 779 | pub repeat_count: u32, |
| 780 | pub prev_code_size: u8, |
| 781 | } |
| 782 | |
| 783 | impl Rle { |
| 784 | fn prev_code_size( |
| 785 | &mut self, |
| 786 | packed_code_sizes: &mut [u8], |
| 787 | packed_pos: &mut usize, |
| 788 | h: &mut HuffmanOxide, |
| 789 | ) -> Result<()> { |
| 790 | let mut write = |buf| write(buf, packed_code_sizes, packed_pos); |
| 791 | let counts = &mut h.count[HUFF_CODES_TABLE]; |
| 792 | if self.repeat_count != 0 { |
| 793 | if self.repeat_count < 3 { |
| 794 | counts[self.prev_code_size as usize] = |
| 795 | counts[self.prev_code_size as usize].wrapping_add(self.repeat_count as u16); |
| 796 | let code = self.prev_code_size; |
| 797 | write(&[code, code, code][..self.repeat_count as usize])?; |
| 798 | } else { |
| 799 | counts[16] = counts[16].wrapping_add(1); |
| 800 | write(&[16, (self.repeat_count - 3) as u8][..])?; |
| 801 | } |
| 802 | self.repeat_count = 0; |
| 803 | } |
| 804 | |
| 805 | Ok(()) |
| 806 | } |
| 807 | |
| 808 | fn zero_code_size( |
| 809 | &mut self, |
| 810 | packed_code_sizes: &mut [u8], |
| 811 | packed_pos: &mut usize, |
| 812 | h: &mut HuffmanOxide, |
| 813 | ) -> Result<()> { |
| 814 | let mut write = |buf| write(buf, packed_code_sizes, packed_pos); |
| 815 | let counts = &mut h.count[HUFF_CODES_TABLE]; |
| 816 | if self.z_count != 0 { |
| 817 | if self.z_count < 3 { |
| 818 | counts[0] = counts[0].wrapping_add(self.z_count as u16); |
| 819 | write(&[0, 0, 0][..self.z_count as usize])?; |
| 820 | } else if self.z_count <= 10 { |
| 821 | counts[17] = counts[17].wrapping_add(1); |
| 822 | write(&[17, (self.z_count - 3) as u8][..])?; |
| 823 | } else { |
| 824 | counts[18] = counts[18].wrapping_add(1); |
| 825 | write(&[18, (self.z_count - 11) as u8][..])?; |
| 826 | } |
| 827 | self.z_count = 0; |
| 828 | } |
| 829 | |
| 830 | Ok(()) |
| 831 | } |
| 832 | } |
| 833 | |
| 834 | fn write(src: &[u8], dst: &mut [u8], dst_pos: &mut usize) -> Result<()> { |
| 835 | match dst.get_mut(*dst_pos..*dst_pos + src.len()) { |
        Some(s) => s.copy_from_slice(src),
| 837 | None => return Err(Error {}), |
| 838 | } |
| 839 | *dst_pos += src.len(); |
| 840 | Ok(()) |
| 841 | } |
| 842 | |
| 843 | impl Default for HuffmanOxide { |
| 844 | fn default() -> Self { |
| 845 | HuffmanOxide { |
| 846 | count: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 847 | codes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 848 | code_sizes: [[0; MAX_HUFF_SYMBOLS]; MAX_HUFF_TABLES], |
| 849 | } |
| 850 | } |
| 851 | } |
| 852 | |
| 853 | impl HuffmanOxide { |
| 854 | fn radix_sort_symbols<'a>( |
| 855 | symbols0: &'a mut [SymFreq], |
| 856 | symbols1: &'a mut [SymFreq], |
| 857 | ) -> &'a mut [SymFreq] { |
| 858 | let mut hist = [[0; 256]; 2]; |
| 859 | |
| 860 | for freq in symbols0.iter() { |
| 861 | hist[0][(freq.key & 0xFF) as usize] += 1; |
| 862 | hist[1][((freq.key >> 8) & 0xFF) as usize] += 1; |
| 863 | } |
| 864 | |
| 865 | let mut n_passes = 2; |
| 866 | if symbols0.len() == hist[1][0] { |
| 867 | n_passes -= 1; |
| 868 | } |
| 869 | |
| 870 | let mut current_symbols = symbols0; |
| 871 | let mut new_symbols = symbols1; |
| 872 | |
| 873 | for (pass, hist_item) in hist.iter().enumerate().take(n_passes) { |
| 874 | let mut offsets = [0; 256]; |
| 875 | let mut offset = 0; |
| 876 | for i in 0..256 { |
| 877 | offsets[i] = offset; |
| 878 | offset += hist_item[i]; |
| 879 | } |
| 880 | |
| 881 | for sym in current_symbols.iter() { |
| 882 | let j = ((sym.key >> (pass * 8)) & 0xFF) as usize; |
| 883 | new_symbols[offsets[j]] = *sym; |
| 884 | offsets[j] += 1; |
| 885 | } |
| 886 | |
| 887 | mem::swap(&mut current_symbols, &mut new_symbols); |
| 888 | } |
| 889 | |
| 890 | current_symbols |
| 891 | } |
| 892 | |
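    /// Compute minimum-redundancy (huffman) code lengths in place.
    ///
    /// Based on the in-place algorithm by Moffat and Katajainen that the
    /// original miniz uses.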
| 893 | fn calculate_minimum_redundancy(symbols: &mut [SymFreq]) { |
| 894 | match symbols.len() { |
| 895 | 0 => (), |
| 896 | 1 => symbols[0].key = 1, |
| 897 | n => { |
| 898 | symbols[0].key += symbols[1].key; |
| 899 | let mut root = 0; |
| 900 | let mut leaf = 2; |
| 901 | for next in 1..n - 1 { |
| 902 | if (leaf >= n) || (symbols[root].key < symbols[leaf].key) { |
| 903 | symbols[next].key = symbols[root].key; |
| 904 | symbols[root].key = next as u16; |
| 905 | root += 1; |
| 906 | } else { |
| 907 | symbols[next].key = symbols[leaf].key; |
| 908 | leaf += 1; |
| 909 | } |
| 910 | |
| 911 | if (leaf >= n) || (root < next && symbols[root].key < symbols[leaf].key) { |
| 912 | symbols[next].key = symbols[next].key.wrapping_add(symbols[root].key); |
| 913 | symbols[root].key = next as u16; |
| 914 | root += 1; |
| 915 | } else { |
| 916 | symbols[next].key = symbols[next].key.wrapping_add(symbols[leaf].key); |
| 917 | leaf += 1; |
| 918 | } |
| 919 | } |
| 920 | |
| 921 | symbols[n - 2].key = 0; |
| 922 | for next in (0..n - 2).rev() { |
| 923 | symbols[next].key = symbols[symbols[next].key as usize].key + 1; |
| 924 | } |
| 925 | |
| 926 | let mut avbl = 1; |
| 927 | let mut used = 0; |
| 928 | let mut dpth = 0; |
| 929 | let mut root = (n - 2) as i32; |
| 930 | let mut next = (n - 1) as i32; |
| 931 | while avbl > 0 { |
| 932 | while (root >= 0) && (symbols[root as usize].key == dpth) { |
| 933 | used += 1; |
| 934 | root -= 1; |
| 935 | } |
| 936 | while avbl > used { |
| 937 | symbols[next as usize].key = dpth; |
| 938 | next -= 1; |
| 939 | avbl -= 1; |
| 940 | } |
| 941 | avbl = 2 * used; |
| 942 | dpth += 1; |
| 943 | used = 0; |
| 944 | } |
| 945 | } |
| 946 | } |
| 947 | } |
| 948 | |
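    /// Limit code sizes to `max_code_size`, redistributing codes so that the
    /// Kraft inequality still holds (`total` below is the Kraft sum scaled by
    /// `1 << max_code_size`).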
| 949 | fn enforce_max_code_size(num_codes: &mut [i32], code_list_len: usize, max_code_size: usize) { |
| 950 | if code_list_len <= 1 { |
| 951 | return; |
| 952 | } |
| 953 | |
| 954 | num_codes[max_code_size] += num_codes[max_code_size + 1..].iter().sum::<i32>(); |
| 955 | let total = num_codes[1..=max_code_size] |
| 956 | .iter() |
| 957 | .rev() |
| 958 | .enumerate() |
| 959 | .fold(0u32, |total, (i, &x)| total + ((x as u32) << i)); |
| 960 | |
| 961 | for _ in (1 << max_code_size)..total { |
| 962 | num_codes[max_code_size] -= 1; |
| 963 | for i in (1..max_code_size).rev() { |
| 964 | if num_codes[i] != 0 { |
| 965 | num_codes[i] -= 1; |
| 966 | num_codes[i + 1] += 2; |
| 967 | break; |
| 968 | } |
| 969 | } |
| 970 | } |
| 971 | } |
| 972 | |
| 973 | fn optimize_table( |
| 974 | &mut self, |
| 975 | table_num: usize, |
| 976 | table_len: usize, |
| 977 | code_size_limit: usize, |
| 978 | static_table: bool, |
| 979 | ) { |
| 980 | let mut num_codes = [0i32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; |
| 981 | let mut next_code = [0u32; MAX_SUPPORTED_HUFF_CODESIZE + 1]; |
| 982 | |
| 983 | if static_table { |
| 984 | for &code_size in &self.code_sizes[table_num][..table_len] { |
| 985 | num_codes[code_size as usize] += 1; |
| 986 | } |
| 987 | } else { |
| 988 | let mut symbols0 = [SymFreq { |
| 989 | key: 0, |
| 990 | sym_index: 0, |
| 991 | }; MAX_HUFF_SYMBOLS]; |
| 992 | let mut symbols1 = [SymFreq { |
| 993 | key: 0, |
| 994 | sym_index: 0, |
| 995 | }; MAX_HUFF_SYMBOLS]; |
| 996 | |
| 997 | let mut num_used_symbols = 0; |
| 998 | for i in 0..table_len { |
| 999 | if self.count[table_num][i] != 0 { |
| 1000 | symbols0[num_used_symbols] = SymFreq { |
| 1001 | key: self.count[table_num][i], |
| 1002 | sym_index: i as u16, |
| 1003 | }; |
| 1004 | num_used_symbols += 1; |
| 1005 | } |
| 1006 | } |
| 1007 | |
| 1008 | let symbols = Self::radix_sort_symbols( |
| 1009 | &mut symbols0[..num_used_symbols], |
| 1010 | &mut symbols1[..num_used_symbols], |
| 1011 | ); |
| 1012 | Self::calculate_minimum_redundancy(symbols); |
| 1013 | |
| 1014 | for symbol in symbols.iter() { |
| 1015 | num_codes[symbol.key as usize] += 1; |
| 1016 | } |
| 1017 | |
| 1018 | Self::enforce_max_code_size(&mut num_codes, num_used_symbols, code_size_limit); |
| 1019 | |
| 1020 | memset(&mut self.code_sizes[table_num][..], 0); |
| 1021 | memset(&mut self.codes[table_num][..], 0); |
| 1022 | |
| 1023 | let mut last = num_used_symbols; |
| 1024 | for (i, &num_item) in num_codes |
| 1025 | .iter() |
| 1026 | .enumerate() |
| 1027 | .take(code_size_limit + 1) |
| 1028 | .skip(1) |
| 1029 | { |
| 1030 | let first = last - num_item as usize; |
| 1031 | for symbol in &symbols[first..last] { |
| 1032 | self.code_sizes[table_num][symbol.sym_index as usize] = i as u8; |
| 1033 | } |
| 1034 | last = first; |
| 1035 | } |
| 1036 | } |
| 1037 | |
| 1038 | let mut j = 0; |
| 1039 | next_code[1] = 0; |
| 1040 | for i in 2..=code_size_limit { |
| 1041 | j = (j + num_codes[i - 1]) << 1; |
| 1042 | next_code[i] = j as u32; |
| 1043 | } |
| 1044 | |
| 1045 | for (&code_size, huff_code) in self.code_sizes[table_num] |
| 1046 | .iter() |
| 1047 | .take(table_len) |
| 1048 | .zip(self.codes[table_num].iter_mut().take(table_len)) |
| 1049 | { |
| 1050 | if code_size == 0 { |
| 1051 | continue; |
| 1052 | } |
| 1053 | |
| 1054 | let mut code = next_code[code_size as usize]; |
| 1055 | next_code[code_size as usize] += 1; |
| 1056 | |
| 1057 | let mut rev_code = 0; |
| 1058 | for _ in 0..code_size { |
| 1059 | rev_code = (rev_code << 1) | (code & 1); |
| 1060 | code >>= 1; |
| 1061 | } |
| 1062 | *huff_code = rev_code as u16; |
| 1063 | } |
| 1064 | } |
| 1065 | |
| 1066 | fn start_static_block(&mut self, output: &mut OutputBufferOxide) { |
| 1067 | memset(&mut self.code_sizes[LITLEN_TABLE][0..144], 8); |
| 1068 | memset(&mut self.code_sizes[LITLEN_TABLE][144..256], 9); |
| 1069 | memset(&mut self.code_sizes[LITLEN_TABLE][256..280], 7); |
| 1070 | memset(&mut self.code_sizes[LITLEN_TABLE][280..288], 8); |
| 1071 | |
| 1072 | memset(&mut self.code_sizes[DIST_TABLE][..32], 5); |
| 1073 | |
| 1074 | self.optimize_table(LITLEN_TABLE, 288, 15, true); |
| 1075 | self.optimize_table(DIST_TABLE, 32, 15, true); |
| 1076 | |
| 1077 | output.put_bits(0b01, 2) |
| 1078 | } |
| 1079 | |
| 1080 | fn start_dynamic_block(&mut self, output: &mut OutputBufferOxide) -> Result<()> { |
| 1081 | // There will always be one, and only one end of block code. |
| 1082 | self.count[0][256] = 1; |
| 1083 | |
| 1084 | self.optimize_table(0, MAX_HUFF_SYMBOLS_0, 15, false); |
| 1085 | self.optimize_table(1, MAX_HUFF_SYMBOLS_1, 15, false); |
| 1086 | |
        let num_lit_codes = 286
            - self.code_sizes[0][257..286]
                .iter()
                .rev()
                .take_while(|&x| *x == 0)
                .count();

        let num_dist_codes = 30
            - self.code_sizes[1][1..30]
                .iter()
                .rev()
                .take_while(|&x| *x == 0)
                .count();
| 1100 | |
| 1101 | let mut code_sizes_to_pack = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; |
| 1102 | let mut packed_code_sizes = [0u8; MAX_HUFF_SYMBOLS_0 + MAX_HUFF_SYMBOLS_1]; |
| 1103 | |
| 1104 | let total_code_sizes_to_pack = num_lit_codes + num_dist_codes; |
| 1105 | |
| 1106 | code_sizes_to_pack[..num_lit_codes].copy_from_slice(&self.code_sizes[0][..num_lit_codes]); |
| 1107 | |
| 1108 | code_sizes_to_pack[num_lit_codes..total_code_sizes_to_pack] |
| 1109 | .copy_from_slice(&self.code_sizes[1][..num_dist_codes]); |
| 1110 | |
| 1111 | let mut rle = Rle { |
| 1112 | z_count: 0, |
| 1113 | repeat_count: 0, |
| 1114 | prev_code_size: 0xFF, |
| 1115 | }; |
| 1116 | |
| 1117 | memset(&mut self.count[HUFF_CODES_TABLE][..MAX_HUFF_SYMBOLS_2], 0); |
| 1118 | |
| 1119 | let mut packed_pos = 0; |
| 1120 | for &code_size in &code_sizes_to_pack[..total_code_sizes_to_pack] { |
| 1121 | if code_size == 0 { |
| 1122 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1123 | rle.z_count += 1; |
| 1124 | if rle.z_count == 138 { |
| 1125 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1126 | } |
| 1127 | } else { |
| 1128 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1129 | if code_size != rle.prev_code_size { |
| 1130 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1131 | self.count[HUFF_CODES_TABLE][code_size as usize] = |
| 1132 | self.count[HUFF_CODES_TABLE][code_size as usize].wrapping_add(1); |
| 1133 | write(&[code_size], &mut packed_code_sizes, &mut packed_pos)?; |
| 1134 | } else { |
| 1135 | rle.repeat_count += 1; |
| 1136 | if rle.repeat_count == 6 { |
| 1137 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1138 | } |
| 1139 | } |
| 1140 | } |
| 1141 | rle.prev_code_size = code_size; |
| 1142 | } |
| 1143 | |
| 1144 | if rle.repeat_count != 0 { |
| 1145 | rle.prev_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1146 | } else { |
| 1147 | rle.zero_code_size(&mut packed_code_sizes, &mut packed_pos, self)?; |
| 1148 | } |
| 1149 | |
| 1150 | self.optimize_table(2, MAX_HUFF_SYMBOLS_2, 7, false); |
| 1151 | |
| 1152 | output.put_bits(2, 2); |
| 1153 | |
| 1154 | output.put_bits((num_lit_codes - 257) as u32, 5); |
| 1155 | output.put_bits((num_dist_codes - 1) as u32, 5); |
| 1156 | |
| 1157 | let mut num_bit_lengths = 18 |
| 1158 | - HUFFMAN_LENGTH_ORDER |
| 1159 | .iter() |
| 1160 | .rev() |
| 1161 | .take_while(|&swizzle| self.code_sizes[HUFF_CODES_TABLE][*swizzle as usize] == 0) |
| 1162 | .count(); |
| 1163 | |
| 1164 | num_bit_lengths = cmp::max(4, num_bit_lengths + 1); |
| 1165 | output.put_bits(num_bit_lengths as u32 - 4, 4); |
| 1166 | for &swizzle in &HUFFMAN_LENGTH_ORDER[..num_bit_lengths] { |
| 1167 | output.put_bits( |
| 1168 | u32::from(self.code_sizes[HUFF_CODES_TABLE][swizzle as usize]), |
| 1169 | 3, |
| 1170 | ); |
| 1171 | } |
| 1172 | |
| 1173 | let mut packed_code_size_index = 0; |
| 1174 | while packed_code_size_index < packed_pos { |
| 1175 | let code = packed_code_sizes[packed_code_size_index] as usize; |
| 1176 | packed_code_size_index += 1; |
| 1177 | assert!(code < MAX_HUFF_SYMBOLS_2); |
| 1178 | output.put_bits( |
| 1179 | u32::from(self.codes[HUFF_CODES_TABLE][code]), |
| 1180 | u32::from(self.code_sizes[HUFF_CODES_TABLE][code]), |
| 1181 | ); |
| 1182 | if code >= 16 { |
| 1183 | output.put_bits( |
| 1184 | u32::from(packed_code_sizes[packed_code_size_index]), |
| 1185 | [2, 3, 7][code - 16], |
| 1186 | ); |
| 1187 | packed_code_size_index += 1; |
| 1188 | } |
| 1189 | } |
| 1190 | |
| 1191 | Ok(()) |
| 1192 | } |
| 1193 | } |
| 1194 | |
| 1195 | struct DictOxide { |
| 1196 | /// The maximum number of checks in the hash chain, for the initial, |
| 1197 | /// and the lazy match respectively. |
| 1198 | pub max_probes: [u32; 2], |
| 1199 | /// Buffer of input data. |
| 1200 | /// Padded with 1 byte to simplify matching code in `compress_fast`. |
| 1201 | pub b: Box<HashBuffers>, |
| 1202 | |
| 1203 | pub code_buf_dict_pos: usize, |
| 1204 | pub lookahead_size: usize, |
| 1205 | pub lookahead_pos: usize, |
| 1206 | pub size: usize, |
| 1207 | } |
| 1208 | |
| 1209 | const fn probes_from_flags(flags: u32) -> [u32; 2] { |
| 1210 | [ |
| 1211 | 1 + ((flags & 0xFFF) + 2) / 3, |
| 1212 | 1 + (((flags & 0xFFF) >> 2) + 2) / 3, |
| 1213 | ] |
| 1214 | } |
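
// e.g. the default level-4 probe count of 16 (NUM_PROBES[4]) yields
// max_probes == [7, 3].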
| 1215 | |
| 1216 | impl DictOxide { |
| 1217 | fn new(flags: u32) -> Self { |
| 1218 | DictOxide { |
| 1219 | max_probes: probes_from_flags(flags), |
| 1220 | b: Box::default(), |
| 1221 | code_buf_dict_pos: 0, |
| 1222 | lookahead_size: 0, |
| 1223 | lookahead_pos: 0, |
| 1224 | size: 0, |
| 1225 | } |
| 1226 | } |
| 1227 | |
| 1228 | fn update_flags(&mut self, flags: u32) { |
| 1229 | self.max_probes = probes_from_flags(flags); |
| 1230 | } |
| 1231 | |
| 1232 | fn reset(&mut self) { |
| 1233 | self.b.reset(); |
| 1234 | self.code_buf_dict_pos = 0; |
| 1235 | self.lookahead_size = 0; |
| 1236 | self.lookahead_pos = 0; |
| 1237 | self.size = 0; |
| 1238 | } |
| 1239 | |
| 1240 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
| 1241 | /// type T. |
    #[inline]
| 1243 | fn read_unaligned_u32(&self, pos: usize) -> u32 { |
| 1244 | // Masking the value here helps avoid bounds checks. |
| 1245 | let pos = pos & LZ_DICT_SIZE_MASK; |
| 1246 | let end = pos + 4; |
        // Somehow this assertion makes things faster.
        // TODO: As of May 2024 this does not seem to make any difference,
        // so consider removing.
| 1250 | assert!(end < LZ_DICT_FULL_SIZE); |
| 1251 | |
| 1252 | let bytes: [u8; 4] = self.b.dict[pos..end].try_into().unwrap(); |
| 1253 | u32::from_le_bytes(bytes) |
| 1254 | } |
| 1255 | |
| 1256 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
| 1257 | /// type T. |
    #[inline]
| 1259 | fn read_unaligned_u64(&self, pos: usize) -> u64 { |
| 1260 | // Help evade bounds/panic code check by masking the position value |
| 1261 | // This provides a small speedup at the cost of an instruction or two instead of |
| 1262 | // having to use unsafe. |
| 1263 | let pos = pos & LZ_DICT_SIZE_MASK; |
| 1264 | let bytes: [u8; 8] = self.b.dict[pos..pos + 8].try_into().unwrap(); |
| 1265 | u64::from_le_bytes(bytes) |
| 1266 | } |
| 1267 | |
| 1268 | /// Do an unaligned read of the data at `pos` in the dictionary and treat it as if it was of |
| 1269 | /// type T. |
    #[inline]
| 1271 | fn read_as_u16(&self, pos: usize) -> u16 { |
| 1272 | read_u16_le(&self.b.dict[..], pos) |
| 1273 | } |
| 1274 | |
| 1275 | /// Try to find a match for the data at lookahead_pos in the dictionary that is |
| 1276 | /// longer than `match_len`. |
| 1277 | /// Returns a tuple containing (match_distance, match_length). Will be equal to the input |
| 1278 | /// values if no better matches were found. |
| 1279 | fn find_match( |
| 1280 | &self, |
| 1281 | lookahead_pos: usize, |
| 1282 | max_dist: usize, |
| 1283 | max_match_len: u32, |
| 1284 | mut match_dist: u32, |
| 1285 | mut match_len: u32, |
| 1286 | ) -> (u32, u32) { |
        // Clamp match_len and max_match_len to be valid. (They should be valid when this is
        // called, but clamp them for now just in case, for safety reasons.)
| 1289 | // This should normally end up as at worst conditional moves, |
| 1290 | // so it shouldn't slow us down much. |
| 1291 | // TODO: Statically verify these so we don't need to do this. |
| 1292 | let max_match_len = cmp::min(MAX_MATCH_LEN as u32, max_match_len); |
| 1293 | match_len = cmp::max(match_len, 1); |
| 1294 | |
| 1295 | let pos = lookahead_pos & LZ_DICT_SIZE_MASK; |
| 1296 | let mut probe_pos = pos; |
| 1297 | // Number of probes into the hash chains. |
| 1298 | let mut num_probes_left = self.max_probes[(match_len >= 32) as usize]; |
| 1299 | |
| 1300 | // If we already have a match of the full length don't bother searching for another one. |
| 1301 | if max_match_len <= match_len { |
| 1302 | return (match_dist, match_len); |
| 1303 | } |
| 1304 | |
| 1305 | // Read the last byte of the current match, and the next one, used to compare matches. |
| 1306 | let mut c01: u16 = self.read_as_u16(pos + match_len as usize - 1); |
        // Read the first two bytes at the start of the current position.
| 1308 | let s01: u16 = self.read_as_u16(pos); |
| 1309 | |
| 1310 | 'outer: loop { |
| 1311 | let mut dist; |
| 1312 | 'found: loop { |
| 1313 | num_probes_left -= 1; |
| 1314 | if num_probes_left == 0 { |
| 1315 | // We have done as many probes in the hash chain as the current compression |
| 1316 | // settings allow, so return the best match we found, if any. |
| 1317 | return (match_dist, match_len); |
| 1318 | } |
| 1319 | |
| 1320 | for _ in 0..3 { |
| 1321 | let next_probe_pos = self.b.next[probe_pos] as usize; |
| 1322 | |
| 1323 | dist = (lookahead_pos - next_probe_pos) & 0xFFFF; |
| 1324 | if next_probe_pos == 0 || dist > max_dist { |
| 1325 | // We reached the end of the hash chain, or the next value is further away |
| 1326 | // than the maximum allowed distance, so return the best match we found, if |
| 1327 | // any. |
| 1328 | return (match_dist, match_len); |
| 1329 | } |
| 1330 | |
| 1331 | // Mask the position value to get the position in the hash chain of the next |
| 1332 | // position to match against. |
| 1333 | probe_pos = next_probe_pos & LZ_DICT_SIZE_MASK; |
| 1334 | |
| 1335 | if self.read_as_u16(probe_pos + match_len as usize - 1) == c01 { |
| 1336 | break 'found; |
| 1337 | } |
| 1338 | } |
| 1339 | } |
| 1340 | |
| 1341 | if dist == 0 { |
| 1342 | // We've looked through the whole match range, so return the best match we |
| 1343 | // found. |
| 1344 | return (match_dist, match_len); |
| 1345 | } |
| 1346 | |
| 1347 | // Check if the two first bytes match. |
| 1348 | if self.read_as_u16(probe_pos) != s01 { |
| 1349 | continue; |
| 1350 | } |
| 1351 | |
| 1352 | let mut p = pos + 2; |
| 1353 | let mut q = probe_pos + 2; |
| 1354 | // The first two bytes matched, so check the full length of the match. |
| 1355 | for _ in 0..32 { |
| 1356 | let p_data: u64 = self.read_unaligned_u64(p); |
| 1357 | let q_data: u64 = self.read_unaligned_u64(q); |
                // Compare 8 bytes at a time using unaligned loads of 64-bit integers.
| 1359 | let xor_data = p_data ^ q_data; |
| 1360 | if xor_data == 0 { |
| 1361 | p += 8; |
| 1362 | q += 8; |
| 1363 | } else { |
                    // If not all of the last 8 bytes matched, check how many of them did.
| 1365 | let trailing = xor_data.trailing_zeros(); |
| 1366 | |
| 1367 | let probe_len = p - pos + (trailing as usize >> 3); |
| 1368 | if probe_len > match_len as usize { |
| 1369 | match_dist = dist as u32; |
| 1370 | match_len = cmp::min(max_match_len, probe_len as u32); |
| 1371 | if match_len == max_match_len { |
                            // We found a match that had the maximum allowed length,
                            // so there is no point searching further.
| 1374 | return (match_dist, match_len); |
| 1375 | } |
| 1376 | // We found a better match, so save the last two bytes for further match |
| 1377 | // comparisons. |
| 1378 | c01 = self.read_as_u16(pos + match_len as usize - 1) |
| 1379 | } |
| 1380 | continue 'outer; |
| 1381 | } |
| 1382 | } |
| 1383 | |
| 1384 | return (dist as u32, cmp::min(max_match_len, MAX_MATCH_LEN as u32)); |
| 1385 | } |
| 1386 | } |
| 1387 | } |
| 1388 | |
| 1389 | struct ParamsOxide { |
| 1390 | pub flags: u32, |
| 1391 | pub greedy_parsing: bool, |
| 1392 | pub block_index: u32, |
| 1393 | |
| 1394 | pub saved_match_dist: u32, |
| 1395 | pub saved_match_len: u32, |
| 1396 | pub saved_lit: u8, |
| 1397 | |
| 1398 | pub flush: TDEFLFlush, |
| 1399 | pub flush_ofs: u32, |
| 1400 | pub flush_remaining: u32, |
| 1401 | pub finished: bool, |
| 1402 | |
| 1403 | pub adler32: u32, |
| 1404 | |
| 1405 | pub src_pos: usize, |
| 1406 | |
| 1407 | pub out_buf_ofs: usize, |
| 1408 | pub prev_return_status: TDEFLStatus, |
| 1409 | |
| 1410 | pub saved_bit_buffer: u32, |
| 1411 | pub saved_bits_in: u32, |
| 1412 | |
| 1413 | pub local_buf: Box<LocalBuf>, |
| 1414 | } |
| 1415 | |
| 1416 | impl ParamsOxide { |
| 1417 | fn new(flags: u32) -> Self { |
| 1418 | ParamsOxide { |
| 1419 | flags, |
| 1420 | greedy_parsing: flags & TDEFL_GREEDY_PARSING_FLAG != 0, |
| 1421 | block_index: 0, |
| 1422 | saved_match_dist: 0, |
| 1423 | saved_match_len: 0, |
| 1424 | saved_lit: 0, |
| 1425 | flush: TDEFLFlush::None, |
| 1426 | flush_ofs: 0, |
| 1427 | flush_remaining: 0, |
| 1428 | finished: false, |
| 1429 | adler32: MZ_ADLER32_INIT, |
| 1430 | src_pos: 0, |
| 1431 | out_buf_ofs: 0, |
| 1432 | prev_return_status: TDEFLStatus::Okay, |
| 1433 | saved_bit_buffer: 0, |
| 1434 | saved_bits_in: 0, |
| 1435 | local_buf: Box::default(), |
| 1436 | } |
| 1437 | } |
| 1438 | |
| 1439 | fn update_flags(&mut self, flags: u32) { |
| 1440 | self.flags = flags; |
| 1441 | self.greedy_parsing = self.flags & TDEFL_GREEDY_PARSING_FLAG != 0; |
| 1442 | } |
| 1443 | |
| 1444 | /// Reset state, saving settings. |
| 1445 | fn reset(&mut self) { |
| 1446 | self.block_index = 0; |
| 1447 | self.saved_match_len = 0; |
| 1448 | self.saved_match_dist = 0; |
| 1449 | self.saved_lit = 0; |
| 1450 | self.flush = TDEFLFlush::None; |
| 1451 | self.flush_ofs = 0; |
| 1452 | self.flush_remaining = 0; |
| 1453 | self.finished = false; |
| 1454 | self.adler32 = MZ_ADLER32_INIT; |
| 1455 | self.src_pos = 0; |
| 1456 | self.out_buf_ofs = 0; |
| 1457 | self.prev_return_status = TDEFLStatus::Okay; |
| 1458 | self.saved_bit_buffer = 0; |
| 1459 | self.saved_bits_in = 0; |
| 1460 | self.local_buf.b = [0; OUT_BUF_SIZE]; |
| 1461 | } |
| 1462 | } |
| 1463 | |
| 1464 | struct LZOxide { |
| 1465 | pub codes: [u8; LZ_CODE_BUF_SIZE], |
| 1466 | pub code_position: usize, |
| 1467 | pub flag_position: usize, |
| 1468 | |
| 1469 | // The total number of bytes in the current block. |
| 1470 | pub total_bytes: u32, |
| 1471 | pub num_flags_left: u32, |
| 1472 | } |
| 1473 | |
| 1474 | impl LZOxide { |
| 1475 | const fn new() -> Self { |
| 1476 | LZOxide { |
| 1477 | codes: [0; LZ_CODE_BUF_SIZE], |
| 1478 | code_position: 1, |
| 1479 | flag_position: 0, |
| 1480 | total_bytes: 0, |
| 1481 | num_flags_left: 8, |
| 1482 | } |
| 1483 | } |
| 1484 | |
| 1485 | fn write_code(&mut self, val: u8) { |
| 1486 | // Perf - go via u16 to help evade bounds check |
// TODO: see if we can use u16 for code_position in general.
| 1488 | self.codes[usize::from(self.code_position as u16)] = val; |
| 1489 | self.code_position += 1; |
| 1490 | } |
| 1491 | |
fn init_flag(&mut self) {
if self.num_flags_left == 8 {
// No codes were written since the flag byte was planted, so zero it
// and reclaim the reserved byte.
*self.get_flag() = 0;
self.code_position -= 1;
} else {
// Flags are accumulated from the high bit downwards; shift the recorded
// bits down so the first code's flag ends up in the least significant bit.
*self.get_flag() >>= self.num_flags_left;
}
}
| 1500 | |
| 1501 | fn get_flag(&mut self) -> &mut u8 { |
| 1502 | // Perf - go via u16 to help evade bounds check |
| 1503 | // TODO: see if we can use u16 for flag_position in general. |
| 1504 | &mut self.codes[usize::from(self.flag_position as u16)] |
| 1505 | } |
| 1506 | |
| 1507 | fn plant_flag(&mut self) { |
| 1508 | self.flag_position = self.code_position; |
| 1509 | self.code_position += 1; |
| 1510 | } |
| 1511 | |
| 1512 | fn consume_flag(&mut self) { |
| 1513 | self.num_flags_left -= 1; |
| 1514 | if self.num_flags_left == 0 { |
| 1515 | self.num_flags_left = 8; |
| 1516 | self.plant_flag(); |
| 1517 | } |
| 1518 | } |
| 1519 | } |
| 1520 | |
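// The LZ code buffer is a sequence of flag bytes, each followed by the (up
// to eight) codes it describes. Flag bits are consumed LSB-first: a set bit
// means the next code is a match, stored as three bytes (the length minus
// MIN_MATCH_LEN, then the distance minus 1 as a little-endian u16); a
// cleared bit means a single literal byte.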
| 1521 | fn compress_lz_codes( |
| 1522 | huff: &HuffmanOxide, |
| 1523 | output: &mut OutputBufferOxide, |
| 1524 | lz_code_buf: &[u8], |
| 1525 | ) -> Result<bool> { |
| 1526 | let mut flags = 1; |
| 1527 | let mut bb = BitBuffer { |
| 1528 | bit_buffer: u64::from(output.bit_buffer), |
| 1529 | bits_in: output.bits_in, |
| 1530 | }; |
| 1531 | |
| 1532 | let mut i: usize = 0; |
| 1533 | while i < lz_code_buf.len() { |
| 1534 | if flags == 1 { |
| 1535 | flags = u32::from(lz_code_buf[i]) | 0x100; |
| 1536 | i += 1; |
| 1537 | } |
| 1538 | |
| 1539 | // The lz code was a length code |
| 1540 | if flags & 1 == 1 { |
| 1541 | flags >>= 1; |
| 1542 | |
| 1543 | let sym; |
| 1544 | let num_extra_bits; |
| 1545 | |
| 1546 | let match_len = lz_code_buf[i] as usize; |
| 1547 | |
| 1548 | let match_dist = read_u16_le(lz_code_buf, i + 1); |
| 1549 | |
| 1550 | i += 3; |
| 1551 | |
| 1552 | debug_assert!(huff.code_sizes[0][LEN_SYM[match_len] as usize] != 0); |
| 1553 | bb.put_fast( |
| 1554 | u64::from(huff.codes[0][LEN_SYM[match_len] as usize]), |
| 1555 | u32::from(huff.code_sizes[0][LEN_SYM[match_len] as usize]), |
| 1556 | ); |
| 1557 | bb.put_fast( |
| 1558 | match_len as u64 & u64::from(BITMASKS[LEN_EXTRA[match_len] as usize]), |
| 1559 | u32::from(LEN_EXTRA[match_len]), |
| 1560 | ); |
| 1561 | |
| 1562 | if match_dist < 512 { |
| 1563 | sym = SMALL_DIST_SYM[match_dist as usize] as usize; |
| 1564 | num_extra_bits = SMALL_DIST_EXTRA[match_dist as usize] as usize; |
| 1565 | } else { |
| 1566 | sym = LARGE_DIST_SYM[(match_dist >> 8) as usize] as usize; |
| 1567 | num_extra_bits = LARGE_DIST_EXTRA[(match_dist >> 8) as usize] as usize; |
| 1568 | } |
| 1569 | |
| 1570 | debug_assert!(huff.code_sizes[1][sym] != 0); |
| 1571 | bb.put_fast( |
| 1572 | u64::from(huff.codes[1][sym]), |
| 1573 | u32::from(huff.code_sizes[1][sym]), |
| 1574 | ); |
| 1575 | bb.put_fast( |
| 1576 | u64::from(match_dist) & u64::from(BITMASKS[num_extra_bits]), |
| 1577 | num_extra_bits as u32, |
| 1578 | ); |
| 1579 | } else { |
| 1580 | // The lz code was a literal |
| 1581 | for _ in 0..3 { |
| 1582 | flags >>= 1; |
| 1583 | let lit = lz_code_buf[i]; |
| 1584 | i += 1; |
| 1585 | |
| 1586 | debug_assert!(huff.code_sizes[0][lit as usize] != 0); |
| 1587 | bb.put_fast( |
| 1588 | u64::from(huff.codes[0][lit as usize]), |
| 1589 | u32::from(huff.code_sizes[0][lit as usize]), |
| 1590 | ); |
| 1591 | |
| 1592 | if flags & 1 == 1 || i >= lz_code_buf.len() { |
| 1593 | break; |
| 1594 | } |
| 1595 | } |
| 1596 | } |
| 1597 | |
| 1598 | bb.flush(output)?; |
| 1599 | } |
| 1600 | |
| 1601 | output.bits_in = 0; |
| 1602 | output.bit_buffer = 0; |
| 1603 | while bb.bits_in != 0 { |
| 1604 | let n = cmp::min(bb.bits_in, 16); |
| 1605 | output.put_bits(bb.bit_buffer as u32 & BITMASKS[n as usize], n); |
| 1606 | bb.bit_buffer >>= n; |
| 1607 | bb.bits_in -= n; |
| 1608 | } |
| 1609 | |
| 1610 | // Output the end of block symbol. |
| 1611 | output.put_bits( |
| 1612 | u32::from(huff.codes[0][256]), |
| 1613 | u32::from(huff.code_sizes[0][256]), |
| 1614 | ); |
| 1615 | |
| 1616 | Ok(true) |
| 1617 | } |
| 1618 | |
| 1619 | fn compress_block( |
| 1620 | huff: &mut HuffmanOxide, |
| 1621 | output: &mut OutputBufferOxide, |
| 1622 | lz: &LZOxide, |
| 1623 | static_block: bool, |
| 1624 | ) -> Result<bool> { |
| 1625 | if static_block { |
| 1626 | huff.start_static_block(output); |
| 1627 | } else { |
| 1628 | huff.start_dynamic_block(output)?; |
| 1629 | } |
| 1630 | |
| 1631 | compress_lz_codes(huff, output, &lz.codes[..lz.code_position]) |
| 1632 | } |
| 1633 | |
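/// Write the current block to the output buffer, as a compressed block if
/// that saves space and otherwise as a stored (raw) block, and emit any
/// requested flush or finish marker.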
| 1634 | fn flush_block( |
| 1635 | d: &mut CompressorOxide, |
| 1636 | callback: &mut CallbackOxide, |
| 1637 | flush: TDEFLFlush, |
| 1638 | ) -> Result<i32> { |
| 1639 | let mut saved_buffer; |
| 1640 | { |
| 1641 | let mut output = callback |
| 1642 | .out |
| 1643 | .new_output_buffer(&mut d.params.local_buf.b, d.params.out_buf_ofs); |
| 1644 | output.bit_buffer = d.params.saved_bit_buffer; |
| 1645 | output.bits_in = d.params.saved_bits_in; |
| 1646 | |
| 1647 | let use_raw_block = (d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0) |
| 1648 | && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos) <= d.dict.size; |
| 1649 | |
| 1650 | assert!(d.params.flush_remaining == 0); |
| 1651 | d.params.flush_ofs = 0; |
| 1652 | d.params.flush_remaining = 0; |
| 1653 | |
| 1654 | d.lz.init_flag(); |
| 1655 | |
| 1656 | // If we are at the start of the stream, write the zlib header if requested. |
| 1657 | if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 && d.params.block_index == 0 { |
| 1658 | let header = zlib::header_from_flags(d.params.flags); |
| 1659 | output.put_bits(header[0].into(), 8); |
| 1660 | output.put_bits(header[1].into(), 8); |
| 1661 | } |
| 1662 | |
| 1663 | // Output the block header. |
| 1664 | output.put_bits((flush == TDEFLFlush::Finish) as u32, 1); |
| 1665 | |
| 1666 | saved_buffer = output.save(); |
| 1667 | |
| 1668 | let comp_success = if !use_raw_block { |
| 1669 | let use_static = |
| 1670 | (d.params.flags & TDEFL_FORCE_ALL_STATIC_BLOCKS != 0) || (d.lz.total_bytes < 48); |
| 1671 | compress_block(&mut d.huff, &mut output, &d.lz, use_static)? |
| 1672 | } else { |
| 1673 | false |
| 1674 | }; |
| 1675 | |
// If compression failed, or if the compressed output would take up more space than the raw
// input data, output a stored block instead, which has at most 5 bytes of overhead.
// We only use some simple heuristics for now.
// A stored block will have an overhead of at least 4 bytes containing the block length,
// but usually more due to the length fields having to start at a byte boundary and thus
// requiring up to 5 bytes of padding.
// As a static block will have an overhead of at most 1 bit per byte
// (as literals are either 8 or 9 bits), a raw block will
// never take up less space if the number of input bytes is less than 32.
| 1685 | let expanded = (d.lz.total_bytes > 32) |
| 1686 | && (output.inner_pos - saved_buffer.pos + 1 >= (d.lz.total_bytes as usize)) |
| 1687 | && (d.dict.lookahead_pos - d.dict.code_buf_dict_pos <= d.dict.size); |
| 1688 | |
| 1689 | if use_raw_block || expanded { |
| 1690 | output.load(saved_buffer); |
| 1691 | |
| 1692 | // Block header. |
| 1693 | output.put_bits(0, 2); |
| 1694 | |
// Block length has to start on a byte boundary, so pad.
| 1696 | output.pad_to_bytes(); |
| 1697 | |
| 1698 | // Block length and ones complement of block length. |
| 1699 | output.put_bits(d.lz.total_bytes & 0xFFFF, 16); |
| 1700 | output.put_bits(!d.lz.total_bytes & 0xFFFF, 16); |
| 1701 | |
| 1702 | // Write the actual bytes. |
| 1703 | for i in 0..d.lz.total_bytes { |
| 1704 | let pos = (d.dict.code_buf_dict_pos + i as usize) & LZ_DICT_SIZE_MASK; |
| 1705 | output.put_bits(u32::from(d.dict.b.dict[pos]), 8); |
| 1706 | } |
| 1707 | } else if !comp_success { |
| 1708 | output.load(saved_buffer); |
| 1709 | compress_block(&mut d.huff, &mut output, &d.lz, true)?; |
| 1710 | } |
| 1711 | |
| 1712 | if flush != TDEFLFlush::None { |
| 1713 | if flush == TDEFLFlush::Finish { |
| 1714 | output.pad_to_bytes(); |
| 1715 | if d.params.flags & TDEFL_WRITE_ZLIB_HEADER != 0 { |
| 1716 | let mut adler = d.params.adler32; |
| 1717 | for _ in 0..4 { |
| 1718 | output.put_bits((adler >> 24) & 0xFF, 8); |
| 1719 | adler <<= 8; |
| 1720 | } |
| 1721 | } |
| 1722 | } else { |
| 1723 | // Sync or Full flush. |
| 1724 | // Output an empty raw block. |
| 1725 | output.put_bits(0, 3); |
| 1726 | output.pad_to_bytes(); |
| 1727 | output.put_bits(0, 16); |
| 1728 | output.put_bits(0xFFFF, 16); |
| 1729 | } |
| 1730 | } |
| 1731 | |
| 1732 | memset(&mut d.huff.count[0][..MAX_HUFF_SYMBOLS_0], 0); |
| 1733 | memset(&mut d.huff.count[1][..MAX_HUFF_SYMBOLS_1], 0); |
| 1734 | |
| 1735 | d.lz.code_position = 1; |
| 1736 | d.lz.flag_position = 0; |
| 1737 | d.lz.num_flags_left = 8; |
| 1738 | d.dict.code_buf_dict_pos += d.lz.total_bytes as usize; |
| 1739 | d.lz.total_bytes = 0; |
| 1740 | d.params.block_index += 1; |
| 1741 | |
| 1742 | saved_buffer = output.save(); |
| 1743 | |
| 1744 | d.params.saved_bit_buffer = saved_buffer.bit_buffer; |
| 1745 | d.params.saved_bits_in = saved_buffer.bits_in; |
| 1746 | } |
| 1747 | |
| 1748 | Ok(callback.flush_output(saved_buffer, &mut d.params)) |
| 1749 | } |
| 1750 | |
| 1751 | fn record_literal(h: &mut HuffmanOxide, lz: &mut LZOxide, lit: u8) { |
| 1752 | lz.total_bytes += 1; |
lz.write_code(lit);
| 1754 | |
| 1755 | *lz.get_flag() >>= 1; |
| 1756 | lz.consume_flag(); |
| 1757 | |
| 1758 | h.count[0][lit as usize] += 1; |
| 1759 | } |
| 1760 | |
| 1761 | fn record_match(h: &mut HuffmanOxide, lz: &mut LZOxide, mut match_len: u32, mut match_dist: u32) { |
| 1762 | debug_assert!(match_len >= MIN_MATCH_LEN.into()); |
| 1763 | debug_assert!(match_dist >= 1); |
| 1764 | debug_assert!(match_dist as usize <= LZ_DICT_SIZE); |
| 1765 | |
| 1766 | lz.total_bytes += match_len; |
| 1767 | match_dist -= 1; |
| 1768 | match_len -= u32::from(MIN_MATCH_LEN); |
| 1769 | lz.write_code(match_len as u8); |
| 1770 | lz.write_code(match_dist as u8); |
| 1771 | lz.write_code((match_dist >> 8) as u8); |
| 1772 | |
| 1773 | *lz.get_flag() >>= 1; |
| 1774 | *lz.get_flag() |= 0x80; |
| 1775 | lz.consume_flag(); |
| 1776 | |
| 1777 | let symbol = if match_dist < 512 { |
| 1778 | SMALL_DIST_SYM[match_dist as usize] |
| 1779 | } else { |
| 1780 | LARGE_DIST_SYM[((match_dist >> 8) & 127) as usize] |
| 1781 | } as usize; |
| 1782 | h.count[1][symbol] += 1; |
| 1783 | // Perf - go via u8 to help optimize out bounds check. |
| 1784 | h.count[0][LEN_SYM[usize::from(match_len as u8)] as usize] += 1; |
| 1785 | } |
| 1786 | |
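/// Normal (non-fast) compression path: inserts input into the dictionary
/// hash chains and searches them for matches, using lazy matching unless
/// greedy parsing is enabled.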
| 1787 | fn compress_normal(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { |
| 1788 | let mut src_pos = d.params.src_pos; |
| 1789 | let in_buf = match callback.in_buf { |
| 1790 | None => return true, |
| 1791 | Some(in_buf) => in_buf, |
| 1792 | }; |
| 1793 | |
| 1794 | let mut lookahead_size = d.dict.lookahead_size; |
| 1795 | let mut lookahead_pos = d.dict.lookahead_pos; |
| 1796 | let mut saved_lit = d.params.saved_lit; |
| 1797 | let mut saved_match_dist = d.params.saved_match_dist; |
| 1798 | let mut saved_match_len = d.params.saved_match_len; |
| 1799 | |
| 1800 | while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size != 0) { |
| 1801 | let src_buf_left = in_buf.len() - src_pos; |
| 1802 | let num_bytes_to_process = cmp::min(src_buf_left, MAX_MATCH_LEN - lookahead_size); |
| 1803 | |
| 1804 | if lookahead_size + d.dict.size >= usize::from(MIN_MATCH_LEN) - 1 |
| 1805 | && num_bytes_to_process > 0 |
| 1806 | { |
| 1807 | let dictb = &mut d.dict.b; |
| 1808 | |
| 1809 | let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; |
| 1810 | let mut ins_pos = lookahead_pos + lookahead_size - 2; |
| 1811 | // Start the hash value from the first two bytes |
| 1812 | let mut hash = update_hash( |
| 1813 | u16::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]), |
| 1814 | dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK], |
| 1815 | ); |
| 1816 | |
| 1817 | lookahead_size += num_bytes_to_process; |
| 1818 | |
| 1819 | for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { |
| 1820 | // Add byte to input buffer. |
| 1821 | dictb.dict[dst_pos] = c; |
| 1822 | if dst_pos < MAX_MATCH_LEN - 1 { |
| 1823 | dictb.dict[LZ_DICT_SIZE + dst_pos] = c; |
| 1824 | } |
| 1825 | |
| 1826 | // Generate hash from the current byte, |
| 1827 | hash = update_hash(hash, c); |
| 1828 | dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; |
| 1829 | // and insert it into the hash chain. |
| 1830 | dictb.hash[hash as usize] = ins_pos as u16; |
| 1831 | dst_pos = (dst_pos + 1) & LZ_DICT_SIZE_MASK; |
| 1832 | ins_pos += 1; |
| 1833 | } |
| 1834 | src_pos += num_bytes_to_process; |
| 1835 | } else { |
| 1836 | let dictb = &mut d.dict.b; |
| 1837 | for &c in &in_buf[src_pos..src_pos + num_bytes_to_process] { |
| 1838 | let dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; |
| 1839 | dictb.dict[dst_pos] = c; |
| 1840 | if dst_pos < MAX_MATCH_LEN - 1 { |
| 1841 | dictb.dict[LZ_DICT_SIZE + dst_pos] = c; |
| 1842 | } |
| 1843 | |
| 1844 | lookahead_size += 1; |
| 1845 | if lookahead_size + d.dict.size >= MIN_MATCH_LEN.into() { |
| 1846 | let ins_pos = lookahead_pos + lookahead_size - 3; |
| 1847 | let hash = ((u32::from(dictb.dict[ins_pos & LZ_DICT_SIZE_MASK]) |
| 1848 | << (LZ_HASH_SHIFT * 2)) |
| 1849 | ^ ((u32::from(dictb.dict[(ins_pos + 1) & LZ_DICT_SIZE_MASK]) |
| 1850 | << LZ_HASH_SHIFT) |
| 1851 | ^ u32::from(c))) |
| 1852 | & (LZ_HASH_SIZE as u32 - 1); |
| 1853 | |
| 1854 | dictb.next[ins_pos & LZ_DICT_SIZE_MASK] = dictb.hash[hash as usize]; |
| 1855 | dictb.hash[hash as usize] = ins_pos as u16; |
| 1856 | } |
| 1857 | } |
| 1858 | |
| 1859 | src_pos += num_bytes_to_process; |
| 1860 | } |
| 1861 | |
| 1862 | d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); |
| 1863 | if d.params.flush == TDEFLFlush::None && lookahead_size < MAX_MATCH_LEN { |
| 1864 | break; |
| 1865 | } |
| 1866 | |
| 1867 | let mut len_to_move = 1; |
| 1868 | let mut cur_match_dist = 0; |
| 1869 | let mut cur_match_len = if saved_match_len != 0 { |
| 1870 | saved_match_len |
| 1871 | } else { |
| 1872 | u32::from(MIN_MATCH_LEN) - 1 |
| 1873 | }; |
| 1874 | let cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; |
| 1875 | if d.params.flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS) != 0 { |
| 1876 | // If TDEFL_RLE_MATCHES is set, we only look for repeating sequences of the current byte. |
| 1877 | if d.dict.size != 0 && d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS == 0 { |
| 1878 | let c = d.dict.b.dict[(cur_pos.wrapping_sub(1)) & LZ_DICT_SIZE_MASK]; |
| 1879 | cur_match_len = d.dict.b.dict[cur_pos..(cur_pos + lookahead_size)] |
| 1880 | .iter() |
| 1881 | .take_while(|&x| *x == c) |
| 1882 | .count() as u32; |
| 1883 | if cur_match_len < MIN_MATCH_LEN.into() { |
| 1884 | cur_match_len = 0 |
| 1885 | } else { |
| 1886 | cur_match_dist = 1 |
| 1887 | } |
| 1888 | } |
| 1889 | } else { |
| 1890 | // Try to find a match for the bytes at the current position. |
| 1891 | let dist_len = d.dict.find_match( |
| 1892 | lookahead_pos, |
| 1893 | d.dict.size, |
| 1894 | lookahead_size as u32, |
| 1895 | cur_match_dist, |
| 1896 | cur_match_len, |
| 1897 | ); |
| 1898 | cur_match_dist = dist_len.0; |
| 1899 | cur_match_len = dist_len.1; |
| 1900 | } |
| 1901 | |
| 1902 | let far_and_small = cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024; |
| 1903 | let filter_small = d.params.flags & TDEFL_FILTER_MATCHES != 0 && cur_match_len <= 5; |
| 1904 | if far_and_small || filter_small || cur_pos == cur_match_dist as usize { |
| 1905 | cur_match_dist = 0; |
| 1906 | cur_match_len = 0; |
| 1907 | } |
| 1908 | |
| 1909 | if saved_match_len != 0 { |
| 1910 | if cur_match_len > saved_match_len { |
| 1911 | record_literal(&mut d.huff, &mut d.lz, saved_lit); |
| 1912 | if cur_match_len >= 128 { |
| 1913 | record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); |
| 1914 | saved_match_len = 0; |
| 1915 | len_to_move = cur_match_len as usize; |
| 1916 | } else { |
| 1917 | saved_lit = d.dict.b.dict[cur_pos]; |
| 1918 | saved_match_dist = cur_match_dist; |
| 1919 | saved_match_len = cur_match_len; |
| 1920 | } |
| 1921 | } else { |
| 1922 | record_match(&mut d.huff, &mut d.lz, saved_match_len, saved_match_dist); |
| 1923 | len_to_move = (saved_match_len - 1) as usize; |
| 1924 | saved_match_len = 0; |
| 1925 | } |
| 1926 | } else if cur_match_dist == 0 { |
| 1927 | record_literal( |
| 1928 | &mut d.huff, |
| 1929 | &mut d.lz, |
| 1930 | d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)], |
| 1931 | ); |
| 1932 | } else if d.params.greedy_parsing |
| 1933 | || (d.params.flags & TDEFL_RLE_MATCHES != 0) |
| 1934 | || cur_match_len >= 128 |
| 1935 | { |
| 1936 | // If we are using lazy matching, check for matches at the next byte if the current |
| 1937 | // match was shorter than 128 bytes. |
| 1938 | record_match(&mut d.huff, &mut d.lz, cur_match_len, cur_match_dist); |
| 1939 | len_to_move = cur_match_len as usize; |
| 1940 | } else { |
| 1941 | saved_lit = d.dict.b.dict[cmp::min(cur_pos, d.dict.b.dict.len() - 1)]; |
| 1942 | saved_match_dist = cur_match_dist; |
| 1943 | saved_match_len = cur_match_len; |
| 1944 | } |
| 1945 | |
| 1946 | lookahead_pos += len_to_move; |
| 1947 | assert!(lookahead_size >= len_to_move); |
| 1948 | lookahead_size -= len_to_move; |
| 1949 | d.dict.size = cmp::min(d.dict.size + len_to_move, LZ_DICT_SIZE); |
| 1950 | |
| 1951 | let lz_buf_tight = d.lz.code_position > LZ_CODE_BUF_SIZE - 8; |
| 1952 | let raw = d.params.flags & TDEFL_FORCE_ALL_RAW_BLOCKS != 0; |
| 1953 | let fat = ((d.lz.code_position * 115) >> 7) >= d.lz.total_bytes as usize; |
| 1954 | let fat_or_raw = (d.lz.total_bytes > 31 * 1024) && (fat || raw); |
| 1955 | |
| 1956 | if lz_buf_tight || fat_or_raw { |
| 1957 | d.params.src_pos = src_pos; |
| 1958 | // These values are used in flush_block, so we need to write them back here. |
| 1959 | d.dict.lookahead_size = lookahead_size; |
| 1960 | d.dict.lookahead_pos = lookahead_pos; |
| 1961 | |
| 1962 | let n = flush_block(d, callback, TDEFLFlush::None) |
| 1963 | .unwrap_or(TDEFLStatus::PutBufFailed as i32); |
| 1964 | if n != 0 { |
| 1965 | d.params.saved_lit = saved_lit; |
| 1966 | d.params.saved_match_dist = saved_match_dist; |
| 1967 | d.params.saved_match_len = saved_match_len; |
| 1968 | return n > 0; |
| 1969 | } |
| 1970 | } |
| 1971 | } |
| 1972 | |
| 1973 | d.params.src_pos = src_pos; |
| 1974 | d.dict.lookahead_size = lookahead_size; |
| 1975 | d.dict.lookahead_pos = lookahead_pos; |
| 1976 | d.params.saved_lit = saved_lit; |
| 1977 | d.params.saved_match_dist = saved_match_dist; |
| 1978 | d.params.saved_match_len = saved_match_len; |
| 1979 | true |
| 1980 | } |
| 1981 | |
| 1982 | const COMP_FAST_LOOKAHEAD_SIZE: usize = 4096; |
| 1983 | |
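/// Fast compression path used for the lowest compression settings: greedy
/// parsing with a single-entry, trigram-keyed hash table instead of hash
/// chains.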
| 1984 | fn compress_fast(d: &mut CompressorOxide, callback: &mut CallbackOxide) -> bool { |
| 1985 | let mut src_pos = d.params.src_pos; |
| 1986 | let mut lookahead_size = d.dict.lookahead_size; |
| 1987 | let mut lookahead_pos = d.dict.lookahead_pos; |
| 1988 | |
| 1989 | let mut cur_pos = lookahead_pos & LZ_DICT_SIZE_MASK; |
| 1990 | let in_buf = match callback.in_buf { |
| 1991 | None => return true, |
| 1992 | Some(in_buf) => in_buf, |
| 1993 | }; |
| 1994 | |
| 1995 | debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); |
| 1996 | |
| 1997 | while src_pos < in_buf.len() || (d.params.flush != TDEFLFlush::None && lookahead_size > 0) { |
| 1998 | let mut dst_pos = (lookahead_pos + lookahead_size) & LZ_DICT_SIZE_MASK; |
| 1999 | let mut num_bytes_to_process = cmp::min( |
| 2000 | in_buf.len() - src_pos, |
| 2001 | COMP_FAST_LOOKAHEAD_SIZE - lookahead_size, |
| 2002 | ); |
| 2003 | lookahead_size += num_bytes_to_process; |
| 2004 | |
| 2005 | while num_bytes_to_process != 0 { |
| 2006 | let n = cmp::min(LZ_DICT_SIZE - dst_pos, num_bytes_to_process); |
| 2007 | d.dict.b.dict[dst_pos..dst_pos + n].copy_from_slice(&in_buf[src_pos..src_pos + n]); |
| 2008 | |
| 2009 | if dst_pos < MAX_MATCH_LEN - 1 { |
| 2010 | let m = cmp::min(n, MAX_MATCH_LEN - 1 - dst_pos); |
| 2011 | d.dict.b.dict[dst_pos + LZ_DICT_SIZE..dst_pos + LZ_DICT_SIZE + m] |
| 2012 | .copy_from_slice(&in_buf[src_pos..src_pos + m]); |
| 2013 | } |
| 2014 | |
| 2015 | src_pos += n; |
| 2016 | dst_pos = (dst_pos + n) & LZ_DICT_SIZE_MASK; |
| 2017 | num_bytes_to_process -= n; |
| 2018 | } |
| 2019 | |
| 2020 | d.dict.size = cmp::min(LZ_DICT_SIZE - lookahead_size, d.dict.size); |
| 2021 | if d.params.flush == TDEFLFlush::None && lookahead_size < COMP_FAST_LOOKAHEAD_SIZE { |
| 2022 | break; |
| 2023 | } |
| 2024 | |
| 2025 | while lookahead_size >= 4 { |
| 2026 | let mut cur_match_len = 1; |
| 2027 | |
| 2028 | let first_trigram = d.dict.read_unaligned_u32(cur_pos) & 0xFF_FFFF; |
| 2029 | |
| 2030 | let hash = (first_trigram ^ (first_trigram >> (24 - (LZ_HASH_BITS - 8)))) |
| 2031 | & LEVEL1_HASH_SIZE_MASK; |
| 2032 | |
| 2033 | let mut probe_pos = usize::from(d.dict.b.hash[hash as usize]); |
| 2034 | d.dict.b.hash[hash as usize] = lookahead_pos as u16; |
| 2035 | |
| 2036 | let mut cur_match_dist = (lookahead_pos - probe_pos) as u16; |
| 2037 | if cur_match_dist as usize <= d.dict.size { |
| 2038 | probe_pos &= LZ_DICT_SIZE_MASK; |
| 2039 | |
| 2040 | let trigram = d.dict.read_unaligned_u32(probe_pos) & 0xFF_FFFF; |
| 2041 | |
| 2042 | if first_trigram == trigram { |
// The trigram was already matched, so we can start comparing from a + 3 displacement.
| 2044 | let mut p = cur_pos + 3; |
| 2045 | let mut q = probe_pos + 3; |
| 2046 | cur_match_len = (|| { |
| 2047 | for _ in 0..32 { |
| 2048 | let p_data: u64 = d.dict.read_unaligned_u64(p); |
| 2049 | let q_data: u64 = d.dict.read_unaligned_u64(q); |
| 2050 | let xor_data = p_data ^ q_data; |
| 2051 | if xor_data == 0 { |
| 2052 | p += 8; |
| 2053 | q += 8; |
| 2054 | } else { |
| 2055 | let trailing = xor_data.trailing_zeros(); |
| 2056 | return p as u32 - cur_pos as u32 + (trailing >> 3); |
| 2057 | } |
| 2058 | } |
| 2059 | |
| 2060 | if cur_match_dist == 0 { |
| 2061 | 0 |
| 2062 | } else { |
| 2063 | MAX_MATCH_LEN as u32 |
| 2064 | } |
| 2065 | })(); |
| 2066 | |
| 2067 | if cur_match_len < MIN_MATCH_LEN.into() |
| 2068 | || (cur_match_len == MIN_MATCH_LEN.into() && cur_match_dist >= 8 * 1024) |
| 2069 | { |
| 2070 | let lit = first_trigram as u8; |
| 2071 | cur_match_len = 1; |
| 2072 | d.lz.write_code(lit); |
| 2073 | *d.lz.get_flag() >>= 1; |
| 2074 | d.huff.count[0][lit as usize] += 1; |
| 2075 | } else { |
| 2076 | // Limit the match to the length of the lookahead so we don't create a match |
| 2077 | // that ends after the end of the input data. |
| 2078 | cur_match_len = cmp::min(cur_match_len, lookahead_size as u32); |
| 2079 | debug_assert!(cur_match_len >= MIN_MATCH_LEN.into()); |
| 2080 | debug_assert!(cur_match_dist >= 1); |
| 2081 | debug_assert!(cur_match_dist as usize <= LZ_DICT_SIZE); |
| 2082 | cur_match_dist -= 1; |
| 2083 | |
| 2084 | d.lz.write_code((cur_match_len - u32::from(MIN_MATCH_LEN)) as u8); |
| 2085 | d.lz.write_code(cur_match_dist as u8); |
| 2086 | d.lz.write_code((cur_match_dist >> 8) as u8); |
| 2087 | |
| 2088 | *d.lz.get_flag() >>= 1; |
| 2089 | *d.lz.get_flag() |= 0x80; |
| 2090 | if cur_match_dist < 512 { |
| 2091 | d.huff.count[1][SMALL_DIST_SYM[cur_match_dist as usize] as usize] += 1; |
| 2092 | } else { |
| 2093 | d.huff.count[1] |
| 2094 | [LARGE_DIST_SYM[(cur_match_dist >> 8) as usize] as usize] += 1; |
| 2095 | } |
| 2096 | |
| 2097 | d.huff.count[0][LEN_SYM[(cur_match_len - u32::from(MIN_MATCH_LEN)) as usize] |
| 2098 | as usize] += 1; |
| 2099 | } |
| 2100 | } else { |
| 2101 | d.lz.write_code(first_trigram as u8); |
| 2102 | *d.lz.get_flag() >>= 1; |
| 2103 | d.huff.count[0][first_trigram as u8 as usize] += 1; |
| 2104 | } |
| 2105 | |
| 2106 | d.lz.consume_flag(); |
| 2107 | d.lz.total_bytes += cur_match_len; |
| 2108 | lookahead_pos += cur_match_len as usize; |
| 2109 | d.dict.size = cmp::min(d.dict.size + cur_match_len as usize, LZ_DICT_SIZE); |
| 2110 | cur_pos = (cur_pos + cur_match_len as usize) & LZ_DICT_SIZE_MASK; |
| 2111 | lookahead_size -= cur_match_len as usize; |
| 2112 | |
| 2113 | if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { |
| 2114 | // These values are used in flush_block, so we need to write them back here. |
| 2115 | d.dict.lookahead_size = lookahead_size; |
| 2116 | d.dict.lookahead_pos = lookahead_pos; |
| 2117 | |
| 2118 | let n = match flush_block(d, callback, TDEFLFlush::None) { |
| 2119 | Err(_) => { |
| 2120 | d.params.src_pos = src_pos; |
| 2121 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
| 2122 | return false; |
| 2123 | } |
| 2124 | Ok(status) => status, |
| 2125 | }; |
| 2126 | if n != 0 { |
| 2127 | d.params.src_pos = src_pos; |
| 2128 | return n > 0; |
| 2129 | } |
| 2130 | debug_assert!(d.lz.code_position < LZ_CODE_BUF_SIZE - 2); |
| 2131 | |
| 2132 | lookahead_size = d.dict.lookahead_size; |
| 2133 | lookahead_pos = d.dict.lookahead_pos; |
| 2134 | } |
| 2135 | } |
| 2136 | } |
| 2137 | |
| 2138 | while lookahead_size != 0 { |
| 2139 | let lit = d.dict.b.dict[cur_pos]; |
| 2140 | d.lz.total_bytes += 1; |
| 2141 | d.lz.write_code(lit); |
| 2142 | *d.lz.get_flag() >>= 1; |
| 2143 | d.lz.consume_flag(); |
| 2144 | |
| 2145 | d.huff.count[0][lit as usize] += 1; |
| 2146 | lookahead_pos += 1; |
| 2147 | d.dict.size = cmp::min(d.dict.size + 1, LZ_DICT_SIZE); |
| 2148 | cur_pos = (cur_pos + 1) & LZ_DICT_SIZE_MASK; |
| 2149 | lookahead_size -= 1; |
| 2150 | |
| 2151 | if d.lz.code_position > LZ_CODE_BUF_SIZE - 8 { |
| 2152 | // These values are used in flush_block, so we need to write them back here. |
| 2153 | d.dict.lookahead_size = lookahead_size; |
| 2154 | d.dict.lookahead_pos = lookahead_pos; |
| 2155 | |
| 2156 | let n = match flush_block(d, callback, TDEFLFlush::None) { |
| 2157 | Err(_) => { |
| 2158 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
| 2159 | d.params.src_pos = src_pos; |
| 2160 | return false; |
| 2161 | } |
| 2162 | Ok(status) => status, |
| 2163 | }; |
| 2164 | if n != 0 { |
| 2165 | d.params.src_pos = src_pos; |
| 2166 | return n > 0; |
| 2167 | } |
| 2168 | |
| 2169 | lookahead_size = d.dict.lookahead_size; |
| 2170 | lookahead_pos = d.dict.lookahead_pos; |
| 2171 | } |
| 2172 | } |
| 2173 | } |
| 2174 | |
| 2175 | d.params.src_pos = src_pos; |
| 2176 | d.dict.lookahead_size = lookahead_size; |
| 2177 | d.dict.lookahead_pos = lookahead_pos; |
| 2178 | true |
| 2179 | } |
| 2180 | |
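/// Copy as much pending output as possible from the local buffer to the
/// caller's output buffer (when using buffer-based output).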
| 2181 | fn flush_output_buffer(c: &mut CallbackOxide, p: &mut ParamsOxide) -> (TDEFLStatus, usize, usize) { |
| 2182 | let mut res: (TDEFLStatus, usize, usize) = (TDEFLStatus::Okay, p.src_pos, 0); |
if let CallbackOut::Buf(ref mut cb) = c.out {
let n = cmp::min(cb.out_buf.len() - p.out_buf_ofs, p.flush_remaining as usize);
| 2185 | if n != 0 { |
| 2186 | cb.out_buf[p.out_buf_ofs..p.out_buf_ofs + n] |
| 2187 | .copy_from_slice(&p.local_buf.b[p.flush_ofs as usize..p.flush_ofs as usize + n]); |
| 2188 | } |
| 2189 | p.flush_ofs += n as u32; |
| 2190 | p.flush_remaining -= n as u32; |
| 2191 | p.out_buf_ofs += n; |
| 2192 | res.2 = p.out_buf_ofs; |
| 2193 | } |
| 2194 | |
| 2195 | if p.finished && p.flush_remaining == 0 { |
| 2196 | res.0 = TDEFLStatus::Done |
| 2197 | } |
| 2198 | res |
| 2199 | } |
| 2200 | |
| 2201 | /// Main compression function. Tries to compress as much as possible from `in_buf` and |
| 2202 | /// puts compressed output into `out_buf`. |
| 2203 | /// |
/// The value of `flush` determines whether the compressor should attempt to flush all output
/// and, with [`TDEFLFlush::Finish`], finish the stream.
| 2206 | /// |
| 2207 | /// Use [`TDEFLFlush::Finish`] on the final call to signal that the stream is finishing. |
| 2208 | /// |
| 2209 | /// Note that this function does not keep track of whether a flush marker has been output, so |
| 2210 | /// if called using [`TDEFLFlush::Sync`], the caller needs to ensure there is enough space in the |
| 2211 | /// output buffer if they want to avoid repeated flush markers. |
| 2212 | /// See #105 for details. |
| 2213 | /// |
| 2214 | /// # Returns |
| 2215 | /// Returns a tuple containing the current status of the compressor, the current position |
| 2216 | /// in the input buffer and the current position in the output buffer. |
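///
/// # Example
///
/// A minimal buffer-to-buffer sketch. The `miniz_oxide` paths are assumed
/// rather than taken from this file, so the example is marked `ignore`:
///
/// ```ignore
/// use miniz_oxide::deflate::core::{
///     compress, create_comp_flags_from_zip_params, CompressorOxide, TDEFLFlush, TDEFLStatus,
/// };
///
/// let flags = create_comp_flags_from_zip_params(6, 1, 0);
/// let mut d = CompressorOxide::new(flags);
/// let input = b"Hello, hello, deflate!";
/// // The output buffer needs room for the compressed data plus block
/// // overhead; a generous fixed size is fine for this small input.
/// let mut out = [0u8; 128];
/// let (status, bytes_in, bytes_out) = compress(&mut d, input, &mut out, TDEFLFlush::Finish);
/// assert_eq!(status, TDEFLStatus::Done);
/// assert_eq!(bytes_in, input.len());
/// let _compressed = &out[..bytes_out];
/// ```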
| 2217 | pub fn compress( |
| 2218 | d: &mut CompressorOxide, |
| 2219 | in_buf: &[u8], |
| 2220 | out_buf: &mut [u8], |
| 2221 | flush: TDEFLFlush, |
| 2222 | ) -> (TDEFLStatus, usize, usize) { |
| 2223 | compress_inner( |
| 2224 | d, |
| 2225 | &mut CallbackOxide::new_callback_buf(in_buf, out_buf), |
| 2226 | flush, |
| 2227 | ) |
| 2228 | } |
| 2229 | |
/// Main compression function. Passes the compressed output to the provided callback.
///
/// # Returns
/// Returns a tuple containing the current status of the compressor and the current position
/// in the input buffer.
| 2235 | /// |
| 2236 | /// The caller is responsible for ensuring the `CallbackFunc` struct will not cause undefined |
| 2237 | /// behaviour. |
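///
/// # Example
///
/// A sketch mirroring the unit tests at the end of this module; the callback
/// collects the compressed output into a `Vec` and returns `true` to let
/// compression continue (paths assumed, hence `ignore`):
///
/// ```ignore
/// use miniz_oxide::deflate::core::{
///     compress_to_output, create_comp_flags_from_zip_params, CompressorOxide, TDEFLFlush,
/// };
///
/// let mut d = CompressorOxide::new(create_comp_flags_from_zip_params(6, 0, 0));
/// let mut encoded = Vec::new();
/// let (status, in_consumed) =
///     compress_to_output(&mut d, b"some input data", TDEFLFlush::Finish, |out| {
///         encoded.extend_from_slice(out);
///         // Returning false here would abort compression with
///         // TDEFLStatus::PutBufFailed.
///         true
///     });
/// ```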
| 2238 | pub fn compress_to_output( |
| 2239 | d: &mut CompressorOxide, |
| 2240 | in_buf: &[u8], |
| 2241 | flush: TDEFLFlush, |
| 2242 | mut callback_func: impl FnMut(&[u8]) -> bool, |
| 2243 | ) -> (TDEFLStatus, usize) { |
| 2244 | let res: (TDEFLStatus, usize, usize) = compress_inner( |
| 2245 | d, |
| 2246 | &mut CallbackOxide::new_callback_func( |
| 2247 | in_buf, |
| 2248 | CallbackFunc { |
| 2249 | put_buf_func: &mut callback_func, |
| 2250 | }, |
| 2251 | ), |
| 2252 | flush, |
| 2253 | ); |
| 2254 | |
| 2255 | (res.0, res.1) |
| 2256 | } |
| 2257 | |
| 2258 | fn compress_inner( |
| 2259 | d: &mut CompressorOxide, |
| 2260 | callback: &mut CallbackOxide, |
| 2261 | flush: TDEFLFlush, |
| 2262 | ) -> (TDEFLStatus, usize, usize) { |
| 2263 | d.params.out_buf_ofs = 0; |
| 2264 | d.params.src_pos = 0; |
| 2265 | |
| 2266 | let prev_ok = d.params.prev_return_status == TDEFLStatus::Okay; |
| 2267 | let flush_finish_once = d.params.flush != TDEFLFlush::Finish || flush == TDEFLFlush::Finish; |
| 2268 | |
| 2269 | d.params.flush = flush; |
| 2270 | if !prev_ok || !flush_finish_once { |
| 2271 | d.params.prev_return_status = TDEFLStatus::BadParam; |
| 2272 | return (d.params.prev_return_status, 0, 0); |
| 2273 | } |
| 2274 | |
| 2275 | if d.params.flush_remaining != 0 || d.params.finished { |
| 2276 | let res = flush_output_buffer(callback, &mut d.params); |
| 2277 | d.params.prev_return_status = res.0; |
| 2278 | return res; |
| 2279 | } |
| 2280 | |
| 2281 | let one_probe = d.params.flags & MAX_PROBES_MASK as u32 == 1; |
| 2282 | let greedy = d.params.flags & TDEFL_GREEDY_PARSING_FLAG != 0; |
| 2283 | let filter_or_rle_or_raw = d.params.flags |
| 2284 | & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES) |
| 2285 | != 0; |
| 2286 | |
| 2287 | let compress_success = if one_probe && greedy && !filter_or_rle_or_raw { |
| 2288 | compress_fast(d, callback) |
| 2289 | } else { |
| 2290 | compress_normal(d, callback) |
| 2291 | }; |
| 2292 | |
| 2293 | if !compress_success { |
| 2294 | return ( |
| 2295 | d.params.prev_return_status, |
| 2296 | d.params.src_pos, |
| 2297 | d.params.out_buf_ofs, |
| 2298 | ); |
| 2299 | } |
| 2300 | |
| 2301 | if let Some(in_buf) = callback.in_buf { |
| 2302 | if d.params.flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32) != 0 { |
| 2303 | d.params.adler32 = update_adler32(d.params.adler32, &in_buf[..d.params.src_pos]); |
| 2304 | } |
| 2305 | } |
| 2306 | |
| 2307 | let flush_none = d.params.flush == TDEFLFlush::None; |
| 2308 | let in_left = callback.in_buf.map_or(0, |buf| buf.len()) - d.params.src_pos; |
| 2309 | let remaining = in_left != 0 || d.params.flush_remaining != 0; |
| 2310 | if !flush_none && d.dict.lookahead_size == 0 && !remaining { |
| 2311 | let flush = d.params.flush; |
| 2312 | match flush_block(d, callback, flush) { |
| 2313 | Err(_) => { |
| 2314 | d.params.prev_return_status = TDEFLStatus::PutBufFailed; |
| 2315 | return ( |
| 2316 | d.params.prev_return_status, |
| 2317 | d.params.src_pos, |
| 2318 | d.params.out_buf_ofs, |
| 2319 | ); |
| 2320 | } |
| 2321 | Ok(x) if x < 0 => { |
| 2322 | return ( |
| 2323 | d.params.prev_return_status, |
| 2324 | d.params.src_pos, |
| 2325 | d.params.out_buf_ofs, |
| 2326 | ) |
| 2327 | } |
| 2328 | _ => { |
| 2329 | d.params.finished = d.params.flush == TDEFLFlush::Finish; |
| 2330 | if d.params.flush == TDEFLFlush::Full { |
| 2331 | memset(&mut d.dict.b.hash[..], 0); |
| 2332 | memset(&mut d.dict.b.next[..], 0); |
| 2333 | d.dict.size = 0; |
| 2334 | } |
| 2335 | } |
| 2336 | } |
| 2337 | } |
| 2338 | |
| 2339 | let res = flush_output_buffer(callback, &mut d.params); |
| 2340 | d.params.prev_return_status = res.0; |
| 2341 | |
| 2342 | res |
| 2343 | } |
| 2344 | |
| 2345 | /// Create a set of compression flags using parameters used by zlib and other compressors. |
/// Mainly intended to ease transitioning from C libraries as it deals with raw integers.
| 2347 | /// |
| 2348 | /// # Parameters |
| 2349 | /// `level` determines compression level. Clamped to maximum of 10. Negative values result in |
| 2350 | /// `CompressionLevel::DefaultLevel`. |
| 2351 | /// `window_bits`: Above 0, wraps the stream in a zlib wrapper, 0 or negative for a raw deflate |
| 2352 | /// stream. |
| 2353 | /// `strategy`: Sets the strategy if this conforms to any of the values in `CompressionStrategy`. |
| 2354 | /// |
| 2355 | /// # Notes |
| 2356 | /// This function may be removed or moved to the `miniz_oxide_c_api` in the future. |
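///
/// # Example
///
/// A sketch of typical zlib-style parameters (paths assumed, hence `ignore`):
///
/// ```ignore
/// use miniz_oxide::deflate::core::create_comp_flags_from_zip_params;
/// use miniz_oxide::deflate::CompressionStrategy;
///
/// // Level 6, zlib wrapper requested (window_bits > 0), default strategy.
/// let flags = create_comp_flags_from_zip_params(6, 15, CompressionStrategy::Default as i32);
/// ```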
| 2357 | pub fn create_comp_flags_from_zip_params(level: i32, window_bits: i32, strategy: i32) -> u32 { |
| 2358 | let num_probes = (if level >= 0 { |
| 2359 | cmp::min(10, level) |
| 2360 | } else { |
| 2361 | CompressionLevel::DefaultLevel as i32 |
| 2362 | }) as usize; |
| 2363 | let greedy = if level <= 3 { |
| 2364 | TDEFL_GREEDY_PARSING_FLAG |
| 2365 | } else { |
| 2366 | 0 |
| 2367 | }; |
| 2368 | let mut comp_flags = NUM_PROBES[num_probes] | greedy; |
| 2369 | |
| 2370 | if window_bits > 0 { |
| 2371 | comp_flags |= TDEFL_WRITE_ZLIB_HEADER; |
| 2372 | } |
| 2373 | |
| 2374 | if level == 0 { |
| 2375 | comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; |
| 2376 | } else if strategy == CompressionStrategy::Filtered as i32 { |
| 2377 | comp_flags |= TDEFL_FILTER_MATCHES; |
| 2378 | } else if strategy == CompressionStrategy::HuffmanOnly as i32 { |
| 2379 | comp_flags &= !MAX_PROBES_MASK as u32; |
| 2380 | } else if strategy == CompressionStrategy::Fixed as i32 { |
| 2381 | comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; |
| 2382 | } else if strategy == CompressionStrategy::RLE as i32 { |
| 2383 | comp_flags |= TDEFL_RLE_MATCHES; |
| 2384 | } |
| 2385 | |
| 2386 | comp_flags |
| 2387 | } |
| 2388 | |
#[cfg(test)]
| 2390 | mod test { |
| 2391 | use super::{ |
| 2392 | compress_to_output, create_comp_flags_from_zip_params, read_u16_le, write_u16_le, |
| 2393 | CompressionStrategy, CompressorOxide, TDEFLFlush, TDEFLStatus, DEFAULT_FLAGS, |
| 2394 | MZ_DEFAULT_WINDOW_BITS, |
| 2395 | }; |
| 2396 | use crate::inflate::decompress_to_vec; |
| 2397 | use alloc::vec; |
| 2398 | |
#[test]
| 2400 | fn u16_to_slice() { |
| 2401 | let mut slice = [0, 0]; |
| 2402 | write_u16_le(2000, &mut slice, 0); |
| 2403 | assert_eq!(slice, [208, 7]); |
| 2404 | } |
| 2405 | |
#[test]
| 2407 | fn u16_from_slice() { |
| 2408 | let slice = [208, 7]; |
| 2409 | assert_eq!(read_u16_le(&slice, 0), 2000); |
| 2410 | } |
| 2411 | |
#[test]
| 2413 | fn compress_output() { |
| 2414 | assert_eq!( |
| 2415 | DEFAULT_FLAGS, |
| 2416 | create_comp_flags_from_zip_params( |
| 2417 | 4, |
| 2418 | MZ_DEFAULT_WINDOW_BITS, |
| 2419 | CompressionStrategy::Default as i32 |
| 2420 | ) |
| 2421 | ); |
| 2422 | |
| 2423 | let slice = [ |
| 2424 | 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, |
| 2425 | ]; |
| 2426 | let mut encoded = vec![]; |
| 2427 | let flags = create_comp_flags_from_zip_params(6, 0, 0); |
| 2428 | let mut d = CompressorOxide::new(flags); |
| 2429 | let (status, in_consumed) = |
| 2430 | compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { |
| 2431 | encoded.extend_from_slice(out); |
| 2432 | true |
| 2433 | }); |
| 2434 | |
| 2435 | assert_eq!(status, TDEFLStatus::Done); |
| 2436 | assert_eq!(in_consumed, slice.len()); |
| 2437 | |
| 2438 | let decoded = decompress_to_vec(&encoded[..]).unwrap(); |
| 2439 | assert_eq!(&decoded[..], &slice[..]); |
| 2440 | } |
| 2441 | |
#[test]
| 2443 | /// Check fast compress mode |
| 2444 | fn compress_fast() { |
| 2445 | let slice = [ |
| 2446 | 1, 2, 3, 4, 1, 2, 3, 1, 2, 3, 1, 2, 6, 1, 2, 3, 1, 2, 3, 2, 3, 1, 2, 3, |
| 2447 | ]; |
| 2448 | let mut encoded = vec![]; |
| 2449 | let flags = create_comp_flags_from_zip_params(1, 0, 0); |
| 2450 | let mut d = CompressorOxide::new(flags); |
| 2451 | let (status, in_consumed) = |
| 2452 | compress_to_output(&mut d, &slice, TDEFLFlush::Finish, |out: &[u8]| { |
| 2453 | encoded.extend_from_slice(out); |
| 2454 | true |
| 2455 | }); |
| 2456 | |
| 2457 | assert_eq!(status, TDEFLStatus::Done); |
| 2458 | assert_eq!(in_consumed, slice.len()); |
| 2459 | |
| 2460 | // Needs to be altered if algorithm improves. |
| 2461 | assert_eq!( |
| 2462 | &encoded[..], |
| 2463 | [99, 100, 98, 102, 1, 98, 48, 98, 3, 147, 204, 76, 204, 140, 76, 204, 0] |
| 2464 | ); |
| 2465 | |
| 2466 | let decoded = decompress_to_vec(&encoded[..]).unwrap(); |
| 2467 | assert_eq!(&decoded[..], &slice[..]); |
| 2468 | } |
| 2469 | } |
| 2470 | |