| 1 | use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY}; |
| 2 | |
| 3 | /// Return the next code, or if the codeword is already all ones (which is the final code), return |
| 4 | /// the same code again. |
| 5 | fn next_codeword(mut codeword: u16, table_size: u16) -> u16 { |
| 6 | if codeword == table_size - 1 { |
| 7 | return codeword; |
| 8 | } |
| 9 | |
| 10 | let adv: u32 = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros(); |
| 11 | let bit: u16 = 1 << adv; |
| 12 | codeword &= bit - 1; |
| 13 | codeword |= bit; |
| 14 | codeword |
| 15 | } |
| 16 | |
| 17 | #[allow (clippy::needless_range_loop)] |
| 18 | pub fn build_table( |
| 19 | lengths: &[u8], |
| 20 | entries: &[u32], |
| 21 | codes: &mut [u16], |
| 22 | primary_table: &mut [u32], |
| 23 | secondary_table: &mut Vec<u16>, |
| 24 | is_distance_table: bool, |
| 25 | double_literal: bool, |
| 26 | ) -> bool { |
| 27 | // Count the number of symbols with each code length. |
| 28 | let mut histogram = [0; 16]; |
| 29 | for &length in lengths { |
| 30 | histogram[length as usize] += 1; |
| 31 | } |
| 32 | |
| 33 | // Determine the maximum code length. |
| 34 | let mut max_length = 15; |
| 35 | while max_length > 1 && histogram[max_length] == 0 { |
| 36 | max_length -= 1; |
| 37 | } |
| 38 | |
| 39 | // Handle zero and one symbol huffman codes (which are only allowed for distance codes). |
| 40 | if is_distance_table { |
| 41 | if max_length == 0 { |
| 42 | primary_table.fill(0); |
| 43 | secondary_table.clear(); |
| 44 | return true; |
| 45 | } else if max_length == 1 && histogram[1] == 1 { |
| 46 | let symbol = lengths.iter().position(|&l| l == 1).unwrap(); |
| 47 | codes[symbol] = 0; |
| 48 | let entry = entries |
| 49 | .get(symbol) |
| 50 | .cloned() |
| 51 | .unwrap_or((symbol as u32) << 16) |
| 52 | | 1; |
| 53 | for chunk in primary_table.chunks_mut(2) { |
| 54 | chunk[0] = entry; |
| 55 | chunk[1] = 0; |
| 56 | } |
| 57 | return true; |
| 58 | } |
| 59 | } |
| 60 | |
| 61 | // Sort symbols by code length. Given the histogram, we can determine the starting offset |
| 62 | // for each code length. |
| 63 | let mut offsets = [0; 16]; |
| 64 | let mut codespace_used = 0; |
| 65 | offsets[1] = histogram[0]; |
| 66 | for i in 1..max_length { |
| 67 | offsets[i + 1] = offsets[i] + histogram[i]; |
| 68 | codespace_used = (codespace_used << 1) + histogram[i]; |
| 69 | } |
| 70 | codespace_used = (codespace_used << 1) + histogram[max_length]; |
| 71 | |
| 72 | // Check that the provided lengths form a valid Huffman tree. |
| 73 | if codespace_used != (1 << max_length) { |
| 74 | return false; |
| 75 | } |
| 76 | |
| 77 | // Sort the symbols by code length. |
| 78 | let mut next_index = offsets; |
| 79 | let mut sorted_symbols = [0; 288]; |
| 80 | for symbol in 0..lengths.len() { |
| 81 | let length = lengths[symbol]; |
| 82 | sorted_symbols[next_index[length as usize]] = symbol; |
| 83 | next_index[length as usize] += 1; |
| 84 | } |
| 85 | |
| 86 | let mut codeword = 0u16; |
| 87 | let mut i = histogram[0]; |
| 88 | |
| 89 | // Populate the primary decoding table |
| 90 | let primary_table_bits = primary_table.len().ilog2() as usize; |
| 91 | let primary_table_mask = (1 << primary_table_bits) - 1; |
| 92 | for length in 1..=primary_table_bits { |
| 93 | let current_table_end = 1 << length; |
| 94 | |
| 95 | // Loop over all symbols with the current code length and set their table entries. |
| 96 | for _ in 0..histogram[length] { |
| 97 | let symbol = sorted_symbols[i]; |
| 98 | i += 1; |
| 99 | |
| 100 | primary_table[codeword as usize] = entries |
| 101 | .get(symbol) |
| 102 | .cloned() |
| 103 | .unwrap_or((symbol as u32) << 16) |
| 104 | | length as u32; |
| 105 | |
| 106 | codes[symbol] = codeword; |
| 107 | codeword = next_codeword(codeword, current_table_end as u16); |
| 108 | } |
| 109 | |
| 110 | if double_literal { |
| 111 | for len1 in 1..(length - 1) { |
| 112 | let len2 = length - len1; |
| 113 | for sym1_index in offsets[len1]..next_index[len1] { |
| 114 | for sym2_index in offsets[len2]..next_index[len2] { |
| 115 | let sym1 = sorted_symbols[sym1_index]; |
| 116 | let sym2 = sorted_symbols[sym2_index]; |
| 117 | if sym1 < 256 && sym2 < 256 { |
| 118 | let codeword1 = codes[sym1]; |
| 119 | let codeword2 = codes[sym2]; |
| 120 | let codeword = codeword1 | (codeword2 << len1); |
| 121 | let entry = (sym1 as u32) << 16 |
| 122 | | (sym2 as u32) << 24 |
| 123 | | LITERAL_ENTRY |
| 124 | | (2 << 8); |
| 125 | primary_table[codeword as usize] = entry | (length as u32); |
| 126 | } |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | // If we aren't at the maximum table size, double the size of the table. |
| 133 | if length < primary_table_bits { |
| 134 | primary_table.copy_within(0..current_table_end, current_table_end); |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // Populate the secondary decoding table. |
| 139 | secondary_table.clear(); |
| 140 | if max_length > primary_table_bits { |
| 141 | let mut subtable_start = 0; |
| 142 | let mut subtable_prefix = !0; |
| 143 | for length in (primary_table_bits + 1)..=max_length { |
| 144 | let subtable_size = 1 << (length - primary_table_bits); |
| 145 | for _ in 0..histogram[length] { |
| 146 | // If the codeword's prefix doesn't match the current subtable, create a new |
| 147 | // subtable. |
| 148 | if codeword & primary_table_mask != subtable_prefix { |
| 149 | subtable_prefix = codeword & primary_table_mask; |
| 150 | subtable_start = secondary_table.len(); |
| 151 | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
| 152 | | EXCEPTIONAL_ENTRY |
| 153 | | SECONDARY_TABLE_ENTRY |
| 154 | | (subtable_size as u32 - 1); |
| 155 | secondary_table.resize(subtable_start + subtable_size, 0); |
| 156 | } |
| 157 | |
| 158 | // Lookup the symbol. |
| 159 | let symbol = sorted_symbols[i]; |
| 160 | i += 1; |
| 161 | |
| 162 | // Insert the symbol into the secondary table and advance to the next codeword. |
| 163 | codes[symbol] = codeword; |
| 164 | secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] = |
| 165 | ((symbol as u16) << 4) | (length as u16); |
| 166 | codeword = next_codeword(codeword, 1 << length); |
| 167 | } |
| 168 | |
| 169 | // If there are more codes with the same subtable prefix, extend the subtable. |
| 170 | if length < max_length && codeword & primary_table_mask == subtable_prefix { |
| 171 | secondary_table.extend_from_within(subtable_start..); |
| 172 | let subtable_size = secondary_table.len() - subtable_start; |
| 173 | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
| 174 | | EXCEPTIONAL_ENTRY |
| 175 | | SECONDARY_TABLE_ENTRY |
| 176 | | (subtable_size as u32 - 1); |
| 177 | } |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | true |
| 182 | } |
| 183 | |