1 | use crate::decompress::{EXCEPTIONAL_ENTRY, LITERAL_ENTRY, SECONDARY_TABLE_ENTRY}; |
2 | |
3 | /// Return the next code, or if the codeword is already all ones (which is the final code), return |
4 | /// the same code again. |
5 | fn next_codeword(mut codeword: u16, table_size: u16) -> u16 { |
6 | if codeword == table_size - 1 { |
7 | return codeword; |
8 | } |
9 | |
10 | let adv: u32 = (u16::BITS - 1) - (codeword ^ (table_size - 1)).leading_zeros(); |
11 | let bit: u16 = 1 << adv; |
12 | codeword &= bit - 1; |
13 | codeword |= bit; |
14 | codeword |
15 | } |
16 | |
17 | #[allow (clippy::needless_range_loop)] |
18 | pub fn build_table( |
19 | lengths: &[u8], |
20 | entries: &[u32], |
21 | codes: &mut [u16], |
22 | primary_table: &mut [u32], |
23 | secondary_table: &mut Vec<u16>, |
24 | is_distance_table: bool, |
25 | double_literal: bool, |
26 | ) -> bool { |
27 | // Count the number of symbols with each code length. |
28 | let mut histogram = [0; 16]; |
29 | for &length in lengths { |
30 | histogram[length as usize] += 1; |
31 | } |
32 | |
33 | // Determine the maximum code length. |
34 | let mut max_length = 15; |
35 | while max_length > 1 && histogram[max_length] == 0 { |
36 | max_length -= 1; |
37 | } |
38 | |
39 | // Handle zero and one symbol huffman codes (which are only allowed for distance codes). |
40 | if is_distance_table { |
41 | if max_length == 0 { |
42 | primary_table.fill(0); |
43 | secondary_table.clear(); |
44 | return true; |
45 | } else if max_length == 1 && histogram[1] == 1 { |
46 | let symbol = lengths.iter().position(|&l| l == 1).unwrap(); |
47 | codes[symbol] = 0; |
48 | let entry = entries |
49 | .get(symbol) |
50 | .cloned() |
51 | .unwrap_or((symbol as u32) << 16) |
52 | | 1; |
53 | for chunk in primary_table.chunks_mut(2) { |
54 | chunk[0] = entry; |
55 | chunk[1] = 0; |
56 | } |
57 | return true; |
58 | } |
59 | } |
60 | |
61 | // Sort symbols by code length. Given the histogram, we can determine the starting offset |
62 | // for each code length. |
63 | let mut offsets = [0; 16]; |
64 | let mut codespace_used = 0; |
65 | offsets[1] = histogram[0]; |
66 | for i in 1..max_length { |
67 | offsets[i + 1] = offsets[i] + histogram[i]; |
68 | codespace_used = (codespace_used << 1) + histogram[i]; |
69 | } |
70 | codespace_used = (codespace_used << 1) + histogram[max_length]; |
71 | |
72 | // Check that the provided lengths form a valid Huffman tree. |
73 | if codespace_used != (1 << max_length) { |
74 | return false; |
75 | } |
76 | |
77 | // Sort the symbols by code length. |
78 | let mut next_index = offsets; |
79 | let mut sorted_symbols = [0; 288]; |
80 | for symbol in 0..lengths.len() { |
81 | let length = lengths[symbol]; |
82 | sorted_symbols[next_index[length as usize]] = symbol; |
83 | next_index[length as usize] += 1; |
84 | } |
85 | |
86 | let mut codeword = 0u16; |
87 | let mut i = histogram[0]; |
88 | |
89 | // Populate the primary decoding table |
90 | let primary_table_bits = primary_table.len().ilog2() as usize; |
91 | let primary_table_mask = (1 << primary_table_bits) - 1; |
92 | for length in 1..=primary_table_bits { |
93 | let current_table_end = 1 << length; |
94 | |
95 | // Loop over all symbols with the current code length and set their table entries. |
96 | for _ in 0..histogram[length] { |
97 | let symbol = sorted_symbols[i]; |
98 | i += 1; |
99 | |
100 | primary_table[codeword as usize] = entries |
101 | .get(symbol) |
102 | .cloned() |
103 | .unwrap_or((symbol as u32) << 16) |
104 | | length as u32; |
105 | |
106 | codes[symbol] = codeword; |
107 | codeword = next_codeword(codeword, current_table_end as u16); |
108 | } |
109 | |
110 | if double_literal { |
111 | for len1 in 1..(length - 1) { |
112 | let len2 = length - len1; |
113 | for sym1_index in offsets[len1]..next_index[len1] { |
114 | for sym2_index in offsets[len2]..next_index[len2] { |
115 | let sym1 = sorted_symbols[sym1_index]; |
116 | let sym2 = sorted_symbols[sym2_index]; |
117 | if sym1 < 256 && sym2 < 256 { |
118 | let codeword1 = codes[sym1]; |
119 | let codeword2 = codes[sym2]; |
120 | let codeword = codeword1 | (codeword2 << len1); |
121 | let entry = (sym1 as u32) << 16 |
122 | | (sym2 as u32) << 24 |
123 | | LITERAL_ENTRY |
124 | | (2 << 8); |
125 | primary_table[codeword as usize] = entry | (length as u32); |
126 | } |
127 | } |
128 | } |
129 | } |
130 | } |
131 | |
132 | // If we aren't at the maximum table size, double the size of the table. |
133 | if length < primary_table_bits { |
134 | primary_table.copy_within(0..current_table_end, current_table_end); |
135 | } |
136 | } |
137 | |
138 | // Populate the secondary decoding table. |
139 | secondary_table.clear(); |
140 | if max_length > primary_table_bits { |
141 | let mut subtable_start = 0; |
142 | let mut subtable_prefix = !0; |
143 | for length in (primary_table_bits + 1)..=max_length { |
144 | let subtable_size = 1 << (length - primary_table_bits); |
145 | for _ in 0..histogram[length] { |
146 | // If the codeword's prefix doesn't match the current subtable, create a new |
147 | // subtable. |
148 | if codeword & primary_table_mask != subtable_prefix { |
149 | subtable_prefix = codeword & primary_table_mask; |
150 | subtable_start = secondary_table.len(); |
151 | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
152 | | EXCEPTIONAL_ENTRY |
153 | | SECONDARY_TABLE_ENTRY |
154 | | (subtable_size as u32 - 1); |
155 | secondary_table.resize(subtable_start + subtable_size, 0); |
156 | } |
157 | |
158 | // Lookup the symbol. |
159 | let symbol = sorted_symbols[i]; |
160 | i += 1; |
161 | |
162 | // Insert the symbol into the secondary table and advance to the next codeword. |
163 | codes[symbol] = codeword; |
164 | secondary_table[subtable_start + (codeword >> primary_table_bits) as usize] = |
165 | ((symbol as u16) << 4) | (length as u16); |
166 | codeword = next_codeword(codeword, 1 << length); |
167 | } |
168 | |
169 | // If there are more codes with the same subtable prefix, extend the subtable. |
170 | if length < max_length && codeword & primary_table_mask == subtable_prefix { |
171 | secondary_table.extend_from_within(subtable_start..); |
172 | let subtable_size = secondary_table.len() - subtable_start; |
173 | primary_table[subtable_prefix as usize] = ((subtable_start as u32) << 16) |
174 | | EXCEPTIONAL_ENTRY |
175 | | SECONDARY_TABLE_ENTRY |
176 | | (subtable_size as u32 - 1); |
177 | } |
178 | } |
179 | } |
180 | |
181 | true |
182 | } |
183 | |