| 1 | //! Rudimentary utility for reading Canonical Huffman Codes. |
| 2 | //! Based off <https://github.com/webmproject/libwebp/blob/7f8472a610b61ec780ef0a8873cd954ac512a505/src/utils/huffman.c> |
| 3 | |
| 4 | use std::io::BufRead; |
| 5 | |
| 6 | use crate::decoder::DecodingError; |
| 7 | |
| 8 | use super::lossless::BitReader; |
| 9 | |
| 10 | const MAX_ALLOWED_CODE_LENGTH: usize = 15; |
| 11 | const MAX_TABLE_BITS: u8 = 10; |
| 12 | |
| 13 | #[derive (Clone, Copy, Debug, PartialEq, Eq)] |
| 14 | enum HuffmanTreeNode { |
| 15 | Branch(usize), //offset in vector to children |
| 16 | Leaf(u16), //symbol stored in leaf |
| 17 | Empty, |
| 18 | } |
| 19 | |
| 20 | #[derive (Clone, Debug)] |
| 21 | enum HuffmanTreeInner { |
| 22 | Single(u16), |
| 23 | Tree { |
| 24 | tree: Vec<HuffmanTreeNode>, |
| 25 | table: Vec<u32>, |
| 26 | table_mask: u16, |
| 27 | }, |
| 28 | } |
| 29 | |
| 30 | /// Huffman tree |
| 31 | #[derive (Clone, Debug)] |
| 32 | pub(crate) struct HuffmanTree(HuffmanTreeInner); |
| 33 | |
| 34 | impl Default for HuffmanTree { |
| 35 | fn default() -> Self { |
| 36 | Self(HuffmanTreeInner::Single(0)) |
| 37 | } |
| 38 | } |
| 39 | |
| 40 | impl HuffmanTree { |
| 41 | /// Builds a tree implicitly, just from code lengths |
| 42 | pub(crate) fn build_implicit(code_lengths: Vec<u16>) -> Result<Self, DecodingError> { |
| 43 | // Count symbols and build histogram |
| 44 | let mut num_symbols = 0; |
| 45 | let mut code_length_hist = [0; MAX_ALLOWED_CODE_LENGTH + 1]; |
| 46 | for &length in code_lengths.iter().filter(|&&x| x != 0) { |
| 47 | code_length_hist[usize::from(length)] += 1; |
| 48 | num_symbols += 1; |
| 49 | } |
| 50 | |
| 51 | // Handle special cases |
| 52 | if num_symbols == 0 { |
| 53 | return Err(DecodingError::HuffmanError); |
| 54 | } else if num_symbols == 1 { |
| 55 | let root_symbol = code_lengths.iter().position(|&x| x != 0).unwrap() as u16; |
| 56 | return Ok(Self::build_single_node(root_symbol)); |
| 57 | }; |
| 58 | |
| 59 | // Assign codes |
| 60 | let mut curr_code = 0; |
| 61 | let mut next_codes = [0; MAX_ALLOWED_CODE_LENGTH + 1]; |
| 62 | let max_code_length = code_length_hist.iter().rposition(|&x| x != 0).unwrap() as u16; |
| 63 | for code_len in 1..usize::from(max_code_length) + 1 { |
| 64 | next_codes[code_len] = curr_code; |
| 65 | curr_code = (curr_code + code_length_hist[code_len]) << 1; |
| 66 | } |
| 67 | |
| 68 | // Confirm that the huffman tree is valid |
| 69 | if curr_code != 2 << max_code_length { |
| 70 | return Err(DecodingError::HuffmanError); |
| 71 | } |
| 72 | |
| 73 | // Calculate table/tree parameters |
| 74 | let table_bits = max_code_length.min(u16::from(MAX_TABLE_BITS)); |
| 75 | let table_size = (1 << table_bits) as usize; |
| 76 | let table_mask = table_size as u16 - 1; |
| 77 | let tree_size = code_length_hist[table_bits as usize + 1..=max_code_length as usize] |
| 78 | .iter() |
| 79 | .sum::<u16>() as usize; |
| 80 | |
| 81 | // Populate decoding table |
| 82 | let mut tree = Vec::with_capacity(2 * tree_size); |
| 83 | let mut table = vec![0; table_size]; |
| 84 | for (symbol, &length) in code_lengths.iter().enumerate() { |
| 85 | if length == 0 { |
| 86 | continue; |
| 87 | } |
| 88 | |
| 89 | let code = next_codes[length as usize]; |
| 90 | next_codes[length as usize] += 1; |
| 91 | |
| 92 | if length <= table_bits { |
| 93 | let mut j = (u16::reverse_bits(code) >> (16 - length)) as usize; |
| 94 | let entry = (u32::from(length) << 16) | symbol as u32; |
| 95 | while j < table_size { |
| 96 | table[j] = entry; |
| 97 | j += 1 << length as usize; |
| 98 | } |
| 99 | } else { |
| 100 | let table_index = |
| 101 | ((u16::reverse_bits(code) >> (16 - length)) & table_mask) as usize; |
| 102 | let table_value = table[table_index]; |
| 103 | |
| 104 | debug_assert_eq!(table_value >> 16, 0); |
| 105 | |
| 106 | let mut node_index = if table_value == 0 { |
| 107 | let node_index = tree.len(); |
| 108 | table[table_index] = (node_index + 1) as u32; |
| 109 | tree.push(HuffmanTreeNode::Empty); |
| 110 | node_index |
| 111 | } else { |
| 112 | (table_value - 1) as usize |
| 113 | }; |
| 114 | |
| 115 | let code = usize::from(code); |
| 116 | for depth in (0..length - table_bits).rev() { |
| 117 | let node = tree[node_index]; |
| 118 | |
| 119 | let offset = match node { |
| 120 | HuffmanTreeNode::Empty => { |
| 121 | // Turns a node from empty into a branch and assigns its children |
| 122 | let offset = tree.len() - node_index; |
| 123 | tree[node_index] = HuffmanTreeNode::Branch(offset); |
| 124 | tree.push(HuffmanTreeNode::Empty); |
| 125 | tree.push(HuffmanTreeNode::Empty); |
| 126 | offset |
| 127 | } |
| 128 | HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), |
| 129 | HuffmanTreeNode::Branch(offset) => offset, |
| 130 | }; |
| 131 | |
| 132 | node_index += offset + ((code >> depth) & 1); |
| 133 | } |
| 134 | |
| 135 | match tree[node_index] { |
| 136 | HuffmanTreeNode::Empty => { |
| 137 | tree[node_index] = HuffmanTreeNode::Leaf(symbol as u16); |
| 138 | } |
| 139 | HuffmanTreeNode::Leaf(_) => return Err(DecodingError::HuffmanError), |
| 140 | HuffmanTreeNode::Branch(_offset) => return Err(DecodingError::HuffmanError), |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | Ok(Self(HuffmanTreeInner::Tree { |
| 146 | tree, |
| 147 | table, |
| 148 | table_mask, |
| 149 | })) |
| 150 | } |
| 151 | |
| 152 | pub(crate) const fn build_single_node(symbol: u16) -> Self { |
| 153 | Self(HuffmanTreeInner::Single(symbol)) |
| 154 | } |
| 155 | |
| 156 | pub(crate) fn build_two_node(zero: u16, one: u16) -> Self { |
| 157 | Self(HuffmanTreeInner::Tree { |
| 158 | tree: vec![ |
| 159 | HuffmanTreeNode::Leaf(zero), |
| 160 | HuffmanTreeNode::Leaf(one), |
| 161 | HuffmanTreeNode::Empty, |
| 162 | ], |
| 163 | table: vec![(1 << 16) | u32::from(zero), (1 << 16) | u32::from(one)], |
| 164 | table_mask: 0x1, |
| 165 | }) |
| 166 | } |
| 167 | |
| 168 | pub(crate) const fn is_single_node(&self) -> bool { |
| 169 | matches!(self.0, HuffmanTreeInner::Single(_)) |
| 170 | } |
| 171 | |
| 172 | #[inline (never)] |
| 173 | fn read_symbol_slowpath<R: BufRead>( |
| 174 | tree: &[HuffmanTreeNode], |
| 175 | mut v: usize, |
| 176 | start_index: usize, |
| 177 | bit_reader: &mut BitReader<R>, |
| 178 | ) -> Result<u16, DecodingError> { |
| 179 | let mut depth = MAX_TABLE_BITS; |
| 180 | let mut index = start_index; |
| 181 | loop { |
| 182 | match &tree[index] { |
| 183 | HuffmanTreeNode::Branch(children_offset) => { |
| 184 | index += children_offset + (v & 1); |
| 185 | depth += 1; |
| 186 | v >>= 1; |
| 187 | } |
| 188 | HuffmanTreeNode::Leaf(symbol) => { |
| 189 | bit_reader.consume(depth)?; |
| 190 | return Ok(*symbol); |
| 191 | } |
| 192 | HuffmanTreeNode::Empty => return Err(DecodingError::HuffmanError), |
| 193 | } |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | /// Reads a symbol using the bit reader. |
| 198 | /// |
| 199 | /// You must call call `bit_reader.fill()` before calling this function or it may erroroneosly |
| 200 | /// detect the end of the stream and return a bitstream error. |
| 201 | pub(crate) fn read_symbol<R: BufRead>( |
| 202 | &self, |
| 203 | bit_reader: &mut BitReader<R>, |
| 204 | ) -> Result<u16, DecodingError> { |
| 205 | match &self.0 { |
| 206 | HuffmanTreeInner::Tree { |
| 207 | tree, |
| 208 | table, |
| 209 | table_mask, |
| 210 | } => { |
| 211 | let v = bit_reader.peek_full() as u16; |
| 212 | let entry = table[(v & table_mask) as usize]; |
| 213 | if entry >> 16 != 0 { |
| 214 | bit_reader.consume((entry >> 16) as u8)?; |
| 215 | return Ok(entry as u16); |
| 216 | } |
| 217 | |
| 218 | Self::read_symbol_slowpath( |
| 219 | tree, |
| 220 | (v >> MAX_TABLE_BITS) as usize, |
| 221 | ((entry & 0xffff) - 1) as usize, |
| 222 | bit_reader, |
| 223 | ) |
| 224 | } |
| 225 | HuffmanTreeInner::Single(symbol) => Ok(*symbol), |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | /// Peek at the next symbol in the bitstream if it can be read with only a primary table lookup. |
| 230 | /// |
| 231 | /// Returns a tuple of the codelength and symbol value. This function may return wrong |
| 232 | /// information if there aren't enough bits in the bit reader to read the next symbol. |
| 233 | pub(crate) fn peek_symbol<R: BufRead>(&self, bit_reader: &BitReader<R>) -> Option<(u8, u16)> { |
| 234 | match &self.0 { |
| 235 | HuffmanTreeInner::Tree { |
| 236 | table, table_mask, .. |
| 237 | } => { |
| 238 | let v = bit_reader.peek_full() as u16; |
| 239 | let entry = table[(v & table_mask) as usize]; |
| 240 | if entry >> 16 != 0 { |
| 241 | return Some(((entry >> 16) as u8, entry as u16)); |
| 242 | } |
| 243 | None |
| 244 | } |
| 245 | HuffmanTreeInner::Single(symbol) => Some((0, *symbol)), |
| 246 | } |
| 247 | } |
| 248 | } |
| 249 | |