1// Copyright 2017 Brian Langenberger
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Traits and implementations for reading or writing Huffman codes
10//! from or to a stream.
11
12#![warn(missing_docs)]
13
14use super::BitQueue;
15use super::Endianness;
16#[cfg(feature = "alloc")]
17use alloc::boxed::Box;
18#[cfg(feature = "alloc")]
19use alloc::collections::BTreeMap;
20#[cfg(feature = "alloc")]
21use alloc::vec::Vec;
22#[cfg(feature = "alloc")]
23use core::fmt;
24#[cfg(feature = "alloc")]
25use core::marker::PhantomData;
26#[cfg(feature = "alloc")]
27use core2::error::Error;
28
29#[cfg(not(feature = "alloc"))]
30use std::collections::BTreeMap;
31#[cfg(not(feature = "alloc"))]
32use std::error::Error;
33#[cfg(not(feature = "alloc"))]
34use std::fmt;
35#[cfg(not(feature = "alloc"))]
36use std::marker::PhantomData;
37
38/// A compiled Huffman tree element for use with the `read_huffman` method.
39/// Returned by `compile_read_tree`.
40///
41/// Compiled read trees are optimized for faster lookup
42/// and are therefore endian-specific.
43///
44/// In addition, each symbol in the source tree may occur many times
45/// in the compiled tree. If symbols require a nontrivial amount of space,
46/// consider using reference counting so that they may be cloned
47/// more efficiently.
48pub enum ReadHuffmanTree<E: Endianness, T: Clone> {
49 /// The final value and new reader state
50 Done(T, u8, u32, PhantomData<E>),
51 /// Another byte is necessary to determine final value
52 Continue(Box<[ReadHuffmanTree<E, T>]>),
53 /// An invalid reader state has been used
54 InvalidState,
55}
56
57/// Given a vector of symbol/code pairs, compiles a Huffman tree
58/// for reading.
59///
60/// Code must be 0 or 1 bits and are always read from the stream
61/// from least-significant in the list to most signficant
62/// (which makes them easier to read for humans).
63///
64/// All possible codes must be assigned some symbol,
65/// and it is acceptable for the same symbol to occur multiple times.
66///
67/// ## Examples
68/// ```
69/// use bitstream_io::huffman::compile_read_tree;
70/// use bitstream_io::BigEndian;
71/// assert!(compile_read_tree::<BigEndian,i32>(
72/// vec![(1, vec![0]),
73/// (2, vec![1, 0]),
74/// (3, vec![1, 1])]).is_ok());
75/// ```
76///
77/// ```
78/// use std::io::{Read, Cursor};
79/// use bitstream_io::{BigEndian, BitReader, HuffmanRead};
80/// use bitstream_io::huffman::compile_read_tree;
81/// let tree = compile_read_tree(
82/// vec![('a', vec![0]),
83/// ('b', vec![1, 0]),
84/// ('c', vec![1, 1, 0]),
85/// ('d', vec![1, 1, 1])]).unwrap();
86/// let data = [0b10110111];
87/// let mut cursor = Cursor::new(&data);
88/// let mut reader = BitReader::endian(&mut cursor, BigEndian);
89/// assert_eq!(reader.read_huffman(&tree).unwrap(), 'b');
90/// assert_eq!(reader.read_huffman(&tree).unwrap(), 'c');
91/// assert_eq!(reader.read_huffman(&tree).unwrap(), 'd');
92/// ```
93pub fn compile_read_tree<E, T>(
94 values: Vec<(T, Vec<u8>)>,
95) -> Result<Box<[ReadHuffmanTree<E, T>]>, HuffmanTreeError>
96where
97 E: Endianness,
98 T: Clone,
99{
100 let tree: FinalHuffmanTree = FinalHuffmanTree::new(values)?;
101
102 let mut result: Vec> = Vec::with_capacity(256);
103 result.extend((0..256).map(|_| ReadHuffmanTree::InvalidState));
104 let queue: BitQueue = BitQueue::from_value(value:0, bits:0);
105 let i: usize = queue.to_state();
106 result[i] = compile_queue(queue, &tree);
107 for bits: u32 in 1..8 {
108 for value: u8 in 0..(1 << bits) {
109 let queue: BitQueue = BitQueue::from_value(value, bits);
110 let i: usize = queue.to_state();
111 result[i] = compile_queue(queue, &tree);
112 }
113 }
114 assert_eq!(result.len(), 256);
115 Ok(result.into_boxed_slice())
116}
117
118fn compile_queue<E, T>(
119 mut queue: BitQueue<E, u8>,
120 tree: &FinalHuffmanTree<T>,
121) -> ReadHuffmanTree<E, T>
122where
123 E: Endianness,
124 T: Clone,
125{
126 match tree {
127 FinalHuffmanTree::Leaf(ref value: &T) => {
128 let len: u32 = queue.len();
129 ReadHuffmanTree::Done(value.clone(), queue.value(), len, PhantomData)
130 }
131 FinalHuffmanTree::Tree(ref bit0: &Box>, ref bit1: &Box>) => {
132 if queue.is_empty() {
133 ReadHuffmanTree::Continue(
134 (0..256)
135 .map(|byte: i32| compile_queue(queue:BitQueue::from_value(value:byte as u8, bits:8), tree))
136 .collect::<Vec<ReadHuffmanTree<E, T>>>()
137 .into_boxed_slice(),
138 )
139 } else if queue.pop(bits:1) == 0 {
140 compile_queue(queue, tree:bit0)
141 } else {
142 compile_queue(queue, tree:bit1)
143 }
144 }
145 }
146}
147
148// A complete Huffman tree with no empty nodes
149enum FinalHuffmanTree<T: Clone> {
150 Leaf(T),
151 Tree(Box<FinalHuffmanTree<T>>, Box<FinalHuffmanTree<T>>),
152}
153
154impl<T: Clone> FinalHuffmanTree<T> {
155 fn new(values: Vec<(T, Vec<u8>)>) -> Result<FinalHuffmanTree<T>, HuffmanTreeError> {
156 let mut tree: WipHuffmanTree = WipHuffmanTree::new_empty();
157
158 for (symbol: T, code: Vec) in values {
159 tree.add(code.as_slice(), symbol)?;
160 }
161
162 tree.into_read_tree()
163 }
164}
165
166// Work-in-progress trees may have empty nodes during construction
167// but those are not allowed in a finalized tree.
168// If the user wants some codes to be None or an error symbol of some sort,
169// those will need to be specified explicitly.
170enum WipHuffmanTree<T: Clone> {
171 Empty,
172 Leaf(T),
173 Tree(Box<WipHuffmanTree<T>>, Box<WipHuffmanTree<T>>),
174}
175
176impl<T: Clone> WipHuffmanTree<T> {
177 fn new_empty() -> WipHuffmanTree<T> {
178 WipHuffmanTree::Empty
179 }
180
181 fn new_leaf(value: T) -> WipHuffmanTree<T> {
182 WipHuffmanTree::Leaf(value)
183 }
184
185 fn new_tree() -> WipHuffmanTree<T> {
186 WipHuffmanTree::Tree(Box::new(Self::new_empty()), Box::new(Self::new_empty()))
187 }
188
189 fn into_read_tree(self) -> Result<FinalHuffmanTree<T>, HuffmanTreeError> {
190 match self {
191 WipHuffmanTree::Empty => Err(HuffmanTreeError::MissingLeaf),
192 WipHuffmanTree::Leaf(v) => Ok(FinalHuffmanTree::Leaf(v)),
193 WipHuffmanTree::Tree(zero, one) => {
194 let zero = zero.into_read_tree()?;
195 let one = one.into_read_tree()?;
196 Ok(FinalHuffmanTree::Tree(Box::new(zero), Box::new(one)))
197 }
198 }
199 }
200
201 fn add(&mut self, code: &[u8], symbol: T) -> Result<(), HuffmanTreeError> {
202 match self {
203 WipHuffmanTree::Empty => {
204 if code.is_empty() {
205 *self = WipHuffmanTree::new_leaf(symbol);
206 Ok(())
207 } else {
208 *self = WipHuffmanTree::new_tree();
209 self.add(code, symbol)
210 }
211 }
212 WipHuffmanTree::Leaf(_) => Err(if code.is_empty() {
213 HuffmanTreeError::DuplicateLeaf
214 } else {
215 HuffmanTreeError::OrphanedLeaf
216 }),
217 WipHuffmanTree::Tree(ref mut zero, ref mut one) => {
218 if code.is_empty() {
219 Err(HuffmanTreeError::DuplicateLeaf)
220 } else {
221 match code[0] {
222 0 => zero.add(&code[1..], symbol),
223 1 => one.add(&code[1..], symbol),
224 _ => Err(HuffmanTreeError::InvalidBit),
225 }
226 }
227 }
228 }
229 }
230}
231
232/// An error type during Huffman tree compilation.
233#[derive(PartialEq, Eq, Copy, Clone, Debug)]
234pub enum HuffmanTreeError {
235 /// One of the bits in a Huffman code is not 0 or 1
236 InvalidBit,
237 /// A Huffman code in the specification has no defined symbol
238 MissingLeaf,
239 /// The same Huffman code specifies multiple symbols
240 DuplicateLeaf,
241 /// A Huffman code is the prefix of some longer code
242 OrphanedLeaf,
243}
244
245impl fmt::Display for HuffmanTreeError {
246 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
247 match *self {
248 HuffmanTreeError::InvalidBit => write!(f, "invalid bit in code"),
249 HuffmanTreeError::MissingLeaf => write!(f, "missing leaf node in specification"),
250 HuffmanTreeError::DuplicateLeaf => write!(f, "duplicate leaf node in specification"),
251 HuffmanTreeError::OrphanedLeaf => write!(f, "orphaned leaf node in specification"),
252 }
253 }
254}
255
256impl Error for HuffmanTreeError {}
257
258/// Given a vector of symbol/code pairs, compiles a Huffman tree
259/// for writing.
260///
261/// Code must be 0 or 1 bits and are always written to the stream
262/// from least-significant in the list to most signficant
263/// (which makes them easier to read for humans).
264///
265/// If the same symbol occurs multiple times, the first code is used.
266/// Unlike in read trees, not all possible codes need to be
267/// assigned a symbol.
268///
269/// ## Examples
270/// ```
271/// use bitstream_io::huffman::compile_write_tree;
272/// use bitstream_io::BigEndian;
273/// assert!(compile_write_tree::<BigEndian,i32>(
274/// vec![(1, vec![0]),
275/// (2, vec![1, 0]),
276/// (3, vec![1, 1])]).is_ok());
277/// ```
278///
279/// ```
280/// use std::io::Write;
281/// use bitstream_io::{BigEndian, BitWriter, HuffmanWrite};
282/// use bitstream_io::huffman::compile_write_tree;
283/// let tree = compile_write_tree(
284/// vec![('a', vec![0]),
285/// ('b', vec![1, 0]),
286/// ('c', vec![1, 1, 0]),
287/// ('d', vec![1, 1, 1])]).unwrap();
288/// let mut data = Vec::new();
289/// {
290/// let mut writer = BitWriter::endian(&mut data, BigEndian);
291/// writer.write_huffman(&tree, 'b').unwrap();
292/// writer.write_huffman(&tree, 'c').unwrap();
293/// writer.write_huffman(&tree, 'd').unwrap();
294/// }
295/// assert_eq!(data, [0b10110111]);
296/// ```
297pub fn compile_write_tree<E, T>(
298 values: Vec<(T, Vec<u8>)>,
299) -> Result<WriteHuffmanTree<E, T>, HuffmanTreeError>
300where
301 E: Endianness,
302 T: Ord + Clone,
303{
304 let mut map = BTreeMap::new();
305
306 for (symbol, code) in values {
307 let mut encoded = Vec::new();
308 for bits in code.chunks(32) {
309 let mut acc = BitQueue::<E, u32>::new();
310 for bit in bits {
311 match *bit {
312 0 => acc.push(1, 0),
313 1 => acc.push(1, 1),
314 _ => return Err(HuffmanTreeError::InvalidBit),
315 }
316 }
317 let len = acc.len();
318 encoded.push((len, acc.value()))
319 }
320 map.entry(symbol)
321 .or_insert_with(|| encoded.into_boxed_slice());
322 }
323
324 Ok(WriteHuffmanTree {
325 map,
326 phantom: PhantomData,
327 })
328}
329
330/// A compiled Huffman tree for use with the `write_huffman` method.
331/// Returned by `compiled_write_tree`.
332pub struct WriteHuffmanTree<E: Endianness, T: Ord> {
333 map: BTreeMap<T, Box<[(u32, u32)]>>,
334 phantom: PhantomData<E>,
335}
336
337impl<E: Endianness, T: Ord + Clone> WriteHuffmanTree<E, T> {
338 /// Returns true if symbol is in tree.
339 #[inline]
340 pub fn has_symbol(&self, symbol: &T) -> bool {
341 self.map.contains_key(symbol)
342 }
343
344 /// Given symbol, returns iterator of
345 /// (bits, value) pairs for writing code.
346 /// Panics if symbol is not found.
347 #[inline]
348 pub fn get(&self, symbol: &T) -> impl Iterator<Item = &(u32, u32)> {
349 self.map[symbol].iter()
350 }
351}
352