1//! 16-bit Huffman compression and decompression.
2//! Huffman compression and decompression routines written
3//! by Christian Rouet for his PIZ image file format.
4// see https://github.com/AcademySoftwareFoundation/openexr/blob/88246d991e0318c043e6f584f7493da08a31f9f8/OpenEXR/IlmImf/ImfHuf.cpp
5
6use crate::math::RoundingMode;
7use crate::error::{Error, Result, UnitResult, u64_to_usize, u32_to_usize};
8use crate::io::Data;
9use std::{
10 cmp::Ordering,
11 collections::BinaryHeap,
12 io::{Cursor, Read, Write},
13};
14use std::convert::TryFrom;
15use smallvec::SmallVec;
16
17
18pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
19 let mut remaining_compressed = compressed;
20
21 let min_code_index = usize::try_from(u32::read(&mut remaining_compressed)?)?;
22 let max_code_index_32 = u32::read(&mut remaining_compressed)?;
23 let _table_size = usize::try_from(u32::read(&mut remaining_compressed)?)?; // TODO check this and return Err?
24 let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
25 let _skipped = u32::read(&mut remaining_compressed)?; // what is this
26
27 let max_code_index = usize::try_from(max_code_index_32).unwrap();
28 if min_code_index >= ENCODING_TABLE_SIZE || max_code_index >= ENCODING_TABLE_SIZE {
29 return Err(Error::invalid(INVALID_TABLE_SIZE));
30 }
31
32 if RoundingMode::Up.divide(bit_count, 8) > remaining_compressed.len() {
33 return Err(Error::invalid(NOT_ENOUGH_DATA));
34 }
35
36 let encoding_table = read_encoding_table(&mut remaining_compressed, min_code_index, max_code_index)?;
37 if bit_count > 8 * remaining_compressed.len() { return Err(Error::invalid(INVALID_BIT_COUNT)); }
38
39 let decoding_table = build_decoding_table(&encoding_table, min_code_index, max_code_index)?;
40
41 let result = decode_with_tables(
42 &encoding_table,
43 &decoding_table,
44 &remaining_compressed,
45 i32::try_from(bit_count)?,
46 max_code_index_32,
47 expected_size,
48 )?;
49
50 Ok(result)
51}
52
53pub fn compress(uncompressed: &[u16]) -> Result<Vec<u8>> {
54 if uncompressed.is_empty() { return Ok(vec![]); }
55
56 let mut frequencies = count_frequencies(uncompressed);
57 let (min_code_index, max_code_index) = build_encoding_table(&mut frequencies);
58
59 let mut result = Cursor::new(Vec::with_capacity(uncompressed.len()));
60 u32::write_slice(&mut result, &[0; 5])?; // we come back to these later after we know more about the compressed data
61
62 let table_start = result.position();
63 pack_encoding_table(
64 &frequencies,
65 min_code_index,
66 max_code_index,
67 &mut result,
68 )?;
69
70 let data_start = result.position();
71 let bit_count = encode_with_frequencies(
72 &frequencies,
73 uncompressed,
74 max_code_index,
75 &mut result
76 )?;
77
78 // write meta data after this
79 result.set_position(0);
80 let table_length = data_start - table_start;
81
82 u32::try_from(min_code_index)?.write(&mut result)?;
83 u32::try_from(max_code_index)?.write(&mut result)?;
84 u32::try_from(table_length)?.write(&mut result)?;
85 u32::try_from(bit_count)?.write(&mut result)?;
86 0_u32.write(&mut result)?;
87
88 Ok(result.into_inner())
89}
90
91
92const ENCODE_BITS: u64 = 16; // literal (value) bit length
93const DECODE_BITS: u64 = 14; // decoding bit size (>= 8)
94
95const ENCODING_TABLE_SIZE: usize = ((1 << ENCODE_BITS) + 1) as usize;
96const DECODING_TABLE_SIZE: usize = (1 << DECODE_BITS) as usize;
97const DECODE_MASK: u64 = DECODING_TABLE_SIZE as u64 - 1;
98
99const SHORT_ZEROCODE_RUN: u64 = 59;
100const LONG_ZEROCODE_RUN: u64 = 63;
101const SHORTEST_LONG_RUN: u64 = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
102const LONGEST_LONG_RUN: u64 = 255 + SHORTEST_LONG_RUN;
103
104
105#[derive(Clone, Debug, Eq, PartialEq)]
106enum Code {
107 Empty,
108 Short(ShortCode),
109 Long(SmallVec<[u32; 2]>), // often 2, sometimes 4, rarely 8
110}
111
112#[derive(Clone, Debug, Eq, PartialEq)]
113struct ShortCode {
114 value: u32,
115 len: u8,
116}
117
118impl ShortCode {
119 #[inline] fn len(&self) -> u64 { u64::from(self.len) }
120}
121
122/// Decode (uncompress) n bits based on encoding & decoding tables:
123fn decode_with_tables(
124 encoding_table: &[u64],
125 decoding_table: &[Code],
126 mut input: &[u8],
127 input_bit_count: i32,
128 run_length_code: u32,
129 expected_output_size: usize,
130) -> Result<Vec<u16>>
131{
132 let mut output = Vec::with_capacity(expected_output_size);
133 let mut code_bits = 0_u64;
134 let mut code_bit_count = 0_u64;
135
136 while input.len() > 0 {
137 read_byte(&mut code_bits, &mut code_bit_count, &mut input)?;
138
139 // Access decoding table
140 while code_bit_count >= DECODE_BITS {
141 let code_index = (code_bits >> (code_bit_count - DECODE_BITS)) & DECODE_MASK;
142 let code = &decoding_table[u64_to_usize(code_index)];
143
144 // Get short code
145 if let Code::Short(code) = code {
146 code_bit_count -= code.len();
147
148 read_code_into_vec(
149 code.value,
150 run_length_code,
151 &mut code_bits,
152 &mut code_bit_count,
153 &mut input,
154 &mut output,
155 expected_output_size,
156 )?;
157 }
158 else if let Code::Long(ref long_codes) = code {
159 debug_assert_ne!(long_codes.len(), 0);
160
161 let long_code = long_codes.iter()
162 .filter_map(|&long_code|{
163 let encoded_long_code = encoding_table[u32_to_usize(long_code)];
164 let length = length(encoded_long_code);
165
166 while code_bit_count < length && input.len() > 0 {
167 let err = read_byte(&mut code_bits, &mut code_bit_count, &mut input);
168 if let Err(err) = err { return Some(Err(err)); }
169 }
170
171 if code_bit_count >= length {
172 let required_code = (code_bits >> (code_bit_count - length)) & ((1 << length) - 1);
173
174 if self::code(encoded_long_code) == required_code {
175 code_bit_count -= length;
176 return Some(Ok(long_code));
177 }
178 }
179
180 None
181
182 })
183 .next()
184 .ok_or(Error::invalid(INVALID_CODE))?;
185
186 read_code_into_vec(
187 long_code?,
188 run_length_code,
189 &mut code_bits,
190 &mut code_bit_count,
191 &mut input,
192 &mut output,
193 expected_output_size,
194 )?;
195 }
196 else {
197 return Err(Error::invalid(INVALID_CODE));
198 }
199 }
200 }
201
202 let count = u64::try_from((8 - input_bit_count) & 7)?;
203 code_bits >>= count;
204 code_bit_count -= count;
205
206 while code_bit_count > 0 {
207 let index = (code_bits << (DECODE_BITS - code_bit_count)) & DECODE_MASK;
208 let code = &decoding_table[u64_to_usize(index)];
209
210 if let Code::Short(short_code) = code {
211 if short_code.len() > code_bit_count { return Err(Error::invalid("code")) }; // FIXME why does this happen??
212 code_bit_count -= short_code.len(); // FIXME may throw "attempted to subtract with overflow"
213
214 read_code_into_vec(
215 short_code.value,
216 run_length_code,
217 &mut code_bits,
218 &mut code_bit_count,
219 &mut input,
220 &mut output,
221 expected_output_size,
222 )?;
223 }
224 else {
225 return Err(Error::invalid(INVALID_CODE));
226 }
227 }
228
229 if output.len() != expected_output_size {
230 return Err(Error::invalid(NOT_ENOUGH_DATA));
231 }
232
233 Ok(output)
234}
235
236/// Build a decoding hash table based on the encoding table code:
237/// - short codes (<= HUF_DECBITS) are resolved with a single table access;
238/// - long code entry allocations are not optimized, because long codes are
239/// unfrequent;
240/// - decoding tables are used by hufDecode();
241fn build_decoding_table(
242 encoding_table: &[u64],
243 min_code_index: usize,
244 max_code_index: usize,
245) -> Result<Vec<Code>>
246{
247 let mut decoding_table = vec![Code::Empty; DECODING_TABLE_SIZE]; // not an array because of code not being copy
248
249 for (code_index, &encoded_code) in encoding_table[..= max_code_index].iter().enumerate().skip(min_code_index) {
250 let code_index = u32::try_from(code_index).unwrap();
251
252 let code = code(encoded_code);
253 let length = length(encoded_code);
254
255 if code >> length != 0 {
256 return Err(Error::invalid(INVALID_TABLE_ENTRY));
257 }
258
259 if length > DECODE_BITS {
260 let long_code = &mut decoding_table[u64_to_usize(code >> (length - DECODE_BITS))];
261
262 match long_code {
263 Code::Empty => *long_code = Code::Long(smallvec![code_index]),
264 Code::Long(lits) => lits.push(code_index),
265 _ => { return Err(Error::invalid(INVALID_TABLE_ENTRY)); }
266 }
267 }
268 else if length != 0 {
269 let default_value = Code::Short(ShortCode {
270 value: code_index,
271 len: length as u8,
272 });
273
274 let start_index = u64_to_usize(code << (DECODE_BITS - length));
275 let count = u64_to_usize(1 << (DECODE_BITS - length));
276
277 for value in &mut decoding_table[start_index .. start_index + count] {
278 *value = default_value.clone();
279 }
280 }
281 }
282
283 Ok(decoding_table)
284}
285
286/// Run-length-decompresses all zero runs from the packed table to the encoding table
287fn read_encoding_table(
288 packed: &mut impl Read,
289 min_code_index: usize,
290 max_code_index: usize,
291) -> Result<Vec<u64>>
292{
293 let mut code_bits = 0_u64;
294 let mut code_bit_count = 0_u64;
295
296 // TODO push() into encoding table instead of index stuff?
297 let mut encoding_table = vec![0_u64; ENCODING_TABLE_SIZE];
298 let mut code_index = min_code_index;
299 while code_index <= max_code_index {
300 let code_len = read_bits(6, &mut code_bits, &mut code_bit_count, packed)?;
301 encoding_table[code_index] = code_len;
302
303 if code_len == LONG_ZEROCODE_RUN {
304 let zerun_bits = read_bits(8, &mut code_bits, &mut code_bit_count, packed)?;
305 let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
306
307 if code_index + zerun > max_code_index + 1 {
308 return Err(Error::invalid(TABLE_TOO_LONG));
309 }
310
311 for value in &mut encoding_table[code_index..code_index + zerun] {
312 *value = 0;
313 }
314
315 code_index += zerun;
316 }
317 else if code_len >= SHORT_ZEROCODE_RUN {
318 let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + 2).unwrap();
319 if code_index + duplication_count > max_code_index + 1 {
320 return Err(Error::invalid(TABLE_TOO_LONG));
321 }
322
323 for value in &mut encoding_table[code_index .. code_index + duplication_count] {
324 *value = 0;
325 }
326
327 code_index += duplication_count;
328 }
329 else {
330 code_index += 1;
331 }
332 }
333
334 build_canonical_table(&mut encoding_table);
335 Ok(encoding_table)
336}
337
338// TODO Use BitStreamReader for all the bit reads?!
339#[inline]
340fn read_bits(
341 count: u64,
342 code_bits: &mut u64,
343 code_bit_count: &mut u64,
344 input: &mut impl Read,
345) -> Result<u64>
346{
347 while *code_bit_count < count {
348 read_byte(code_bits, code_bit_count, input)?;
349 }
350
351 *code_bit_count -= count;
352 Ok((*code_bits >> *code_bit_count) & ((1 << count) - 1))
353}
354
355#[inline]
356fn read_byte(code_bits: &mut u64, bit_count: &mut u64, input: &mut impl Read) -> UnitResult {
357 *code_bits = (*code_bits << 8) | u8::read(input)? as u64;
358 *bit_count += 8;
359 Ok(())
360}
361
362#[inline]
363fn read_code_into_vec(
364 code: u32,
365 run_length_code: u32,
366 code_bits: &mut u64,
367 code_bit_count: &mut u64,
368 read: &mut impl Read,
369 out: &mut Vec<u16>,
370 max_len: usize,
371) -> UnitResult
372{
373 if code == run_length_code { // code may be too large for u16
374 if *code_bit_count < 8 {
375 read_byte(code_bits, code_bit_count, read)?;
376 }
377
378 *code_bit_count -= 8;
379
380 let code_repetitions = usize::from((*code_bits >> *code_bit_count) as u8);
381
382 if out.len() + code_repetitions > max_len {
383 return Err(Error::invalid(TOO_MUCH_DATA));
384 }
385 else if out.is_empty() {
386 return Err(Error::invalid(NOT_ENOUGH_DATA));
387 }
388
389 let repeated_code = *out.last().unwrap();
390 out.extend(std::iter::repeat(repeated_code).take(code_repetitions));
391 }
392 else if out.len() < max_len { // implies that code is not larger than u16???
393 out.push(u16::try_from(code)?);
394 }
395 else {
396 return Err(Error::invalid(TOO_MUCH_DATA));
397 }
398
399 Ok(())
400}
401
402fn count_frequencies(data: &[u16]) -> Vec<u64> {
403 let mut frequencies: Vec = vec![0_u64; ENCODING_TABLE_SIZE];
404
405 for value: &u16 in data {
406 frequencies[*value as usize] += 1;
407 }
408
409 frequencies
410}
411
412fn write_bits(
413 count: u64,
414 bits: u64,
415 code_bits: &mut u64,
416 code_bit_count: &mut u64,
417 mut out: impl Write,
418) -> UnitResult
419{
420 *code_bits = (*code_bits << count) | bits;
421 *code_bit_count += count;
422
423 while *code_bit_count >= 8 {
424 *code_bit_count -= 8;
425 out.write(&[
426 (*code_bits >> *code_bit_count) as u8 // TODO make sure never or always wraps?
427 ])?;
428 }
429
430 Ok(())
431}
432
433fn write_code(scode: u64, code_bits: &mut u64, code_bit_count: &mut u64, mut out: impl Write) -> UnitResult {
434 write_bits(count:length(scode), bits:code(scode), code_bits, code_bit_count, &mut out)
435}
436
437#[inline(always)]
438fn send_code(
439 scode: u64,
440 run_count: u64,
441 run_code: u64,
442 code_bits: &mut u64,
443 code_bit_count: &mut u64,
444 mut out: impl Write,
445) -> UnitResult
446{
447 // Output a run of runCount instances of the symbol sCount.
448 // Output the symbols explicitly, or if that is shorter, output
449 // the sCode symbol once followed by a runCode symbol and runCount
450 // expressed as an 8-bit number.
451 if length(code:scode) + length(run_code) + 8 < length(code:scode) * run_count {
452 write_code(scode, code_bits, code_bit_count, &mut out)?;
453 write_code(scode:run_code, code_bits, code_bit_count, &mut out)?;
454 write_bits(count:8, bits:run_count, code_bits, code_bit_count, &mut out)?;
455 }
456 else {
457 for _ in 0 ..= run_count {
458 write_code(scode, code_bits, code_bit_count, &mut out)?;
459 }
460 }
461
462 Ok(())
463}
464
465fn encode_with_frequencies(
466 frequencies: &[u64],
467 uncompressed: &[u16],
468 run_length_code: usize,
469 mut out: &mut Cursor<Vec<u8>>,
470) -> Result<u64>
471{
472 let mut code_bits = 0;
473 let mut code_bit_count = 0;
474
475 let mut run_start_value = uncompressed[0];
476 let mut run_length = 0;
477
478 let start_position = out.position();
479
480 // Loop on input values
481 for &current_value in &uncompressed[1..] {
482 // Count same values or send code
483 if run_start_value == current_value && run_length < 255 {
484 run_length += 1;
485 }
486 else {
487 send_code(
488 frequencies[run_start_value as usize],
489 run_length,
490 frequencies[run_length_code],
491 &mut code_bits,
492 &mut code_bit_count,
493 &mut out,
494 )?;
495
496 run_length = 0;
497 }
498
499 run_start_value = current_value;
500 }
501
502 // Send remaining code
503 send_code(
504 frequencies[run_start_value as usize],
505 run_length,
506 frequencies[run_length_code],
507 &mut code_bits,
508 &mut code_bit_count,
509 &mut out,
510 )?;
511
512 let data_length = out.position() - start_position; // we shouldn't count the last byte write
513
514 if code_bit_count != 0 {
515 out.write(&[
516 (code_bits << (8 - code_bit_count) & 0xff) as u8
517 ])?;
518 }
519
520 Ok(data_length * 8 + code_bit_count)
521}
522
523///
524/// Pack an encoding table:
525/// - only code lengths, not actual codes, are stored
526/// - runs of zeroes are compressed as follows:
527///
528/// unpacked packed
529/// --------------------------------
530/// 1 zero 0 (6 bits)
531/// 2 zeroes 59
532/// 3 zeroes 60
533/// 4 zeroes 61
534/// 5 zeroes 62
535/// n zeroes (6 or more) 63 n-6 (6 + 8 bits)
536///
537fn pack_encoding_table(
538 frequencies: &[u64],
539 min_index: usize,
540 max_index: usize,
541 mut out: &mut Cursor<Vec<u8>>,
542) -> UnitResult
543{
544 let mut code_bits = 0_u64;
545 let mut code_bit_count = 0_u64;
546
547 let mut frequency_index = min_index;
548 while frequency_index <= max_index { // TODO slice iteration?
549 let code_length = length(frequencies[frequency_index]);
550
551 if code_length == 0 {
552 let mut zero_run = 1;
553
554 while frequency_index < max_index && zero_run < LONGEST_LONG_RUN {
555 if length(frequencies[frequency_index + 1]) > 0 {
556 break;
557 }
558
559 frequency_index += 1;
560 zero_run += 1;
561 }
562
563 if zero_run >= 2 {
564 if zero_run >= SHORTEST_LONG_RUN {
565 write_bits(6, LONG_ZEROCODE_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
566 write_bits(8, zero_run - SHORTEST_LONG_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
567 }
568 else {
569 write_bits(6, SHORT_ZEROCODE_RUN + zero_run - 2, &mut code_bits, &mut code_bit_count, &mut out)?;
570 }
571
572 frequency_index += 1; // we must increment or else this may go very wrong
573 continue;
574 }
575 }
576
577 write_bits(6, code_length, &mut code_bits, &mut code_bit_count, &mut out)?;
578 frequency_index += 1;
579 }
580
581 if code_bit_count > 0 {
582 out.write(&[
583 (code_bits << (8 - code_bit_count)) as u8
584 ])?;
585 }
586
587 Ok(())
588}
589
590/// Build a "canonical" Huffman code table:
591/// - for each (uncompressed) symbol, code contains the length
592/// of the corresponding code (in the compressed data)
593/// - canonical codes are computed and stored in code
594/// - the rules for constructing canonical codes are as follows:
595/// * shorter codes (if filled with zeroes to the right)
596/// have a numerically higher value than longer codes
597/// * for codes with the same length, numerical values
598/// increase with numerical symbol values
599/// - because the canonical code table can be constructed from
600/// symbol lengths alone, the code table can be transmitted
601/// without sending the actual code values
602/// - see http://www.compressconsult.com/huffman/
603fn build_canonical_table(code_table: &mut [u64]) {
604 debug_assert_eq!(code_table.len(), ENCODING_TABLE_SIZE);
605
606 let mut count_per_code = [0_u64; 59];
607
608 for &code in code_table.iter() {
609 count_per_code[u64_to_usize(code)] += 1;
610 }
611
612 // For each i from 58 through 1, compute the
613 // numerically lowest code with length i, and
614 // store that code in n[i].
615 {
616 let mut code = 0_u64; // TODO use foldr?
617 for count in &mut count_per_code.iter_mut().rev() {
618 let next_code = (code + *count) >> 1;
619 *count = code;
620 code = next_code;
621 }
622 }
623
624 // code[i] contains the length, l, of the
625 // code for symbol i. Assign the next available
626 // code of length l to the symbol and store both
627 // l and the code in code[i]. // TODO iter + filter ?
628 for symbol_length in code_table.iter_mut() {
629 let current_length = *symbol_length;
630 let code_index = u64_to_usize(current_length);
631 if current_length > 0 {
632 *symbol_length = current_length | (count_per_code[code_index] << 6);
633 count_per_code[code_index] += 1;
634 }
635 }
636}
637
638
639/// Compute Huffman codes (based on frq input) and store them in frq:
640/// - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
641/// - max code length is 58 bits;
642/// - codes outside the range [im-iM] have a null length (unused values);
643/// - original frequencies are destroyed;
644/// - encoding tables are used by hufEncode() and hufBuildDecTable();
645///
646/// NB: The following code "(*a == *b) && (a > b))" was added to ensure
647/// elements in the heap with the same value are sorted by index.
648/// This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
649/// produced a resultant sorted heap that is identical across OSes.
650fn build_encoding_table(
651 frequencies: &mut [u64], // input frequencies, output encoding table
652) -> (usize, usize) // return frequency max min range
653{
654 debug_assert_eq!(frequencies.len(), ENCODING_TABLE_SIZE);
655
656 /// Frequency with position, used for MinHeap.
657 #[derive(Eq, PartialEq, Copy, Clone)]
658 struct HeapFrequency {
659 position: usize,
660 frequency: u64,
661 }
662
663 impl Ord for HeapFrequency {
664 fn cmp(&self, other: &Self) -> Ordering {
665 other.frequency.cmp(&self.frequency)
666 .then_with(|| other.position.cmp(&self.position))
667 }
668 }
669
670 impl PartialOrd for HeapFrequency {
671 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
672 }
673
674 // This function assumes that when it is called, array frq
675 // indicates the frequency of all possible symbols in the data
676 // that are to be Huffman-encoded. (frq[i] contains the number
677 // of occurrences of symbol i in the data.)
678 //
679 // The loop below does three things:
680 //
681 // 1) Finds the minimum and maximum indices that point
682 // to non-zero entries in frq:
683 //
684 // frq[im] != 0, and frq[i] == 0 for all i < im
685 // frq[iM] != 0, and frq[i] == 0 for all i > iM
686 //
687 // 2) Fills array fHeap with pointers to all non-zero
688 // entries in frq.
689 //
690 // 3) Initializes array hlink such that hlink[i] == i
691 // for all array entries.
692
693 // We need to use vec here or we overflow the stack.
694 let mut links = vec![0_usize; ENCODING_TABLE_SIZE];
695 let mut frequency_heap = vec![0_usize; ENCODING_TABLE_SIZE];
696
697 // This is a good solution since we don't have usize::MAX items (no panics or UB),
698 // and since this is short-circuit, it stops at the first in order non zero element.
699 let min_frequency_index = frequencies.iter().position(|f| *f != 0).unwrap_or(0);
700
701 let mut max_frequency_index = 0;
702 let mut frequency_count = 0;
703
704 // assert bounds check to optimize away bounds check in loops
705 assert!(links.len() >= ENCODING_TABLE_SIZE);
706 assert!(frequencies.len() >= ENCODING_TABLE_SIZE);
707
708 for index in min_frequency_index..ENCODING_TABLE_SIZE {
709 links[index] = index; // TODO for x in links.iter().enumerate()
710
711 if frequencies[index] != 0 {
712 frequency_heap[frequency_count] = index;
713 max_frequency_index = index;
714 frequency_count += 1;
715 }
716 }
717
718
719 // Add a pseudo-symbol, with a frequency count of 1, to frq;
720 // adjust the fHeap and hlink array accordingly. Function
721 // hufEncode() uses the pseudo-symbol for run-length encoding.
722
723 max_frequency_index += 1;
724 frequencies[max_frequency_index] = 1;
725 frequency_heap[frequency_count] = max_frequency_index;
726 frequency_count += 1;
727
728 // Build an array, scode, such that scode[i] contains the number
729 // of bits assigned to symbol i. Conceptually this is done by
730 // constructing a tree whose leaves are the symbols with non-zero
731 // frequency:
732 //
733 // Make a heap that contains all symbols with a non-zero frequency,
734 // with the least frequent symbol on top.
735 //
736 // Repeat until only one symbol is left on the heap:
737 //
738 // Take the two least frequent symbols off the top of the heap.
739 // Create a new node that has first two nodes as children, and
740 // whose frequency is the sum of the frequencies of the first
741 // two nodes. Put the new node back into the heap.
742 //
743 // The last node left on the heap is the root of the tree. For each
744 // leaf node, the distance between the root and the leaf is the length
745 // of the code for the corresponding symbol.
746 //
747 // The loop below doesn't actually build the tree; instead we compute
748 // the distances of the leaves from the root on the fly. When a new
749 // node is added to the heap, then that node's descendants are linked
750 // into a single linear list that starts at the new node, and the code
751 // lengths of the descendants (that is, their distance from the root
752 // of the tree) are incremented by one.
753 let mut heap = BinaryHeap::with_capacity(frequency_count);
754 for index in frequency_heap.drain(..frequency_count) {
755 heap.push(HeapFrequency { position: index, frequency: frequencies[index] });
756 }
757
758 let mut s_code = vec![0_u64; ENCODING_TABLE_SIZE];
759
760 while frequency_count > 1 {
761 // Find the indices, mm and m, of the two smallest non-zero frq
762 // values in fHeap, add the smallest frq to the second-smallest
763 // frq, and remove the smallest frq value from fHeap.
764 let (high_position, low_position) = {
765 let smallest_frequency = heap.pop().expect("heap empty bug");
766 frequency_count -= 1;
767
768 let mut second_smallest_frequency = heap.peek_mut().expect("heap empty bug");
769 second_smallest_frequency.frequency += smallest_frequency.frequency;
770
771 (second_smallest_frequency.position, smallest_frequency.position)
772 };
773
774 // The entries in scode are linked into lists with the
775 // entries in hlink serving as "next" pointers and with
776 // the end of a list marked by hlink[j] == j.
777 //
778 // Traverse the lists that start at scode[m] and scode[mm].
779 // For each element visited, increment the length of the
780 // corresponding code by one bit. (If we visit scode[j]
781 // during the traversal, then the code for symbol j becomes
782 // one bit longer.)
783 //
784 // Merge the lists that start at scode[m] and scode[mm]
785 // into a single list that starts at scode[m].
786
787 // Add a bit to all codes in the first list.
788 let mut index = high_position; // TODO fold()
789 loop {
790 s_code[index] += 1;
791 debug_assert!(s_code[index] <= 58);
792
793 // merge the two lists
794 if links[index] == index {
795 links[index] = low_position;
796 break;
797 }
798
799 index = links[index];
800 }
801
802 // Add a bit to all codes in the second list
803 let mut index = low_position; // TODO fold()
804 loop {
805 s_code[index] += 1;
806 debug_assert!(s_code[index] <= 58);
807
808 if links[index] == index {
809 break;
810 }
811
812 index = links[index];
813 }
814 }
815
816 // Build a canonical Huffman code table, replacing the code
817 // lengths in scode with (code, code length) pairs. Copy the
818 // code table from scode into frq.
819 build_canonical_table(&mut s_code);
820 frequencies.copy_from_slice(&s_code);
821
822 (min_frequency_index, max_frequency_index)
823}
824
825
826#[inline] fn length(code: u64) -> u64 { code & 63 }
827#[inline] fn code(code: u64) -> u64 { code >> 6 }
828
829const INVALID_BIT_COUNT: &'static str = "invalid number of bits";
830const INVALID_TABLE_ENTRY: &'static str = "invalid code table entry";
831const NOT_ENOUGH_DATA: &'static str = "decoded data are shorter than expected";
832const INVALID_TABLE_SIZE: &'static str = "unexpected end of code table data";
833const TABLE_TOO_LONG: &'static str = "code table is longer than expected";
834const INVALID_CODE: &'static str = "invalid code";
835const TOO_MUCH_DATA: &'static str = "decoded data are longer than expected";
836
837
838#[cfg(test)]
839mod test {
840 use super::*;
841 use rand::{Rng, SeedableRng};
842
843 const UNCOMPRESSED_ARRAY: [u16; 100] = [
844 3852, 2432, 33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013, 7715, 59875, 28182,
845 34449, 19983, 20399, 63407, 29486, 4877, 26738, 44815, 14042, 46091, 48228, 25682, 35412,
846 7582, 65069, 6632, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
847 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
848 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
849 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
850 3159, 29002, 14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347, 16991, 21303, 26263,
851 8312, 14017, 41777, 43240, 3500, 60250, 52437, 45715, 61520,
852 ];
853
854 const UNCOMPRESSED_ARRAY_SPECIAL: [u16; 100] = [
855 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28182,
856 0, 65534, 0, 65534, 0, 65534, 0, 65534, 0, 0, 0, 0, 0,
857 0, 0, 0, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
858 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
859 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
860 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
861 3159, 29002, 14535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
862 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
863 ];
864
865 const COMPRESSED_ARRAY: [u8; 703] = [
866 0xc9, 0x0, 0x0, 0x0, 0x2e, 0xfe, 0x0, 0x0, 0x56, 0x2, 0x0, 0x0, 0xa2, 0x2, 0x0, 0x0, 0x0,
867 0x0, 0x0, 0x0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x47,
868 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x1f, 0xff, 0xff, 0xed, 0x87, 0xff, 0xff, 0xf0,
869 0x91, 0xff, 0xf8, 0x1f, 0xf4, 0xf1, 0xff, 0x78, 0x1f, 0xfd, 0xa1, 0xff, 0xff, 0xff, 0xff,
870 0xff, 0xff, 0xfa, 0xc7, 0xfe, 0x4, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
871 0xff, 0xed, 0x1f, 0xf3, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe8, 0x7, 0xfd, 0xf8,
872 0x7f, 0xff, 0xff, 0xff, 0xfd, 0x10, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x51, 0xff,
873 0xff, 0xff, 0xff, 0xfe, 0x1, 0xff, 0x73, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
874 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0x0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
875 0xff, 0xff, 0xff, 0xfc, 0xa4, 0x7f, 0xf5, 0x7, 0xfc, 0x48, 0x7f, 0xe0, 0x47, 0xff, 0xff,
876 0xf5, 0x91, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x21, 0xff,
877 0x7f, 0x1f, 0xf8, 0xd1, 0xff, 0xe7, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x1f, 0xf2, 0x91,
878 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1c, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xe7,
879 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x8c, 0x7f, 0xff, 0xff, 0xc, 0x1f, 0xff, 0xff,
880 0xe5, 0x7, 0xff, 0xff, 0xfa, 0x81, 0xff, 0xff, 0xff, 0x20, 0x7f, 0xff, 0xff, 0xff, 0xff,
881 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
882 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xbc, 0x7f, 0xff, 0xff, 0xff, 0xfc, 0x38, 0x7f, 0xff,
883 0xff, 0xff, 0xfc, 0xd0, 0x7f, 0xd3, 0xc7, 0xff, 0xff, 0xf7, 0x91, 0xff, 0xff, 0xff, 0xff,
884 0xfe, 0xc1, 0xff, 0xff, 0xff, 0xff, 0xf9, 0x61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
885 0x87, 0xff, 0xff, 0xfd, 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x87, 0xff, 0xff,
886 0xff, 0xff, 0xfe, 0x87, 0xff, 0x58, 0x7f, 0xff, 0xff, 0xff, 0xfd, 0xec, 0x7f, 0xff, 0xff,
887 0xff, 0xfe, 0xd0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x6c, 0x7f, 0xcb, 0x47, 0xff, 0xff, 0xf3,
888 0x61, 0xff, 0xff, 0xff, 0x80, 0x7f, 0xe1, 0xc7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f,
889 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
890 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x1f, 0xff, 0xff,
891 0xff, 0xff, 0xff, 0xfd, 0xcc, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x11, 0xff, 0xff,
892 0xff, 0xff, 0xf8, 0x41, 0xff, 0xbc, 0x1f, 0xff, 0xff, 0xc4, 0x47, 0xff, 0xff, 0xf2, 0x91,
893 0xff, 0xe0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0x6d, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
894 0xff, 0xff, 0xff, 0xff, 0xff, 0x2, 0x1f, 0xf9, 0xe1, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xe1,
895 0xff, 0xff, 0xfd, 0xb0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe1, 0xff, 0xff, 0xff, 0xff,
896 0xff, 0xff, 0xff, 0xff, 0x5a, 0x1f, 0xfc, 0x81, 0xbf, 0x29, 0x1b, 0xff, 0xff, 0xff, 0xff,
897 0xff, 0xff, 0xff, 0xf3, 0x61, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x1b,
898 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0xb1, 0xbf, 0xff, 0xfd, 0x80, 0x6f, 0xff,
899 0xff, 0xf, 0x1b, 0xf8, 0xc1, 0xbf, 0xff, 0xfc, 0xb4, 0x6f, 0xff, 0xff, 0xff, 0xff, 0xff,
900 0xff, 0xff, 0xda, 0x46, 0xfc, 0x54, 0x6f, 0xc9, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
901 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x21, 0x1b, 0xff, 0xff, 0xe0, 0x86, 0xff, 0xff,
902 0xff, 0xff, 0xe2, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
903 0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0x91, 0xbf, 0xff, 0xfe, 0x24, 0x6f, 0xff, 0xff, 0x6b,
904 0x1b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xb1, 0xbf, 0xfa, 0x1b, 0xfb, 0x11,
905 0xbf, 0xff, 0xfe, 0x8, 0x6f, 0xff, 0xff, 0x42, 0x1b, 0xff, 0xff, 0xff, 0xff, 0xb9, 0x1b,
906 0xff, 0xff, 0xcf, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x31,
907 0x86, 0x10, 0x9, 0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2, 0x34, 0x9e,
908 0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1, 0x98, 0x14, 0x59, 0x57, 0x2c,
909 0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc, 0xe6, 0x13, 0x38, 0xc, 0x8e, 0xe2, 0xc, 0xfe,
910 0x49, 0x73, 0xbc, 0x2b, 0x7b, 0x9, 0x27, 0x79, 0x14, 0xc, 0x94, 0x42, 0xf8, 0x7c, 0x1,
911 0x8d, 0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe, 0x8d, 0x8,
912 0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d, 0xc0,
913 ];
914
915 fn fill(rng: &mut impl Rng, size: usize) -> Vec<u16> {
916 if rng.gen_bool(0.2) {
917 let value = if rng.gen_bool(0.5) { 0 } else { u16::MAX };
918 return vec![ value; size ];
919 }
920
921 let mut data = vec![0_u16; size];
922
923 data.iter_mut().for_each(|v| {
924 *v = rng.gen_range(0_u16 .. u16::MAX);
925 });
926
927 data
928 }
929
930 /// Test using both input and output from a custom ILM OpenEXR test.
931 #[test]
932 fn compression_comparation() {
933 let raw = compress(&UNCOMPRESSED_ARRAY).unwrap();
934 assert_eq!(raw, COMPRESSED_ARRAY.to_vec());
935 }
936
937 #[test]
938 fn round_trip() {
939 let mut random = rand::rngs::StdRng::from_seed(SEED);
940 let raw = fill(&mut random, u16::MAX as usize);
941
942 let compressed = compress(&raw).unwrap();
943 let uncompressed = decompress(&compressed, raw.len()).unwrap();
944
945 assert_eq!(uncompressed, raw);
946 }
947
948 #[test]
949 fn repetitions_special() {
950 let raw = UNCOMPRESSED_ARRAY_SPECIAL;
951
952 let compressed = compress(&raw).unwrap();
953 let uncompressed = decompress(&compressed, raw.len()).unwrap();
954
955 assert_eq!(uncompressed, raw.to_vec());
956 }
957
958 #[test]
959 fn round_trip100() {
960 let mut random = rand::rngs::StdRng::from_seed(SEED);
961
962 for size_multiplier in 1..10 {
963 let raw = fill(&mut random, size_multiplier * 50_000);
964
965 let compressed = compress(&raw).unwrap();
966 let uncompressed = decompress(&compressed, raw.len()).unwrap();
967
968 assert_eq!(uncompressed, raw);
969 }
970 }
971
972 #[test]
973 fn test_zeroes(){
974 let uncompressed: &[u16] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
975
976 let compressed = compress(uncompressed).unwrap();
977 let decompressed = decompress(&compressed, uncompressed.len()).unwrap();
978
979 assert_eq!(uncompressed, decompressed.as_slice());
980 }
981
982 const SEED: [u8; 32] = [
983 12,155,32,34,112,109,98,54,
984 12,255,32,34,112,109,98,55,
985 12,155,32,34,12,109,98,54,
986 12,35,32,34,112,109,48,54,
987 ];
988}
989