1//! 16-bit Huffman compression and decompression.
2//! Huffman compression and decompression routines written
3//! by Christian Rouet for his PIZ image file format.
4// see https://github.com/AcademySoftwareFoundation/openexr/blob/88246d991e0318c043e6f584f7493da08a31f9f8/OpenEXR/IlmImf/ImfHuf.cpp
5
6use crate::math::RoundingMode;
7use crate::error::{Error, Result, UnitResult, u64_to_usize, u32_to_usize};
8use crate::io::Data;
9use std::{
10 cmp::Ordering,
11 collections::BinaryHeap,
12 io::{Cursor, Read, Write},
13};
14use std::convert::TryFrom;
15use smallvec::SmallVec;
16
17
18pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
19 let mut remaining_compressed = compressed;
20
21 let min_code_index = usize::try_from(u32::read(&mut remaining_compressed)?)?;
22 let max_code_index_32 = u32::read(&mut remaining_compressed)?;
23 let _table_size = usize::try_from(u32::read(&mut remaining_compressed)?)?; // TODO check this and return Err?
24 let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
25 let _skipped = u32::read(&mut remaining_compressed)?; // what is this
26
27 let max_code_index = usize::try_from(max_code_index_32).unwrap();
28 if min_code_index >= ENCODING_TABLE_SIZE || max_code_index >= ENCODING_TABLE_SIZE {
29 return Err(Error::invalid(INVALID_TABLE_SIZE));
30 }
31
32 if RoundingMode::Up.divide(bit_count, 8) > remaining_compressed.len() {
33 return Err(Error::invalid(NOT_ENOUGH_DATA));
34 }
35
36 let encoding_table = read_encoding_table(&mut remaining_compressed, min_code_index, max_code_index)?;
37 if bit_count > 8 * remaining_compressed.len() { return Err(Error::invalid(INVALID_BIT_COUNT)); }
38
39 let decoding_table = build_decoding_table(&encoding_table, min_code_index, max_code_index)?;
40
41 let result = decode_with_tables(
42 &encoding_table,
43 &decoding_table,
44 &remaining_compressed,
45 i32::try_from(bit_count)?,
46 max_code_index_32,
47 expected_size,
48 )?;
49
50 Ok(result)
51}
52
53pub fn compress(uncompressed: &[u16]) -> Result<Vec<u8>> {
54 if uncompressed.is_empty() { return Ok(vec![]); }
55
56 let mut frequencies = count_frequencies(uncompressed);
57 let (min_code_index, max_code_index) = build_encoding_table(&mut frequencies);
58
59 let mut result = Cursor::new(Vec::with_capacity(uncompressed.len()));
60 u32::write_slice(&mut result, &[0; 5])?; // we come back to these later after we know more about the compressed data
61
62 let table_start = result.position();
63 pack_encoding_table(
64 &frequencies,
65 min_code_index,
66 max_code_index,
67 &mut result,
68 )?;
69
70 let data_start = result.position();
71 let bit_count = encode_with_frequencies(
72 &frequencies,
73 uncompressed,
74 max_code_index,
75 &mut result
76 )?;
77
78 // write meta data after this
79 result.set_position(0);
80 let table_length = data_start - table_start;
81
82 u32::try_from(min_code_index)?.write(&mut result)?;
83 u32::try_from(max_code_index)?.write(&mut result)?;
84 u32::try_from(table_length)?.write(&mut result)?;
85 u32::try_from(bit_count)?.write(&mut result)?;
86 0_u32.write(&mut result)?;
87
88 Ok(result.into_inner())
89}
90
91
92const ENCODE_BITS: u64 = 16; // literal (value) bit length
93const DECODE_BITS: u64 = 14; // decoding bit size (>= 8)
94
95const ENCODING_TABLE_SIZE: usize = ((1 << ENCODE_BITS) + 1) as usize;
96const DECODING_TABLE_SIZE: usize = (1 << DECODE_BITS) as usize;
97const DECODE_MASK: u64 = DECODING_TABLE_SIZE as u64 - 1;
98
99const SHORT_ZEROCODE_RUN: u64 = 59;
100const LONG_ZEROCODE_RUN: u64 = 63;
101const SHORTEST_LONG_RUN: u64 = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
102const LONGEST_LONG_RUN: u64 = 255 + SHORTEST_LONG_RUN;
103
104
105#[derive(Clone, Debug, Eq, PartialEq)]
106enum Code {
107 Empty,
108 Short(ShortCode),
109 Long(SmallVec<[u32; 2]>), // often 2, sometimes 4, rarely 8
110}
111
112#[derive(Clone, Debug, Eq, PartialEq)]
113struct ShortCode {
114 value: u32,
115 len: u8,
116}
117
118impl ShortCode {
119 #[inline] fn len(&self) -> u64 { u64::from(self.len) }
120}
121
122/// Decode (uncompress) n bits based on encoding & decoding tables:
123fn decode_with_tables(
124 encoding_table: &[u64],
125 decoding_table: &[Code],
126 mut input: &[u8],
127 input_bit_count: i32,
128 run_length_code: u32,
129 expected_output_size: usize,
130) -> Result<Vec<u16>>
131{
132 let mut output = Vec::with_capacity(expected_output_size);
133 let mut code_bits = 0_u64;
134 let mut code_bit_count = 0_u64;
135
136 while input.len() > 0 {
137 read_byte(&mut code_bits, &mut code_bit_count, &mut input)?;
138
139 // Access decoding table
140 while code_bit_count >= DECODE_BITS {
141 let code_index = (code_bits >> (code_bit_count - DECODE_BITS)) & DECODE_MASK;
142 let code = &decoding_table[u64_to_usize(code_index)];
143
144 // Get short code
145 if let Code::Short(code) = code {
146 code_bit_count -= code.len();
147
148 read_code_into_vec(
149 code.value,
150 run_length_code,
151 &mut code_bits,
152 &mut code_bit_count,
153 &mut input,
154 &mut output,
155 expected_output_size,
156 )?;
157 }
158 else if let Code::Long(ref long_codes) = code {
159 debug_assert_ne!(long_codes.len(), 0);
160
161 let long_code = long_codes.iter()
162 .filter_map(|&long_code|{
163 let encoded_long_code = encoding_table[u32_to_usize(long_code)];
164 let length = length(encoded_long_code);
165
166 while code_bit_count < length && input.len() > 0 {
167 let err = read_byte(&mut code_bits, &mut code_bit_count, &mut input);
168 if let Err(err) = err { return Some(Err(err)); }
169 }
170
171 if code_bit_count >= length {
172 let required_code = (code_bits >> (code_bit_count - length)) & ((1 << length) - 1);
173
174 if self::code(encoded_long_code) == required_code {
175 code_bit_count -= length;
176 return Some(Ok(long_code));
177 }
178 }
179
180 None
181
182 })
183 .next()
184 .ok_or(Error::invalid(INVALID_CODE))?;
185
186 read_code_into_vec(
187 long_code?,
188 run_length_code,
189 &mut code_bits,
190 &mut code_bit_count,
191 &mut input,
192 &mut output,
193 expected_output_size,
194 )?;
195 }
196 else {
197 return Err(Error::invalid(INVALID_CODE));
198 }
199 }
200 }
201
202 let count = u64::try_from((8 - input_bit_count) & 7)?;
203 code_bits >>= count;
204
205 code_bit_count = code_bit_count.checked_sub(count)
206 .ok_or_else(|| Error::invalid("code"))?;
207
208 while code_bit_count > 0 {
209 let index = (code_bits << (DECODE_BITS - code_bit_count)) & DECODE_MASK;
210 let code = &decoding_table[u64_to_usize(index)];
211
212 if let Code::Short(short_code) = code {
213 if short_code.len() > code_bit_count { return Err(Error::invalid("code")) }; // FIXME why does this happen??
214 code_bit_count -= short_code.len(); // FIXME may throw "attempted to subtract with overflow"
215
216 read_code_into_vec(
217 short_code.value,
218 run_length_code,
219 &mut code_bits,
220 &mut code_bit_count,
221 &mut input,
222 &mut output,
223 expected_output_size,
224 )?;
225 }
226 else {
227 return Err(Error::invalid(INVALID_CODE));
228 }
229 }
230
231 if output.len() != expected_output_size {
232 return Err(Error::invalid(NOT_ENOUGH_DATA));
233 }
234
235 Ok(output)
236}
237
238/// Build a decoding hash table based on the encoding table code:
239/// - short codes (<= HUF_DECBITS) are resolved with a single table access;
240/// - long code entry allocations are not optimized, because long codes are
241/// unfrequent;
242/// - decoding tables are used by hufDecode();
243fn build_decoding_table(
244 encoding_table: &[u64],
245 min_code_index: usize,
246 max_code_index: usize,
247) -> Result<Vec<Code>>
248{
249 let mut decoding_table = vec![Code::Empty; DECODING_TABLE_SIZE]; // not an array because of code not being copy
250
251 for (code_index, &encoded_code) in encoding_table[..= max_code_index].iter().enumerate().skip(min_code_index) {
252 let code_index = u32::try_from(code_index).unwrap();
253
254 let code = code(encoded_code);
255 let length = length(encoded_code);
256
257 if code >> length != 0 {
258 return Err(Error::invalid(INVALID_TABLE_ENTRY));
259 }
260
261 if length > DECODE_BITS {
262 let long_code = &mut decoding_table[u64_to_usize(code >> (length - DECODE_BITS))];
263
264 match long_code {
265 Code::Empty => *long_code = Code::Long(smallvec![code_index]),
266 Code::Long(lits) => lits.push(code_index),
267 _ => { return Err(Error::invalid(INVALID_TABLE_ENTRY)); }
268 }
269 }
270 else if length != 0 {
271 let default_value = Code::Short(ShortCode {
272 value: code_index,
273 len: length as u8,
274 });
275
276 let start_index = u64_to_usize(code << (DECODE_BITS - length));
277 let count = u64_to_usize(1 << (DECODE_BITS - length));
278
279 for value in &mut decoding_table[start_index .. start_index + count] {
280 *value = default_value.clone();
281 }
282 }
283 }
284
285 Ok(decoding_table)
286}
287
288/// Run-length-decompresses all zero runs from the packed table to the encoding table
289fn read_encoding_table(
290 packed: &mut impl Read,
291 min_code_index: usize,
292 max_code_index: usize,
293) -> Result<Vec<u64>>
294{
295 let mut code_bits = 0_u64;
296 let mut code_bit_count = 0_u64;
297
298 // TODO push() into encoding table instead of index stuff?
299 let mut encoding_table = vec![0_u64; ENCODING_TABLE_SIZE];
300 let mut code_index = min_code_index;
301 while code_index <= max_code_index {
302 let code_len = read_bits(6, &mut code_bits, &mut code_bit_count, packed)?;
303 encoding_table[code_index] = code_len;
304
305 if code_len == LONG_ZEROCODE_RUN {
306 let zerun_bits = read_bits(8, &mut code_bits, &mut code_bit_count, packed)?;
307 let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
308
309 if code_index + zerun > max_code_index + 1 {
310 return Err(Error::invalid(TABLE_TOO_LONG));
311 }
312
313 for value in &mut encoding_table[code_index..code_index + zerun] {
314 *value = 0;
315 }
316
317 code_index += zerun;
318 }
319 else if code_len >= SHORT_ZEROCODE_RUN {
320 let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + 2).unwrap();
321 if code_index + duplication_count > max_code_index + 1 {
322 return Err(Error::invalid(TABLE_TOO_LONG));
323 }
324
325 for value in &mut encoding_table[code_index .. code_index + duplication_count] {
326 *value = 0;
327 }
328
329 code_index += duplication_count;
330 }
331 else {
332 code_index += 1;
333 }
334 }
335
336 build_canonical_table(&mut encoding_table);
337 Ok(encoding_table)
338}
339
340// TODO Use BitStreamReader for all the bit reads?!
341#[inline]
342fn read_bits(
343 count: u64,
344 code_bits: &mut u64,
345 code_bit_count: &mut u64,
346 input: &mut impl Read,
347) -> Result<u64>
348{
349 while *code_bit_count < count {
350 read_byte(code_bits, code_bit_count, input)?;
351 }
352
353 *code_bit_count -= count;
354 Ok((*code_bits >> *code_bit_count) & ((1 << count) - 1))
355}
356
357#[inline]
358fn read_byte(code_bits: &mut u64, bit_count: &mut u64, input: &mut impl Read) -> UnitResult {
359 *code_bits = (*code_bits << 8) | u8::read(input)? as u64;
360 *bit_count += 8;
361 Ok(())
362}
363
364#[inline]
365fn read_code_into_vec(
366 code: u32,
367 run_length_code: u32,
368 code_bits: &mut u64,
369 code_bit_count: &mut u64,
370 read: &mut impl Read,
371 out: &mut Vec<u16>,
372 max_len: usize,
373) -> UnitResult
374{
375 if code == run_length_code { // code may be too large for u16
376 if *code_bit_count < 8 {
377 read_byte(code_bits, code_bit_count, read)?;
378 }
379
380 *code_bit_count -= 8;
381
382 let code_repetitions = usize::from((*code_bits >> *code_bit_count) as u8);
383
384 if out.len() + code_repetitions > max_len {
385 return Err(Error::invalid(TOO_MUCH_DATA));
386 }
387 else if out.is_empty() {
388 return Err(Error::invalid(NOT_ENOUGH_DATA));
389 }
390
391 let repeated_code = *out.last().unwrap();
392 out.extend(std::iter::repeat(repeated_code).take(code_repetitions));
393 }
394 else if out.len() < max_len { // implies that code is not larger than u16???
395 out.push(u16::try_from(code)?);
396 }
397 else {
398 return Err(Error::invalid(TOO_MUCH_DATA));
399 }
400
401 Ok(())
402}
403
404fn count_frequencies(data: &[u16]) -> Vec<u64> {
405 let mut frequencies: Vec = vec![0_u64; ENCODING_TABLE_SIZE];
406
407 for value: &u16 in data {
408 frequencies[*value as usize] += 1;
409 }
410
411 frequencies
412}
413
414fn write_bits(
415 count: u64,
416 bits: u64,
417 code_bits: &mut u64,
418 code_bit_count: &mut u64,
419 mut out: impl Write,
420) -> UnitResult
421{
422 *code_bits = (*code_bits << count) | bits;
423 *code_bit_count += count;
424
425 while *code_bit_count >= 8 {
426 *code_bit_count -= 8;
427 out.write(&[
428 (*code_bits >> *code_bit_count) as u8 // TODO make sure never or always wraps?
429 ])?;
430 }
431
432 Ok(())
433}
434
435fn write_code(scode: u64, code_bits: &mut u64, code_bit_count: &mut u64, mut out: impl Write) -> UnitResult {
436 write_bits(count:length(scode), bits:code(scode), code_bits, code_bit_count, &mut out)
437}
438
439#[inline(always)]
440fn send_code(
441 scode: u64,
442 run_count: u64,
443 run_code: u64,
444 code_bits: &mut u64,
445 code_bit_count: &mut u64,
446 mut out: impl Write,
447) -> UnitResult
448{
449 // Output a run of runCount instances of the symbol sCount.
450 // Output the symbols explicitly, or if that is shorter, output
451 // the sCode symbol once followed by a runCode symbol and runCount
452 // expressed as an 8-bit number.
453 if length(code:scode) + length(run_code) + 8 < length(code:scode) * run_count {
454 write_code(scode, code_bits, code_bit_count, &mut out)?;
455 write_code(scode:run_code, code_bits, code_bit_count, &mut out)?;
456 write_bits(count:8, bits:run_count, code_bits, code_bit_count, &mut out)?;
457 }
458 else {
459 for _ in 0 ..= run_count {
460 write_code(scode, code_bits, code_bit_count, &mut out)?;
461 }
462 }
463
464 Ok(())
465}
466
467fn encode_with_frequencies(
468 frequencies: &[u64],
469 uncompressed: &[u16],
470 run_length_code: usize,
471 mut out: &mut Cursor<Vec<u8>>,
472) -> Result<u64>
473{
474 let mut code_bits = 0;
475 let mut code_bit_count = 0;
476
477 let mut run_start_value = uncompressed[0];
478 let mut run_length = 0;
479
480 let start_position = out.position();
481
482 // Loop on input values
483 for &current_value in &uncompressed[1..] {
484 // Count same values or send code
485 if run_start_value == current_value && run_length < 255 {
486 run_length += 1;
487 }
488 else {
489 send_code(
490 frequencies[run_start_value as usize],
491 run_length,
492 frequencies[run_length_code],
493 &mut code_bits,
494 &mut code_bit_count,
495 &mut out,
496 )?;
497
498 run_length = 0;
499 }
500
501 run_start_value = current_value;
502 }
503
504 // Send remaining code
505 send_code(
506 frequencies[run_start_value as usize],
507 run_length,
508 frequencies[run_length_code],
509 &mut code_bits,
510 &mut code_bit_count,
511 &mut out,
512 )?;
513
514 let data_length = out.position() - start_position; // we shouldn't count the last byte write
515
516 if code_bit_count != 0 {
517 out.write(&[
518 (code_bits << (8 - code_bit_count) & 0xff) as u8
519 ])?;
520 }
521
522 Ok(data_length * 8 + code_bit_count)
523}
524
525///
526/// Pack an encoding table:
527/// - only code lengths, not actual codes, are stored
528/// - runs of zeroes are compressed as follows:
529///
530/// unpacked packed
531/// --------------------------------
532/// 1 zero 0 (6 bits)
533/// 2 zeroes 59
534/// 3 zeroes 60
535/// 4 zeroes 61
536/// 5 zeroes 62
537/// n zeroes (6 or more) 63 n-6 (6 + 8 bits)
538///
539fn pack_encoding_table(
540 frequencies: &[u64],
541 min_index: usize,
542 max_index: usize,
543 mut out: &mut Cursor<Vec<u8>>,
544) -> UnitResult
545{
546 let mut code_bits = 0_u64;
547 let mut code_bit_count = 0_u64;
548
549 let mut frequency_index = min_index;
550 while frequency_index <= max_index { // TODO slice iteration?
551 let code_length = length(frequencies[frequency_index]);
552
553 if code_length == 0 {
554 let mut zero_run = 1;
555
556 while frequency_index < max_index && zero_run < LONGEST_LONG_RUN {
557 if length(frequencies[frequency_index + 1]) > 0 {
558 break;
559 }
560
561 frequency_index += 1;
562 zero_run += 1;
563 }
564
565 if zero_run >= 2 {
566 if zero_run >= SHORTEST_LONG_RUN {
567 write_bits(6, LONG_ZEROCODE_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
568 write_bits(8, zero_run - SHORTEST_LONG_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
569 }
570 else {
571 write_bits(6, SHORT_ZEROCODE_RUN + zero_run - 2, &mut code_bits, &mut code_bit_count, &mut out)?;
572 }
573
574 frequency_index += 1; // we must increment or else this may go very wrong
575 continue;
576 }
577 }
578
579 write_bits(6, code_length, &mut code_bits, &mut code_bit_count, &mut out)?;
580 frequency_index += 1;
581 }
582
583 if code_bit_count > 0 {
584 out.write(&[
585 (code_bits << (8 - code_bit_count)) as u8
586 ])?;
587 }
588
589 Ok(())
590}
591
592/// Build a "canonical" Huffman code table:
593/// - for each (uncompressed) symbol, code contains the length
594/// of the corresponding code (in the compressed data)
595/// - canonical codes are computed and stored in code
596/// - the rules for constructing canonical codes are as follows:
597/// * shorter codes (if filled with zeroes to the right)
598/// have a numerically higher value than longer codes
599/// * for codes with the same length, numerical values
600/// increase with numerical symbol values
601/// - because the canonical code table can be constructed from
602/// symbol lengths alone, the code table can be transmitted
603/// without sending the actual code values
604/// - see http://www.compressconsult.com/huffman/
605fn build_canonical_table(code_table: &mut [u64]) {
606 debug_assert_eq!(code_table.len(), ENCODING_TABLE_SIZE);
607
608 let mut count_per_code = [0_u64; 59];
609
610 for &code in code_table.iter() {
611 count_per_code[u64_to_usize(code)] += 1;
612 }
613
614 // For each i from 58 through 1, compute the
615 // numerically lowest code with length i, and
616 // store that code in n[i].
617 {
618 let mut code = 0_u64; // TODO use foldr?
619 for count in &mut count_per_code.iter_mut().rev() {
620 let next_code = (code + *count) >> 1;
621 *count = code;
622 code = next_code;
623 }
624 }
625
626 // code[i] contains the length, l, of the
627 // code for symbol i. Assign the next available
628 // code of length l to the symbol and store both
629 // l and the code in code[i]. // TODO iter + filter ?
630 for symbol_length in code_table.iter_mut() {
631 let current_length = *symbol_length;
632 let code_index = u64_to_usize(current_length);
633 if current_length > 0 {
634 *symbol_length = current_length | (count_per_code[code_index] << 6);
635 count_per_code[code_index] += 1;
636 }
637 }
638}
639
640
641/// Compute Huffman codes (based on frq input) and store them in frq:
642/// - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
643/// - max code length is 58 bits;
644/// - codes outside the range [im-iM] have a null length (unused values);
645/// - original frequencies are destroyed;
646/// - encoding tables are used by hufEncode() and hufBuildDecTable();
647///
648/// NB: The following code "(*a == *b) && (a > b))" was added to ensure
649/// elements in the heap with the same value are sorted by index.
650/// This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
651/// produced a resultant sorted heap that is identical across OSes.
652fn build_encoding_table(
653 frequencies: &mut [u64], // input frequencies, output encoding table
654) -> (usize, usize) // return frequency max min range
655{
656 debug_assert_eq!(frequencies.len(), ENCODING_TABLE_SIZE);
657
658 /// Frequency with position, used for MinHeap.
659 #[derive(Eq, PartialEq, Copy, Clone)]
660 struct HeapFrequency {
661 position: usize,
662 frequency: u64,
663 }
664
665 impl Ord for HeapFrequency {
666 fn cmp(&self, other: &Self) -> Ordering {
667 other.frequency.cmp(&self.frequency)
668 .then_with(|| other.position.cmp(&self.position))
669 }
670 }
671
672 impl PartialOrd for HeapFrequency {
673 fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
674 }
675
676 // This function assumes that when it is called, array frq
677 // indicates the frequency of all possible symbols in the data
678 // that are to be Huffman-encoded. (frq[i] contains the number
679 // of occurrences of symbol i in the data.)
680 //
681 // The loop below does three things:
682 //
683 // 1) Finds the minimum and maximum indices that point
684 // to non-zero entries in frq:
685 //
686 // frq[im] != 0, and frq[i] == 0 for all i < im
687 // frq[iM] != 0, and frq[i] == 0 for all i > iM
688 //
689 // 2) Fills array fHeap with pointers to all non-zero
690 // entries in frq.
691 //
692 // 3) Initializes array hlink such that hlink[i] == i
693 // for all array entries.
694
695 // We need to use vec here or we overflow the stack.
696 let mut links = vec![0_usize; ENCODING_TABLE_SIZE];
697 let mut frequency_heap = vec![0_usize; ENCODING_TABLE_SIZE];
698
699 // This is a good solution since we don't have usize::MAX items (no panics or UB),
700 // and since this is short-circuit, it stops at the first in order non zero element.
701 let min_frequency_index = frequencies.iter().position(|f| *f != 0).unwrap_or(0);
702
703 let mut max_frequency_index = 0;
704 let mut frequency_count = 0;
705
706 // assert bounds check to optimize away bounds check in loops
707 assert!(links.len() >= ENCODING_TABLE_SIZE);
708 assert!(frequencies.len() >= ENCODING_TABLE_SIZE);
709
710 for index in min_frequency_index..ENCODING_TABLE_SIZE {
711 links[index] = index; // TODO for x in links.iter().enumerate()
712
713 if frequencies[index] != 0 {
714 frequency_heap[frequency_count] = index;
715 max_frequency_index = index;
716 frequency_count += 1;
717 }
718 }
719
720
721 // Add a pseudo-symbol, with a frequency count of 1, to frq;
722 // adjust the fHeap and hlink array accordingly. Function
723 // hufEncode() uses the pseudo-symbol for run-length encoding.
724
725 max_frequency_index += 1;
726 frequencies[max_frequency_index] = 1;
727 frequency_heap[frequency_count] = max_frequency_index;
728 frequency_count += 1;
729
730 // Build an array, scode, such that scode[i] contains the number
731 // of bits assigned to symbol i. Conceptually this is done by
732 // constructing a tree whose leaves are the symbols with non-zero
733 // frequency:
734 //
735 // Make a heap that contains all symbols with a non-zero frequency,
736 // with the least frequent symbol on top.
737 //
738 // Repeat until only one symbol is left on the heap:
739 //
740 // Take the two least frequent symbols off the top of the heap.
741 // Create a new node that has first two nodes as children, and
742 // whose frequency is the sum of the frequencies of the first
743 // two nodes. Put the new node back into the heap.
744 //
745 // The last node left on the heap is the root of the tree. For each
746 // leaf node, the distance between the root and the leaf is the length
747 // of the code for the corresponding symbol.
748 //
749 // The loop below doesn't actually build the tree; instead we compute
750 // the distances of the leaves from the root on the fly. When a new
751 // node is added to the heap, then that node's descendants are linked
752 // into a single linear list that starts at the new node, and the code
753 // lengths of the descendants (that is, their distance from the root
754 // of the tree) are incremented by one.
755 let mut heap = BinaryHeap::with_capacity(frequency_count);
756 for index in frequency_heap.drain(..frequency_count) {
757 heap.push(HeapFrequency { position: index, frequency: frequencies[index] });
758 }
759
760 let mut s_code = vec![0_u64; ENCODING_TABLE_SIZE];
761
762 while frequency_count > 1 {
763 // Find the indices, mm and m, of the two smallest non-zero frq
764 // values in fHeap, add the smallest frq to the second-smallest
765 // frq, and remove the smallest frq value from fHeap.
766 let (high_position, low_position) = {
767 let smallest_frequency = heap.pop().expect("heap empty bug");
768 frequency_count -= 1;
769
770 let mut second_smallest_frequency = heap.peek_mut().expect("heap empty bug");
771 second_smallest_frequency.frequency += smallest_frequency.frequency;
772
773 (second_smallest_frequency.position, smallest_frequency.position)
774 };
775
776 // The entries in scode are linked into lists with the
777 // entries in hlink serving as "next" pointers and with
778 // the end of a list marked by hlink[j] == j.
779 //
780 // Traverse the lists that start at scode[m] and scode[mm].
781 // For each element visited, increment the length of the
782 // corresponding code by one bit. (If we visit scode[j]
783 // during the traversal, then the code for symbol j becomes
784 // one bit longer.)
785 //
786 // Merge the lists that start at scode[m] and scode[mm]
787 // into a single list that starts at scode[m].
788
789 // Add a bit to all codes in the first list.
790 let mut index = high_position; // TODO fold()
791 loop {
792 s_code[index] += 1;
793 debug_assert!(s_code[index] <= 58);
794
795 // merge the two lists
796 if links[index] == index {
797 links[index] = low_position;
798 break;
799 }
800
801 index = links[index];
802 }
803
804 // Add a bit to all codes in the second list
805 let mut index = low_position; // TODO fold()
806 loop {
807 s_code[index] += 1;
808 debug_assert!(s_code[index] <= 58);
809
810 if links[index] == index {
811 break;
812 }
813
814 index = links[index];
815 }
816 }
817
818 // Build a canonical Huffman code table, replacing the code
819 // lengths in scode with (code, code length) pairs. Copy the
820 // code table from scode into frq.
821 build_canonical_table(&mut s_code);
822 frequencies.copy_from_slice(&s_code);
823
824 (min_frequency_index, max_frequency_index)
825}
826
827
828#[inline] fn length(code: u64) -> u64 { code & 63 }
829#[inline] fn code(code: u64) -> u64 { code >> 6 }
830
831const INVALID_BIT_COUNT: &'static str = "invalid number of bits";
832const INVALID_TABLE_ENTRY: &'static str = "invalid code table entry";
833const NOT_ENOUGH_DATA: &'static str = "decoded data are shorter than expected";
834const INVALID_TABLE_SIZE: &'static str = "unexpected end of code table data";
835const TABLE_TOO_LONG: &'static str = "code table is longer than expected";
836const INVALID_CODE: &'static str = "invalid code";
837const TOO_MUCH_DATA: &'static str = "decoded data are longer than expected";
838
839
840#[cfg(test)]
841mod test {
842 use super::*;
843 use rand::{Rng, SeedableRng};
844
845 const UNCOMPRESSED_ARRAY: [u16; 100] = [
846 3852, 2432, 33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013, 7715, 59875, 28182,
847 34449, 19983, 20399, 63407, 29486, 4877, 26738, 44815, 14042, 46091, 48228, 25682, 35412,
848 7582, 65069, 6632, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
849 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
850 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
851 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
852 3159, 29002, 14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347, 16991, 21303, 26263,
853 8312, 14017, 41777, 43240, 3500, 60250, 52437, 45715, 61520,
854 ];
855
856 const UNCOMPRESSED_ARRAY_SPECIAL: [u16; 100] = [
857 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28182,
858 0, 65534, 0, 65534, 0, 65534, 0, 65534, 0, 0, 0, 0, 0,
859 0, 0, 0, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
860 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
861 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
862 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
863 3159, 29002, 14535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
864 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
865 ];
866
867 const COMPRESSED_ARRAY: [u8; 703] = [
868 0xc9, 0x0, 0x0, 0x0, 0x2e, 0xfe, 0x0, 0x0, 0x56, 0x2, 0x0, 0x0, 0xa2, 0x2, 0x0, 0x0, 0x0,
869 0x0, 0x0, 0x0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x47,
870 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x1f, 0xff, 0xff, 0xed, 0x87, 0xff, 0xff, 0xf0,
871 0x91, 0xff, 0xf8, 0x1f, 0xf4, 0xf1, 0xff, 0x78, 0x1f, 0xfd, 0xa1, 0xff, 0xff, 0xff, 0xff,
872 0xff, 0xff, 0xfa, 0xc7, 0xfe, 0x4, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
873 0xff, 0xed, 0x1f, 0xf3, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe8, 0x7, 0xfd, 0xf8,
874 0x7f, 0xff, 0xff, 0xff, 0xfd, 0x10, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x51, 0xff,
875 0xff, 0xff, 0xff, 0xfe, 0x1, 0xff, 0x73, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
876 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0x0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
877 0xff, 0xff, 0xff, 0xfc, 0xa4, 0x7f, 0xf5, 0x7, 0xfc, 0x48, 0x7f, 0xe0, 0x47, 0xff, 0xff,
878 0xf5, 0x91, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x21, 0xff,
879 0x7f, 0x1f, 0xf8, 0xd1, 0xff, 0xe7, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x1f, 0xf2, 0x91,
880 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1c, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xe7,
881 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x8c, 0x7f, 0xff, 0xff, 0xc, 0x1f, 0xff, 0xff,
882 0xe5, 0x7, 0xff, 0xff, 0xfa, 0x81, 0xff, 0xff, 0xff, 0x20, 0x7f, 0xff, 0xff, 0xff, 0xff,
883 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
884 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xbc, 0x7f, 0xff, 0xff, 0xff, 0xfc, 0x38, 0x7f, 0xff,
885 0xff, 0xff, 0xfc, 0xd0, 0x7f, 0xd3, 0xc7, 0xff, 0xff, 0xf7, 0x91, 0xff, 0xff, 0xff, 0xff,
886 0xfe, 0xc1, 0xff, 0xff, 0xff, 0xff, 0xf9, 0x61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
887 0x87, 0xff, 0xff, 0xfd, 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x87, 0xff, 0xff,
888 0xff, 0xff, 0xfe, 0x87, 0xff, 0x58, 0x7f, 0xff, 0xff, 0xff, 0xfd, 0xec, 0x7f, 0xff, 0xff,
889 0xff, 0xfe, 0xd0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x6c, 0x7f, 0xcb, 0x47, 0xff, 0xff, 0xf3,
890 0x61, 0xff, 0xff, 0xff, 0x80, 0x7f, 0xe1, 0xc7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f,
891 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
892 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x1f, 0xff, 0xff,
893 0xff, 0xff, 0xff, 0xfd, 0xcc, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x11, 0xff, 0xff,
894 0xff, 0xff, 0xf8, 0x41, 0xff, 0xbc, 0x1f, 0xff, 0xff, 0xc4, 0x47, 0xff, 0xff, 0xf2, 0x91,
895 0xff, 0xe0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0x6d, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
896 0xff, 0xff, 0xff, 0xff, 0xff, 0x2, 0x1f, 0xf9, 0xe1, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xe1,
897 0xff, 0xff, 0xfd, 0xb0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe1, 0xff, 0xff, 0xff, 0xff,
898 0xff, 0xff, 0xff, 0xff, 0x5a, 0x1f, 0xfc, 0x81, 0xbf, 0x29, 0x1b, 0xff, 0xff, 0xff, 0xff,
899 0xff, 0xff, 0xff, 0xf3, 0x61, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x1b,
900 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0xb1, 0xbf, 0xff, 0xfd, 0x80, 0x6f, 0xff,
901 0xff, 0xf, 0x1b, 0xf8, 0xc1, 0xbf, 0xff, 0xfc, 0xb4, 0x6f, 0xff, 0xff, 0xff, 0xff, 0xff,
902 0xff, 0xff, 0xda, 0x46, 0xfc, 0x54, 0x6f, 0xc9, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
903 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x21, 0x1b, 0xff, 0xff, 0xe0, 0x86, 0xff, 0xff,
904 0xff, 0xff, 0xe2, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
905 0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0x91, 0xbf, 0xff, 0xfe, 0x24, 0x6f, 0xff, 0xff, 0x6b,
906 0x1b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xb1, 0xbf, 0xfa, 0x1b, 0xfb, 0x11,
907 0xbf, 0xff, 0xfe, 0x8, 0x6f, 0xff, 0xff, 0x42, 0x1b, 0xff, 0xff, 0xff, 0xff, 0xb9, 0x1b,
908 0xff, 0xff, 0xcf, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x31,
909 0x86, 0x10, 0x9, 0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2, 0x34, 0x9e,
910 0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1, 0x98, 0x14, 0x59, 0x57, 0x2c,
911 0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc, 0xe6, 0x13, 0x38, 0xc, 0x8e, 0xe2, 0xc, 0xfe,
912 0x49, 0x73, 0xbc, 0x2b, 0x7b, 0x9, 0x27, 0x79, 0x14, 0xc, 0x94, 0x42, 0xf8, 0x7c, 0x1,
913 0x8d, 0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe, 0x8d, 0x8,
914 0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d, 0xc0,
915 ];
916
917 fn fill(rng: &mut impl Rng, size: usize) -> Vec<u16> {
918 if rng.gen_bool(0.2) {
919 let value = if rng.gen_bool(0.5) { 0 } else { u16::MAX };
920 return vec![ value; size ];
921 }
922
923 let mut data = vec![0_u16; size];
924
925 data.iter_mut().for_each(|v| {
926 *v = rng.gen_range(0_u16 .. u16::MAX);
927 });
928
929 data
930 }
931
932 /// Test using both input and output from a custom ILM OpenEXR test.
933 #[test]
934 fn compression_comparation() {
935 let raw = compress(&UNCOMPRESSED_ARRAY).unwrap();
936 assert_eq!(raw, COMPRESSED_ARRAY.to_vec());
937 }
938
939 #[test]
940 fn round_trip() {
941 let mut random = rand::rngs::StdRng::from_seed(SEED);
942 let raw = fill(&mut random, u16::MAX as usize);
943
944 let compressed = compress(&raw).unwrap();
945 let uncompressed = decompress(&compressed, raw.len()).unwrap();
946
947 assert_eq!(uncompressed, raw);
948 }
949
950 #[test]
951 fn repetitions_special() {
952 let raw = UNCOMPRESSED_ARRAY_SPECIAL;
953
954 let compressed = compress(&raw).unwrap();
955 let uncompressed = decompress(&compressed, raw.len()).unwrap();
956
957 assert_eq!(uncompressed, raw.to_vec());
958 }
959
960 #[test]
961 fn round_trip100() {
962 let mut random = rand::rngs::StdRng::from_seed(SEED);
963
964 for size_multiplier in 1..10 {
965 let raw = fill(&mut random, size_multiplier * 50_000);
966
967 let compressed = compress(&raw).unwrap();
968 let uncompressed = decompress(&compressed, raw.len()).unwrap();
969
970 assert_eq!(uncompressed, raw);
971 }
972 }
973
974 #[test]
975 fn test_zeroes(){
976 let uncompressed: &[u16] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
977
978 let compressed = compress(uncompressed).unwrap();
979 let decompressed = decompress(&compressed, uncompressed.len()).unwrap();
980
981 assert_eq!(uncompressed, decompressed.as_slice());
982 }
983
984 const SEED: [u8; 32] = [
985 12,155,32,34,112,109,98,54,
986 12,255,32,34,112,109,98,55,
987 12,155,32,34,12,109,98,54,
988 12,35,32,34,112,109,48,54,
989 ];
990}
991