huffman.rs source code [crates/exr/src/compression/piz/huffman.rs]

1	//! 16-bit Huffman compression and decompression.
2	//! Huffman compression and decompression routines written
3	//! by Christian Rouet for his PIZ image file format.
4	// see https://github.com/AcademySoftwareFoundation/openexr/blob/88246d991e0318c043e6f584f7493da08a31f9f8/OpenEXR/IlmImf/ImfHuf.cpp
5
6	use crate::math::RoundingMode;
7	use crate::error::{Error, Result, UnitResult, u64_to_usize, u32_to_usize};
8	use crate::io::Data;
9	use std::{
10	cmp::Ordering,
11	collections::BinaryHeap,
12	io::{Cursor, Read, Write},
13	};
14	use std::convert::TryFrom;
15	use smallvec::SmallVec;
16
17
18	pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
19	let mut remaining_compressed = compressed;
20
21	let min_code_index = usize::try_from(u32::read(&mut remaining_compressed)?)?;
22	let max_code_index_32 = u32::read(&mut remaining_compressed)?;
23	let _table_size = usize::try_from(u32::read(&mut remaining_compressed)?)?; // TODO check this and return Err?
24	let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
25	let _skipped = u32::read(&mut remaining_compressed)?; // what is this
26
27	let max_code_index = usize::try_from(max_code_index_32).unwrap();
28	if min_code_index >= ENCODING_TABLE_SIZE \|\| max_code_index >= ENCODING_TABLE_SIZE {
29	return Err(Error::invalid(INVALID_TABLE_SIZE));
30	}
31
32	if RoundingMode::Up.divide(bit_count, `8`) > remaining_compressed.len() {
33	return Err(Error::invalid(NOT_ENOUGH_DATA));
34	}
35
36	let encoding_table = read_encoding_table(&mut remaining_compressed, min_code_index, max_code_index)?;
37	if bit_count > `8` * remaining_compressed.len() { return Err(Error::invalid(INVALID_BIT_COUNT)); }
38
39	let decoding_table = build_decoding_table(&encoding_table, min_code_index, max_code_index)?;
40
41	let result = decode_with_tables(
42	&encoding_table,
43	&decoding_table,
44	&remaining_compressed,
45	i32::try_from(bit_count)?,
46	max_code_index_32,
47	expected_size,
48	)?;
49
50	Ok(result)
51	}
52
53	pub fn compress(uncompressed: &[u16]) -> Result<Vec<u8>> {
54	if uncompressed.is_empty() { return Ok(vec![]); }
55
56	let mut frequencies = count_frequencies(uncompressed);
57	let (min_code_index, max_code_index) = build_encoding_table(&mut frequencies);
58
59	let mut result = Cursor::new(Vec::with_capacity(uncompressed.len()));
60	u32::write_slice(&mut result, &[`0`; `5`])?; // we come back to these later after we know more about the compressed data
61
62	let table_start = result.position();
63	pack_encoding_table(
64	&frequencies,
65	min_code_index,
66	max_code_index,
67	&mut result,
68	)?;
69
70	let data_start = result.position();
71	let bit_count = encode_with_frequencies(
72	&frequencies,
73	uncompressed,
74	max_code_index,
75	&mut result
76	)?;
77
78	// write meta data after this
79	result.set_position(`0`);
80	let table_length = data_start - table_start;
81
82	u32::try_from(min_code_index)?.write(&mut result)?;
83	u32::try_from(max_code_index)?.write(&mut result)?;
84	u32::try_from(table_length)?.write(&mut result)?;
85	u32::try_from(bit_count)?.write(&mut result)?;
86	`0_u32`.write(&mut result)?;
87
88	Ok(result.into_inner())
89	}
90
91
92	const ENCODE_BITS: u64 = `16`; // literal (value) bit length
93	const DECODE_BITS: u64 = `14`; // decoding bit size (>= 8)
94
95	const ENCODING_TABLE_SIZE: usize = ((`1` << ENCODE_BITS) + `1`) as usize;
96	const DECODING_TABLE_SIZE: usize = (`1` << DECODE_BITS) as usize;
97	const DECODE_MASK: u64 = DECODING_TABLE_SIZE as u64 - `1`;
98
99	const SHORT_ZEROCODE_RUN: u64 = `59`;
100	const LONG_ZEROCODE_RUN: u64 = `63`;
101	const SHORTEST_LONG_RUN: u64 = `2` + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
102	const LONGEST_LONG_RUN: u64 = `255` + SHORTEST_LONG_RUN;
103
104
105	#[derive(Clone, Debug, Eq, PartialEq)]
106	enum Code {
107	Empty,
108	Short(ShortCode),
109	Long(SmallVec<[u32; `2`]>), // often 2, sometimes 4, rarely 8
110	}
111
112	#[derive(Clone, Debug, Eq, PartialEq)]
113	struct ShortCode {
114	value: u32,
115	len: u8,
116	}
117
118	impl ShortCode {
119	#[inline] fn len(&self) -> u64 { u64::from(self.len) }
120	}
121
122	/// Decode (uncompress) n bits based on encoding & decoding tables:
123	fn decode_with_tables(
124	encoding_table: &[u64],
125	decoding_table: &[Code],
126	mut input: &[u8],
127	input_bit_count: i32,
128	run_length_code: u32,
129	expected_output_size: usize,
130	) -> Result<Vec<u16>>
131	{
132	let mut output = Vec::with_capacity(expected_output_size);
133	let mut code_bits = `0_u64`;
134	let mut code_bit_count = `0_u64`;
135
136	while input.len() > `0` {
137	read_byte(&mut code_bits, &mut code_bit_count, &mut input)?;
138
139	// Access decoding table
140	while code_bit_count >= DECODE_BITS {
141	let code_index = (code_bits >> (code_bit_count - DECODE_BITS)) & DECODE_MASK;
142	let code = &decoding_table[u64_to_usize(code_index)];
143
144	// Get short code
145	if let Code::Short(code) = code {
146	code_bit_count -= code.len();
147
148	read_code_into_vec(
149	code.value,
150	run_length_code,
151	&mut code_bits,
152	&mut code_bit_count,
153	&mut input,
154	&mut output,
155	expected_output_size,
156	)?;
157	}
158	else if let Code::Long(ref long_codes) = code {
159	debug_assert_ne!(long_codes.len(), `0`);
160
161	let long_code = long_codes.iter()
162	.filter_map(\|&long_code\|{
163	let encoded_long_code = encoding_table[u32_to_usize(long_code)];
164	let length = length(encoded_long_code);
165
166	while code_bit_count < length && input.len() > `0` {
167	let err = read_byte(&mut code_bits, &mut code_bit_count, &mut input);
168	if let Err(err) = err { return Some(Err(err)); }
169	}
170
171	if code_bit_count >= length {
172	let required_code = (code_bits >> (code_bit_count - length)) & ((`1` << length) - `1`);
173
174	if self::code(encoded_long_code) == required_code {
175	code_bit_count -= length;
176	return Some(Ok(long_code));
177	}
178	}
179
180	None
181
182	})
183	.next()
184	.ok_or(Error::invalid(INVALID_CODE))?;
185
186	read_code_into_vec(
187	long_code?,
188	run_length_code,
189	&mut code_bits,
190	&mut code_bit_count,
191	&mut input,
192	&mut output,
193	expected_output_size,
194	)?;
195	}
196	else {
197	return Err(Error::invalid(INVALID_CODE));
198	}
199	}
200	}
201
202	let count = u64::try_from((`8` - input_bit_count) & `7`)?;
203	code_bits >>= count;
204
205	code_bit_count = code_bit_count.checked_sub(count)
206	.ok_or_else(\|\| Error::invalid("code"))?;
207
208	while code_bit_count > `0` {
209	let index = (code_bits << (DECODE_BITS - code_bit_count)) & DECODE_MASK;
210	let code = &decoding_table[u64_to_usize(index)];
211
212	if let Code::Short(short_code) = code {
213	if short_code.len() > code_bit_count { return Err(Error::invalid("code")) }; // FIXME why does this happen??
214	code_bit_count -= short_code.len(); // FIXME may throw "attempted to subtract with overflow"
215
216	read_code_into_vec(
217	short_code.value,
218	run_length_code,
219	&mut code_bits,
220	&mut code_bit_count,
221	&mut input,
222	&mut output,
223	expected_output_size,
224	)?;
225	}
226	else {
227	return Err(Error::invalid(INVALID_CODE));
228	}
229	}
230
231	if output.len() != expected_output_size {
232	return Err(Error::invalid(NOT_ENOUGH_DATA));
233	}
234
235	Ok(output)
236	}
237
238	/// Build a decoding hash table based on the encoding table code:
239	/// - short codes (<= HUF_DECBITS) are resolved with a single table access;
240	/// - long code entry allocations are not optimized, because long codes are
241	/// unfrequent;
242	/// - decoding tables are used by hufDecode();
243	fn build_decoding_table(
244	encoding_table: &[u64],
245	min_code_index: usize,
246	max_code_index: usize,
247	) -> Result<Vec<Code>>
248	{
249	let mut decoding_table = vec![Code::Empty; DECODING_TABLE_SIZE]; // not an array because of code not being copy
250
251	for (code_index, &encoded_code) in encoding_table[..= max_code_index].iter().enumerate().skip(min_code_index) {
252	let code_index = u32::try_from(code_index).unwrap();
253
254	let code = code(encoded_code);
255	let length = length(encoded_code);
256
257	if code >> length != `0` {
258	return Err(Error::invalid(INVALID_TABLE_ENTRY));
259	}
260
261	if length > DECODE_BITS {
262	let long_code = &mut decoding_table[u64_to_usize(code >> (length - DECODE_BITS))];
263
264	match long_code {
265	Code::Empty => *long_code = Code::Long(smallvec![code_index]),
266	Code::Long(lits) => lits.push(code_index),
267	_ => { return Err(Error::invalid(INVALID_TABLE_ENTRY)); }
268	}
269	}
270	else if length != `0` {
271	let default_value = Code::Short(ShortCode {
272	value: code_index,
273	len: length as u8,
274	});
275
276	let start_index = u64_to_usize(code << (DECODE_BITS - length));
277	let count = u64_to_usize(`1` << (DECODE_BITS - length));
278
279	for value in &mut decoding_table[start_index .. start_index + count] {
280	*value = default_value.clone();
281	}
282	}
283	}
284
285	Ok(decoding_table)
286	}
287
288	/// Run-length-decompresses all zero runs from the packed table to the encoding table
289	fn read_encoding_table(
290	packed: &mut impl Read,
291	min_code_index: usize,
292	max_code_index: usize,
293	) -> Result<Vec<u64>>
294	{
295	let mut code_bits = `0_u64`;
296	let mut code_bit_count = `0_u64`;
297
298	// TODO push() into encoding table instead of index stuff?
299	let mut encoding_table = vec![`0_u64`; ENCODING_TABLE_SIZE];
300	let mut code_index = min_code_index;
301	while code_index <= max_code_index {
302	let code_len = read_bits(`6`, &mut code_bits, &mut code_bit_count, packed)?;
303	encoding_table[code_index] = code_len;
304
305	if code_len == LONG_ZEROCODE_RUN {
306	let zerun_bits = read_bits(`8`, &mut code_bits, &mut code_bit_count, packed)?;
307	let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
308
309	if code_index + zerun > max_code_index + `1` {
310	return Err(Error::invalid(TABLE_TOO_LONG));
311	}
312
313	for value in &mut encoding_table[code_index..code_index + zerun] {
314	*value = `0`;
315	}
316
317	code_index += zerun;
318	}
319	else if code_len >= SHORT_ZEROCODE_RUN {
320	let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + `2`).unwrap();
321	if code_index + duplication_count > max_code_index + `1` {
322	return Err(Error::invalid(TABLE_TOO_LONG));
323	}
324
325	for value in &mut encoding_table[code_index .. code_index + duplication_count] {
326	*value = `0`;
327	}
328
329	code_index += duplication_count;
330	}
331	else {
332	code_index += `1`;
333	}
334	}
335
336	build_canonical_table(&mut encoding_table);
337	Ok(encoding_table)
338	}
339
340	// TODO Use BitStreamReader for all the bit reads?!
341	#[inline]
342	fn read_bits(
343	count: u64,
344	code_bits: &mut u64,
345	code_bit_count: &mut u64,
346	input: &mut impl Read,
347	) -> Result<u64>
348	{
349	while *code_bit_count < count {
350	read_byte(code_bits, code_bit_count, input)?;
351	}
352
353	*code_bit_count -= count;
354	Ok((code_bits >> code_bit_count) & ((`1` << count) - `1`))
355	}
356
357	#[inline]
358	fn read_byte(code_bits: &mut u64, bit_count: &mut u64, input: &mut impl Read) -> UnitResult {
359	code_bits = (code_bits << `8`) \| u8::read(input)? as u64;
360	*bit_count += `8`;
361	Ok(())
362	}
363
364	#[inline]
365	fn read_code_into_vec(
366	code: u32,
367	run_length_code: u32,
368	code_bits: &mut u64,
369	code_bit_count: &mut u64,
370	read: &mut impl Read,
371	out: &mut Vec<u16>,
372	max_len: usize,
373	) -> UnitResult
374	{
375	if code == run_length_code { // code may be too large for u16
376	if *code_bit_count < `8` {
377	read_byte(code_bits, code_bit_count, read)?;
378	}
379
380	*code_bit_count -= `8`;
381
382	let code_repetitions = usize::from((code_bits >> code_bit_count) as u8);
383
384	if out.len() + code_repetitions > max_len {
385	return Err(Error::invalid(TOO_MUCH_DATA));
386	}
387	else if out.is_empty() {
388	return Err(Error::invalid(NOT_ENOUGH_DATA));
389	}
390
391	let repeated_code = *out.last().unwrap();
392	out.extend(std::iter::repeat(repeated_code).take(code_repetitions));
393	}
394	else if out.len() < max_len { // implies that code is not larger than u16???
395	out.push(u16::try_from(code)?);
396	}
397	else {
398	return Err(Error::invalid(TOO_MUCH_DATA));
399	}
400
401	Ok(())
402	}
403
404	fn count_frequencies(data: &[u16]) -> Vec<u64> {
405	let mut frequencies: Vec = vec![`0_u64`; ENCODING_TABLE_SIZE];
406
407	for value: &u16 in data {
408	frequencies[value as usize*] += `1`;
409	}
410
411	frequencies
412	}
413
414	fn write_bits(
415	count: u64,
416	bits: u64,
417	code_bits: &mut u64,
418	code_bit_count: &mut u64,
419	mut out: impl Write,
420	) -> UnitResult
421	{
422	code_bits = (code_bits << count) \| bits;
423	*code_bit_count += count;
424
425	while *code_bit_count >= `8` {
426	*code_bit_count -= `8`;
427	out.write(&[
428	(code_bits >> code_bit_count) as u8 // TODO make sure never or always wraps?
429	])?;
430	}
431
432	Ok(())
433	}
434
435	fn write_code(scode: u64, code_bits: &mut u64, code_bit_count: &mut u64, mut out: impl Write) -> UnitResult {
436	write_bits(count:length(scode), bits:code(scode), code_bits, code_bit_count, &mut out)
437	}
438
439	#[inline(always)]
440	fn send_code(
441	scode: u64,
442	run_count: u64,
443	run_code: u64,
444	code_bits: &mut u64,
445	code_bit_count: &mut u64,
446	mut out: impl Write,
447	) -> UnitResult
448	{
449	// Output a run of runCount instances of the symbol sCount.
450	// Output the symbols explicitly, or if that is shorter, output
451	// the sCode symbol once followed by a runCode symbol and runCount
452	// expressed as an 8-bit number.
453	if length(code:scode) + length(run_code) + `8` < length(code:scode) * run_count {
454	write_code(scode, code_bits, code_bit_count, &mut out)?;
455	write_code(scode:run_code, code_bits, code_bit_count, &mut out)?;
456	write_bits(count:`8`, bits:run_count, code_bits, code_bit_count, &mut out)?;
457	}
458	else {
459	for _ in `0` ..= run_count {
460	write_code(scode, code_bits, code_bit_count, &mut out)?;
461	}
462	}
463
464	Ok(())
465	}
466
467	fn encode_with_frequencies(
468	frequencies: &[u64],
469	uncompressed: &[u16],
470	run_length_code: usize,
471	mut out: &mut Cursor<Vec<u8>>,
472	) -> Result<u64>
473	{
474	let mut code_bits = `0`;
475	let mut code_bit_count = `0`;
476
477	let mut run_start_value = uncompressed[`0`];
478	let mut run_length = `0`;
479
480	let start_position = out.position();
481
482	// Loop on input values
483	for &current_value in &uncompressed[`1`..] {
484	// Count same values or send code
485	if run_start_value == current_value && run_length < `255` {
486	run_length += `1`;
487	}
488	else {
489	send_code(
490	frequencies[run_start_value as usize],
491	run_length,
492	frequencies[run_length_code],
493	&mut code_bits,
494	&mut code_bit_count,
495	&mut out,
496	)?;
497
498	run_length = `0`;
499	}
500
501	run_start_value = current_value;
502	}
503
504	// Send remaining code
505	send_code(
506	frequencies[run_start_value as usize],
507	run_length,
508	frequencies[run_length_code],
509	&mut code_bits,
510	&mut code_bit_count,
511	&mut out,
512	)?;
513
514	let data_length = out.position() - start_position; // we shouldn't count the last byte write
515
516	if code_bit_count != `0` {
517	out.write(&[
518	(code_bits << (`8` - code_bit_count) & `0xff`) as u8
519	])?;
520	}
521
522	Ok(data_length * `8` + code_bit_count)
523	}
524
525	///
526	/// Pack an encoding table:
527	/// - only code lengths, not actual codes, are stored
528	/// - runs of zeroes are compressed as follows:
529	///
530	/// unpacked packed
531	/// --------------------------------
532	/// 1 zero 0 (6 bits)
533	/// 2 zeroes 59
534	/// 3 zeroes 60
535	/// 4 zeroes 61
536	/// 5 zeroes 62
537	/// n zeroes (6 or more) 63 n-6 (6 + 8 bits)
538	///
539	fn pack_encoding_table(
540	frequencies: &[u64],
541	min_index: usize,
542	max_index: usize,
543	mut out: &mut Cursor<Vec<u8>>,
544	) -> UnitResult
545	{
546	let mut code_bits = `0_u64`;
547	let mut code_bit_count = `0_u64`;
548
549	let mut frequency_index = min_index;
550	while frequency_index <= max_index { // TODO slice iteration?
551	let code_length = length(frequencies[frequency_index]);
552
553	if code_length == `0` {
554	let mut zero_run = `1`;
555
556	while frequency_index < max_index && zero_run < LONGEST_LONG_RUN {
557	if length(frequencies[frequency_index + `1`]) > `0` {
558	break;
559	}
560
561	frequency_index += `1`;
562	zero_run += `1`;
563	}
564
565	if zero_run >= `2` {
566	if zero_run >= SHORTEST_LONG_RUN {
567	write_bits(`6`, LONG_ZEROCODE_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
568	write_bits(`8`, zero_run - SHORTEST_LONG_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
569	}
570	else {
571	write_bits(`6`, SHORT_ZEROCODE_RUN + zero_run - `2`, &mut code_bits, &mut code_bit_count, &mut out)?;
572	}
573
574	frequency_index += `1`; // we must increment or else this may go very wrong
575	continue;
576	}
577	}
578
579	write_bits(`6`, code_length, &mut code_bits, &mut code_bit_count, &mut out)?;
580	frequency_index += `1`;
581	}
582
583	if code_bit_count > `0` {
584	out.write(&[
585	(code_bits << (`8` - code_bit_count)) as u8
586	])?;
587	}
588
589	Ok(())
590	}
591
592	/// Build a "canonical" Huffman code table:
593	/// - for each (uncompressed) symbol, code contains the length
594	/// of the corresponding code (in the compressed data)
595	/// - canonical codes are computed and stored in code
596	/// - the rules for constructing canonical codes are as follows:
597	/// shorter codes (if filled with zeroes to the right)*
598	/// have a numerically higher value than longer codes
599	/// for codes with the same length, numerical values*
600	/// increase with numerical symbol values
601	/// - because the canonical code table can be constructed from
602	/// symbol lengths alone, the code table can be transmitted
603	/// without sending the actual code values
604	/// - see http://www.compressconsult.com/huffman/
605	fn build_canonical_table(code_table: &mut [u64]) {
606	debug_assert_eq!(code_table.len(), ENCODING_TABLE_SIZE);
607
608	let mut count_per_code = [`0_u64`; `59`];
609
610	for &code in code_table.iter() {
611	count_per_code[u64_to_usize(code)] += `1`;
612	}
613
614	// For each i from 58 through 1, compute the
615	// numerically lowest code with length i, and
616	// store that code in n[i].
617	{
618	let mut code = `0_u64`; // TODO use foldr?
619	for count in &mut count_per_code.iter_mut().rev() {
620	let next_code = (code + *count) >> `1`;
621	*count = code;
622	code = next_code;
623	}
624	}
625
626	// code[i] contains the length, l, of the
627	// code for symbol i. Assign the next available
628	// code of length l to the symbol and store both
629	// l and the code in code[i]. // TODO iter + filter ?
630	for symbol_length in code_table.iter_mut() {
631	let current_length = *symbol_length;
632	let code_index = u64_to_usize(current_length);
633	if current_length > `0` {
634	*symbol_length = current_length \| (count_per_code[code_index] << `6`);
635	count_per_code[code_index] += `1`;
636	}
637	}
638	}
639
640
641	/// Compute Huffman codes (based on frq input) and store them in frq:
642	/// - code structure is : [63:lsb - 6:msb] \| [5-0: bit length];
643	/// - max code length is 58 bits;
644	/// - codes outside the range [im-iM] have a null length (unused values);
645	/// - original frequencies are destroyed;
646	/// - encoding tables are used by hufEncode() and hufBuildDecTable();
647	///
648	/// NB: The following code "(a == b) && (a > b))" was added to ensure
649	/// elements in the heap with the same value are sorted by index.
650	/// This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
651	/// produced a resultant sorted heap that is identical across OSes.
652	fn build_encoding_table(
653	frequencies: &mut [u64], // input frequencies, output encoding table
654	) -> (usize, usize) // return frequency max min range
655	{
656	debug_assert_eq!(frequencies.len(), ENCODING_TABLE_SIZE);
657
658	/// Frequency with position, used for MinHeap.
659	#[derive(Eq, PartialEq, Copy, Clone)]
660	struct HeapFrequency {
661	position: usize,
662	frequency: u64,
663	}
664
665	impl Ord for HeapFrequency {
666	fn cmp(&self, other: &Self) -> Ordering {
667	other.frequency.cmp(&self.frequency)
668	.then_with(\|\| other.position.cmp(&self.position))
669	}
670	}
671
672	impl PartialOrd for HeapFrequency {
673	fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
674	}
675
676	// This function assumes that when it is called, array frq
677	// indicates the frequency of all possible symbols in the data
678	// that are to be Huffman-encoded. (frq[i] contains the number
679	// of occurrences of symbol i in the data.)
680	//
681	// The loop below does three things:
682	//
683	// 1) Finds the minimum and maximum indices that point
684	// to non-zero entries in frq:
685	//
686	// frq[im] != 0, and frq[i] == 0 for all i < im
687	// frq[iM] != 0, and frq[i] == 0 for all i > iM
688	//
689	// 2) Fills array fHeap with pointers to all non-zero
690	// entries in frq.
691	//
692	// 3) Initializes array hlink such that hlink[i] == i
693	// for all array entries.
694
695	// We need to use vec here or we overflow the stack.
696	let mut links = vec![`0_usize`; ENCODING_TABLE_SIZE];
697	let mut frequency_heap = vec![`0_usize`; ENCODING_TABLE_SIZE];
698
699	// This is a good solution since we don't have usize::MAX items (no panics or UB),
700	// and since this is short-circuit, it stops at the first in order non zero element.
701	let min_frequency_index = frequencies.iter().position(\|f\| *f != `0`).unwrap_or(`0`);
702
703	let mut max_frequency_index = `0`;
704	let mut frequency_count = `0`;
705
706	// assert bounds check to optimize away bounds check in loops
707	assert!(links.len() >= ENCODING_TABLE_SIZE);
708	assert!(frequencies.len() >= ENCODING_TABLE_SIZE);
709
710	for index in min_frequency_index..ENCODING_TABLE_SIZE {
711	links[index] = index; // TODO for x in links.iter().enumerate()
712
713	if frequencies[index] != `0` {
714	frequency_heap[frequency_count] = index;
715	max_frequency_index = index;
716	frequency_count += `1`;
717	}
718	}
719
720
721	// Add a pseudo-symbol, with a frequency count of 1, to frq;
722	// adjust the fHeap and hlink array accordingly. Function
723	// hufEncode() uses the pseudo-symbol for run-length encoding.
724
725	max_frequency_index += `1`;
726	frequencies[max_frequency_index] = `1`;
727	frequency_heap[frequency_count] = max_frequency_index;
728	frequency_count += `1`;
729
730	// Build an array, scode, such that scode[i] contains the number
731	// of bits assigned to symbol i. Conceptually this is done by
732	// constructing a tree whose leaves are the symbols with non-zero
733	// frequency:
734	//
735	// Make a heap that contains all symbols with a non-zero frequency,
736	// with the least frequent symbol on top.
737	//
738	// Repeat until only one symbol is left on the heap:
739	//
740	// Take the two least frequent symbols off the top of the heap.
741	// Create a new node that has first two nodes as children, and
742	// whose frequency is the sum of the frequencies of the first
743	// two nodes. Put the new node back into the heap.
744	//
745	// The last node left on the heap is the root of the tree. For each
746	// leaf node, the distance between the root and the leaf is the length
747	// of the code for the corresponding symbol.
748	//
749	// The loop below doesn't actually build the tree; instead we compute
750	// the distances of the leaves from the root on the fly. When a new
751	// node is added to the heap, then that node's descendants are linked
752	// into a single linear list that starts at the new node, and the code
753	// lengths of the descendants (that is, their distance from the root
754	// of the tree) are incremented by one.
755	let mut heap = BinaryHeap::with_capacity(frequency_count);
756	for index in frequency_heap.drain(..frequency_count) {
757	heap.push(HeapFrequency { position: index, frequency: frequencies[index] });
758	}
759
760	let mut s_code = vec![`0_u64`; ENCODING_TABLE_SIZE];
761
762	while frequency_count > `1` {
763	// Find the indices, mm and m, of the two smallest non-zero frq
764	// values in fHeap, add the smallest frq to the second-smallest
765	// frq, and remove the smallest frq value from fHeap.
766	let (high_position, low_position) = {
767	let smallest_frequency = heap.pop().expect("heap empty bug");
768	frequency_count -= `1`;
769
770	let mut second_smallest_frequency = heap.peek_mut().expect("heap empty bug");
771	second_smallest_frequency.frequency += smallest_frequency.frequency;
772
773	(second_smallest_frequency.position, smallest_frequency.position)
774	};
775
776	// The entries in scode are linked into lists with the
777	// entries in hlink serving as "next" pointers and with
778	// the end of a list marked by hlink[j] == j.
779	//
780	// Traverse the lists that start at scode[m] and scode[mm].
781	// For each element visited, increment the length of the
782	// corresponding code by one bit. (If we visit scode[j]
783	// during the traversal, then the code for symbol j becomes
784	// one bit longer.)
785	//
786	// Merge the lists that start at scode[m] and scode[mm]
787	// into a single list that starts at scode[m].
788
789	// Add a bit to all codes in the first list.
790	let mut index = high_position; // TODO fold()
791	loop {
792	s_code[index] += `1`;
793	debug_assert!(s_code[index] <= `58`);
794
795	// merge the two lists
796	if links[index] == index {
797	links[index] = low_position;
798	break;
799	}
800
801	index = links[index];
802	}
803
804	// Add a bit to all codes in the second list
805	let mut index = low_position; // TODO fold()
806	loop {
807	s_code[index] += `1`;
808	debug_assert!(s_code[index] <= `58`);
809
810	if links[index] == index {
811	break;
812	}
813
814	index = links[index];
815	}
816	}
817
818	// Build a canonical Huffman code table, replacing the code
819	// lengths in scode with (code, code length) pairs. Copy the
820	// code table from scode into frq.
821	build_canonical_table(&mut s_code);
822	frequencies.copy_from_slice(&s_code);
823
824	(min_frequency_index, max_frequency_index)
825	}
826
827
828	#[inline] fn length(code: u64) -> u64 { code & `63` }
829	#[inline] fn code(code: u64) -> u64 { code >> `6` }
830
831	const INVALID_BIT_COUNT: &'static str = "invalid number of bits";
832	const INVALID_TABLE_ENTRY: &'static str = "invalid code table entry";
833	const NOT_ENOUGH_DATA: &'static str = "decoded data are shorter than expected";
834	const INVALID_TABLE_SIZE: &'static str = "unexpected end of code table data";
835	const TABLE_TOO_LONG: &'static str = "code table is longer than expected";
836	const INVALID_CODE: &'static str = "invalid code";
837	const TOO_MUCH_DATA: &'static str = "decoded data are longer than expected";
838
839
840	#[cfg(test)]
841	mod test {
842	use super::*;
843	use rand::{Rng, SeedableRng};
844
845	const UNCOMPRESSED_ARRAY: [u16; `100`] = [
846	`3852`, `2432`, `33635`, `49381`, `10100`, `15095`, `62693`, `63738`, `62359`, `5013`, `7715`, `59875`, `28182`,
847	`34449`, `19983`, `20399`, `63407`, `29486`, `4877`, `26738`, `44815`, `14042`, `46091`, `48228`, `25682`, `35412`,
848	`7582`, `65069`, `6632`, `54124`, `13798`, `27503`, `52154`, `61961`, `30474`, `46880`, `39097`, `15754`, `52897`,
849	`42371`, `54053`, `14178`, `48276`, `34591`, `42602`, `32126`, `42062`, `31474`, `16274`, `55991`, `2882`, `17039`,
850	`56389`, `20835`, `57057`, `54081`, `3414`, `33957`, `52584`, `10222`, `25139`, `40002`, `44980`, `1602`, `48021`,
851	`19703`, `6562`, `61777`, `41582`, `201`, `31253`, `51790`, `15888`, `40921`, `3627`, `12184`, `16036`, `26349`,
852	`3159`, `29002`, `14535`, `50632`, `18118`, `33583`, `18878`, `59470`, `32835`, `9347`, `16991`, `21303`, `26263`,
853	`8312`, `14017`, `41777`, `43240`, `3500`, `60250`, `52437`, `45715`, `61520`,
854	];
855
856	const UNCOMPRESSED_ARRAY_SPECIAL: [u16; `100`] = [
857	`0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `28182`,
858	`0`, `65534`, `0`, `65534`, `0`, `65534`, `0`, `65534`, `0`, `0`, `0`, `0`, `0`,
859	`0`, `0`, `0`, `54124`, `13798`, `27503`, `52154`, `61961`, `30474`, `46880`, `39097`, `15754`, `52897`,
860	`42371`, `54053`, `14178`, `48276`, `34591`, `42602`, `32126`, `42062`, `31474`, `16274`, `55991`, `2882`, `17039`,
861	`56389`, `20835`, `57057`, `54081`, `3414`, `33957`, `52584`, `10222`, `25139`, `40002`, `44980`, `1602`, `48021`,
862	`19703`, `6562`, `61777`, `41582`, `201`, `31253`, `51790`, `15888`, `40921`, `3627`, `12184`, `16036`, `26349`,
863	`3159`, `29002`, `14535`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`,
864	`65534`, `65534`, `65534`, `65534`, `65534`, `65534`, `65534`, `65534`, `65534`,
865	];
866
867	const COMPRESSED_ARRAY: [u8; `703`] = [
868	`0xc9`, `0x0`, `0x0`, `0x0`, `0x2e`, `0xfe`, `0x0`, `0x0`, `0x56`, `0x2`, `0x0`, `0x0`, `0xa2`, `0x2`, `0x0`, `0x0`, `0x0`,
869	`0x0`, `0x0`, `0x0`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xd6`, `0x47`,
870	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x28`, `0x1f`, `0xff`, `0xff`, `0xed`, `0x87`, `0xff`, `0xff`, `0xf0`,
871	`0x91`, `0xff`, `0xf8`, `0x1f`, `0xf4`, `0xf1`, `0xff`, `0x78`, `0x1f`, `0xfd`, `0xa1`, `0xff`, `0xff`, `0xff`, `0xff`,
872	`0xff`, `0xff`, `0xfa`, `0xc7`, `0xfe`, `0x4`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
873	`0xff`, `0xed`, `0x1f`, `0xf3`, `0xf1`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xe8`, `0x7`, `0xfd`, `0xf8`,
874	`0x7f`, `0xff`, `0xff`, `0xff`, `0xfd`, `0x10`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x51`, `0xff`,
875	`0xff`, `0xff`, `0xff`, `0xfe`, `0x1`, `0xff`, `0x73`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
876	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xfe`, `0x0`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
877	`0xff`, `0xff`, `0xff`, `0xfc`, `0xa4`, `0x7f`, `0xf5`, `0x7`, `0xfc`, `0x48`, `0x7f`, `0xe0`, `0x47`, `0xff`, `0xff`,
878	`0xf5`, `0x91`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf1`, `0xf1`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf8`, `0x21`, `0xff`,
879	`0x7f`, `0x1f`, `0xf8`, `0xd1`, `0xff`, `0xe7`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xbc`, `0x1f`, `0xf2`, `0x91`,
880	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x1c`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xe7`,
881	`0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xfc`, `0x8c`, `0x7f`, `0xff`, `0xff`, `0xc`, `0x1f`, `0xff`, `0xff`,
882	`0xe5`, `0x7`, `0xff`, `0xff`, `0xfa`, `0x81`, `0xff`, `0xff`, `0xff`, `0x20`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`,
883	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
884	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xfe`, `0xbc`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xfc`, `0x38`, `0x7f`, `0xff`,
885	`0xff`, `0xff`, `0xfc`, `0xd0`, `0x7f`, `0xd3`, `0xc7`, `0xff`, `0xff`, `0xf7`, `0x91`, `0xff`, `0xff`, `0xff`, `0xff`,
886	`0xfe`, `0xc1`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf9`, `0x61`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xc7`,
887	`0x87`, `0xff`, `0xff`, `0xfd`, `0x81`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf1`, `0x87`, `0xff`, `0xff`,
888	`0xff`, `0xff`, `0xfe`, `0x87`, `0xff`, `0x58`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xfd`, `0xec`, `0x7f`, `0xff`, `0xff`,
889	`0xff`, `0xfe`, `0xd0`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0x6c`, `0x7f`, `0xcb`, `0x47`, `0xff`, `0xff`, `0xf3`,
890	`0x61`, `0xff`, `0xff`, `0xff`, `0x80`, `0x7f`, `0xe1`, `0xc7`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x1f`,
891	`0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
892	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x18`, `0x1f`, `0xff`, `0xff`,
893	`0xff`, `0xff`, `0xff`, `0xfd`, `0xcc`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf8`, `0x11`, `0xff`, `0xff`,
894	`0xff`, `0xff`, `0xf8`, `0x41`, `0xff`, `0xbc`, `0x1f`, `0xff`, `0xff`, `0xc4`, `0x47`, `0xff`, `0xff`, `0xf2`, `0x91`,
895	`0xff`, `0xe0`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0x6d`, `0x1f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
896	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x2`, `0x1f`, `0xf9`, `0xe1`, `0xff`, `0xff`, `0xff`, `0xff`, `0xfc`, `0xe1`,
897	`0xff`, `0xff`, `0xfd`, `0xb0`, `0x7f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xe1`, `0xff`, `0xff`, `0xff`, `0xff`,
898	`0xff`, `0xff`, `0xff`, `0xff`, `0x5a`, `0x1f`, `0xfc`, `0x81`, `0xbf`, `0x29`, `0x1b`, `0xff`, `0xff`, `0xff`, `0xff`,
899	`0xff`, `0xff`, `0xff`, `0xf3`, `0x61`, `0xbf`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xc8`, `0x1b`,
900	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf6`, `0xb1`, `0xbf`, `0xff`, `0xfd`, `0x80`, `0x6f`, `0xff`,
901	`0xff`, `0xf`, `0x1b`, `0xf8`, `0xc1`, `0xbf`, `0xff`, `0xfc`, `0xb4`, `0x6f`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
902	`0xff`, `0xff`, `0xda`, `0x46`, `0xfc`, `0x54`, `0x6f`, `0xc9`, `0x6`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
903	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0x21`, `0x1b`, `0xff`, `0xff`, `0xe0`, `0x86`, `0xff`, `0xff`,
904	`0xff`, `0xff`, `0xe2`, `0xc6`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`,
905	`0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf3`, `0x91`, `0xbf`, `0xff`, `0xfe`, `0x24`, `0x6f`, `0xff`, `0xff`, `0x6b`,
906	`0x1b`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xfd`, `0xb1`, `0xbf`, `0xfa`, `0x1b`, `0xfb`, `0x11`,
907	`0xbf`, `0xff`, `0xfe`, `0x8`, `0x6f`, `0xff`, `0xff`, `0x42`, `0x1b`, `0xff`, `0xff`, `0xff`, `0xff`, `0xb9`, `0x1b`,
908	`0xff`, `0xff`, `0xcf`, `0xc6`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xff`, `0xf1`, `0x31`,
909	`0x86`, `0x10`, `0x9`, `0xb4`, `0xe4`, `0x4c`, `0xf7`, `0xef`, `0x42`, `0x87`, `0x6a`, `0xb5`, `0xc2`, `0x34`, `0x9e`,
910	`0x2f`, `0x12`, `0xae`, `0x21`, `0x68`, `0xf2`, `0xa8`, `0x74`, `0x37`, `0xe1`, `0x98`, `0x14`, `0x59`, `0x57`, `0x2c`,
911	`0x24`, `0x3b`, `0x35`, `0x6c`, `0x1b`, `0x8b`, `0xcc`, `0xe6`, `0x13`, `0x38`, `0xc`, `0x8e`, `0xe2`, `0xc`, `0xfe`,
912	`0x49`, `0x73`, `0xbc`, `0x2b`, `0x7b`, `0x9`, `0x27`, `0x79`, `0x14`, `0xc`, `0x94`, `0x42`, `0xf8`, `0x7c`, `0x1`,
913	`0x8d`, `0x26`, `0xde`, `0x87`, `0x26`, `0x71`, `0x50`, `0x45`, `0xc6`, `0x28`, `0x40`, `0xd5`, `0xe`, `0x8d`, `0x8`,
914	`0x1e`, `0x4c`, `0xa4`, `0x79`, `0x57`, `0xf0`, `0xc3`, `0x6d`, `0x5c`, `0x6d`, `0xc0`,
915	];
916
917	fn fill(rng: &mut impl Rng, size: usize) -> Vec<u16> {
918	if rng.gen_bool(`0.2`) {
919	let value = if rng.gen_bool(`0.5`) { `0` } else { u16::MAX };
920	return vec![ value; size ];
921	}
922
923	let mut data = vec![`0_u16`; size];
924
925	data.iter_mut().for_each(\|v\| {
926	v = rng.gen_range(`0_u16` .. u16*::MAX);
927	});
928
929	data
930	}
931
932	/// Test using both input and output from a custom ILM OpenEXR test.
933	#[test]
934	fn compression_comparation() {
935	let raw = compress(&UNCOMPRESSED_ARRAY).unwrap();
936	assert_eq!(raw, COMPRESSED_ARRAY.to_vec());
937	}
938
939	#[test]
940	fn round_trip() {
941	let mut random = rand::rngs::StdRng::from_seed(SEED);
942	let raw = fill(&mut random, u16::MAX as usize);
943
944	let compressed = compress(&raw).unwrap();
945	let uncompressed = decompress(&compressed, raw.len()).unwrap();
946
947	assert_eq!(uncompressed, raw);
948	}
949
950	#[test]
951	fn repetitions_special() {
952	let raw = UNCOMPRESSED_ARRAY_SPECIAL;
953
954	let compressed = compress(&raw).unwrap();
955	let uncompressed = decompress(&compressed, raw.len()).unwrap();
956
957	assert_eq!(uncompressed, raw.to_vec());
958	}
959
960	#[test]
961	fn round_trip100() {
962	let mut random = rand::rngs::StdRng::from_seed(SEED);
963
964	for size_multiplier in `1`..`10` {
965	let raw = fill(&mut random, size_multiplier * `50_000`);
966
967	let compressed = compress(&raw).unwrap();
968	let uncompressed = decompress(&compressed, raw.len()).unwrap();
969
970	assert_eq!(uncompressed, raw);
971	}
972	}
973
974	#[test]
975	fn test_zeroes(){
976	let uncompressed: &[u16] = &[ `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0`, `0` ];
977
978	let compressed = compress(uncompressed).unwrap();
979	let decompressed = decompress(&compressed, uncompressed.len()).unwrap();
980
981	assert_eq!(uncompressed, decompressed.as_slice());
982	}
983
984	const SEED: [u8; `32`] = [
985	`12`,`155`,`32`,`34`,`112`,`109`,`98`,`54`,
986	`12`,`255`,`32`,`34`,`112`,`109`,`98`,`55`,
987	`12`,`155`,`32`,`34`,`12`,`109`,`98`,`54`,
988	`12`,`35`,`32`,`34`,`112`,`109`,`48`,`54`,
989	];
990	}
991