| 1 | //! 16-bit Huffman compression and decompression.
|
| 2 | //! Huffman compression and decompression routines written
|
| 3 | //! by Christian Rouet for his PIZ image file format.
|
| 4 | // see https://github.com/AcademySoftwareFoundation/openexr/blob/88246d991e0318c043e6f584f7493da08a31f9f8/OpenEXR/IlmImf/ImfHuf.cpp
|
| 5 |
|
| 6 | use crate::math::RoundingMode;
|
| 7 | use crate::error::{Error, Result, UnitResult, u64_to_usize, u32_to_usize};
|
| 8 | use crate::io::Data;
|
| 9 | use std::{
|
| 10 | cmp::Ordering,
|
| 11 | collections::BinaryHeap,
|
| 12 | io::{Cursor, Read, Write},
|
| 13 | };
|
| 14 | use std::convert::TryFrom;
|
| 15 | use smallvec::SmallVec;
|
| 16 |
|
| 17 |
|
| 18 | pub fn decompress(compressed: &[u8], expected_size: usize) -> Result<Vec<u16>> {
|
| 19 | let mut remaining_compressed = compressed;
|
| 20 |
|
| 21 | let min_code_index = usize::try_from(u32::read(&mut remaining_compressed)?)?;
|
| 22 | let max_code_index_32 = u32::read(&mut remaining_compressed)?;
|
| 23 | let _table_size = usize::try_from(u32::read(&mut remaining_compressed)?)?; // TODO check this and return Err?
|
| 24 | let bit_count = usize::try_from(u32::read(&mut remaining_compressed)?)?;
|
| 25 | let _skipped = u32::read(&mut remaining_compressed)?; // what is this
|
| 26 |
|
| 27 | let max_code_index = usize::try_from(max_code_index_32).unwrap();
|
| 28 | if min_code_index >= ENCODING_TABLE_SIZE || max_code_index >= ENCODING_TABLE_SIZE {
|
| 29 | return Err(Error::invalid(INVALID_TABLE_SIZE));
|
| 30 | }
|
| 31 |
|
| 32 | if RoundingMode::Up.divide(bit_count, 8) > remaining_compressed.len() {
|
| 33 | return Err(Error::invalid(NOT_ENOUGH_DATA));
|
| 34 | }
|
| 35 |
|
| 36 | let encoding_table = read_encoding_table(&mut remaining_compressed, min_code_index, max_code_index)?;
|
| 37 | if bit_count > 8 * remaining_compressed.len() { return Err(Error::invalid(INVALID_BIT_COUNT)); }
|
| 38 |
|
| 39 | let decoding_table = build_decoding_table(&encoding_table, min_code_index, max_code_index)?;
|
| 40 |
|
| 41 | let result = decode_with_tables(
|
| 42 | &encoding_table,
|
| 43 | &decoding_table,
|
| 44 | &remaining_compressed,
|
| 45 | i32::try_from(bit_count)?,
|
| 46 | max_code_index_32,
|
| 47 | expected_size,
|
| 48 | )?;
|
| 49 |
|
| 50 | Ok(result)
|
| 51 | }
|
| 52 |
|
| 53 | pub fn compress(uncompressed: &[u16]) -> Result<Vec<u8>> {
|
| 54 | if uncompressed.is_empty() { return Ok(vec![]); }
|
| 55 |
|
| 56 | let mut frequencies = count_frequencies(uncompressed);
|
| 57 | let (min_code_index, max_code_index) = build_encoding_table(&mut frequencies);
|
| 58 |
|
| 59 | let mut result = Cursor::new(Vec::with_capacity(uncompressed.len()));
|
| 60 | u32::write_slice(&mut result, &[0; 5])?; // we come back to these later after we know more about the compressed data
|
| 61 |
|
| 62 | let table_start = result.position();
|
| 63 | pack_encoding_table(
|
| 64 | &frequencies,
|
| 65 | min_code_index,
|
| 66 | max_code_index,
|
| 67 | &mut result,
|
| 68 | )?;
|
| 69 |
|
| 70 | let data_start = result.position();
|
| 71 | let bit_count = encode_with_frequencies(
|
| 72 | &frequencies,
|
| 73 | uncompressed,
|
| 74 | max_code_index,
|
| 75 | &mut result
|
| 76 | )?;
|
| 77 |
|
| 78 | // write meta data after this
|
| 79 | result.set_position(0);
|
| 80 | let table_length = data_start - table_start;
|
| 81 |
|
| 82 | u32::try_from(min_code_index)?.write(&mut result)?;
|
| 83 | u32::try_from(max_code_index)?.write(&mut result)?;
|
| 84 | u32::try_from(table_length)?.write(&mut result)?;
|
| 85 | u32::try_from(bit_count)?.write(&mut result)?;
|
| 86 | 0_u32.write(&mut result)?;
|
| 87 |
|
| 88 | Ok(result.into_inner())
|
| 89 | }
|
| 90 |
|
| 91 |
|
| 92 | const ENCODE_BITS: u64 = 16; // literal (value) bit length
|
| 93 | const DECODE_BITS: u64 = 14; // decoding bit size (>= 8)
|
| 94 |
|
| 95 | const ENCODING_TABLE_SIZE: usize = ((1 << ENCODE_BITS) + 1) as usize;
|
| 96 | const DECODING_TABLE_SIZE: usize = (1 << DECODE_BITS) as usize;
|
| 97 | const DECODE_MASK: u64 = DECODING_TABLE_SIZE as u64 - 1;
|
| 98 |
|
| 99 | const SHORT_ZEROCODE_RUN: u64 = 59;
|
| 100 | const LONG_ZEROCODE_RUN: u64 = 63;
|
| 101 | const SHORTEST_LONG_RUN: u64 = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN;
|
| 102 | const LONGEST_LONG_RUN: u64 = 255 + SHORTEST_LONG_RUN;
|
| 103 |
|
| 104 |
|
| 105 | #[derive (Clone, Debug, Eq, PartialEq)]
|
| 106 | enum Code {
|
| 107 | Empty,
|
| 108 | Short(ShortCode),
|
| 109 | Long(SmallVec<[u32; 2]>), // often 2, sometimes 4, rarely 8
|
| 110 | }
|
| 111 |
|
| 112 | #[derive (Clone, Debug, Eq, PartialEq)]
|
| 113 | struct ShortCode {
|
| 114 | value: u32,
|
| 115 | len: u8,
|
| 116 | }
|
| 117 |
|
| 118 | impl ShortCode {
|
| 119 | #[inline ] fn len(&self) -> u64 { u64::from(self.len) }
|
| 120 | }
|
| 121 |
|
| 122 | /// Decode (uncompress) n bits based on encoding & decoding tables:
|
| 123 | fn decode_with_tables(
|
| 124 | encoding_table: &[u64],
|
| 125 | decoding_table: &[Code],
|
| 126 | mut input: &[u8],
|
| 127 | input_bit_count: i32,
|
| 128 | run_length_code: u32,
|
| 129 | expected_output_size: usize,
|
| 130 | ) -> Result<Vec<u16>>
|
| 131 | {
|
| 132 | let mut output = Vec::with_capacity(expected_output_size);
|
| 133 | let mut code_bits = 0_u64;
|
| 134 | let mut code_bit_count = 0_u64;
|
| 135 |
|
| 136 | while input.len() > 0 {
|
| 137 | read_byte(&mut code_bits, &mut code_bit_count, &mut input)?;
|
| 138 |
|
| 139 | // Access decoding table
|
| 140 | while code_bit_count >= DECODE_BITS {
|
| 141 | let code_index = (code_bits >> (code_bit_count - DECODE_BITS)) & DECODE_MASK;
|
| 142 | let code = &decoding_table[u64_to_usize(code_index)];
|
| 143 |
|
| 144 | // Get short code
|
| 145 | if let Code::Short(code) = code {
|
| 146 | code_bit_count -= code.len();
|
| 147 |
|
| 148 | read_code_into_vec(
|
| 149 | code.value,
|
| 150 | run_length_code,
|
| 151 | &mut code_bits,
|
| 152 | &mut code_bit_count,
|
| 153 | &mut input,
|
| 154 | &mut output,
|
| 155 | expected_output_size,
|
| 156 | )?;
|
| 157 | }
|
| 158 | else if let Code::Long(ref long_codes) = code {
|
| 159 | debug_assert_ne!(long_codes.len(), 0);
|
| 160 |
|
| 161 | let long_code = long_codes.iter()
|
| 162 | .filter_map(|&long_code|{
|
| 163 | let encoded_long_code = encoding_table[u32_to_usize(long_code)];
|
| 164 | let length = length(encoded_long_code);
|
| 165 |
|
| 166 | while code_bit_count < length && input.len() > 0 {
|
| 167 | let err = read_byte(&mut code_bits, &mut code_bit_count, &mut input);
|
| 168 | if let Err(err) = err { return Some(Err(err)); }
|
| 169 | }
|
| 170 |
|
| 171 | if code_bit_count >= length {
|
| 172 | let required_code = (code_bits >> (code_bit_count - length)) & ((1 << length) - 1);
|
| 173 |
|
| 174 | if self::code(encoded_long_code) == required_code {
|
| 175 | code_bit_count -= length;
|
| 176 | return Some(Ok(long_code));
|
| 177 | }
|
| 178 | }
|
| 179 |
|
| 180 | None
|
| 181 |
|
| 182 | })
|
| 183 | .next()
|
| 184 | .ok_or(Error::invalid(INVALID_CODE))?;
|
| 185 |
|
| 186 | read_code_into_vec(
|
| 187 | long_code?,
|
| 188 | run_length_code,
|
| 189 | &mut code_bits,
|
| 190 | &mut code_bit_count,
|
| 191 | &mut input,
|
| 192 | &mut output,
|
| 193 | expected_output_size,
|
| 194 | )?;
|
| 195 | }
|
| 196 | else {
|
| 197 | return Err(Error::invalid(INVALID_CODE));
|
| 198 | }
|
| 199 | }
|
| 200 | }
|
| 201 |
|
| 202 | let count = u64::try_from((8 - input_bit_count) & 7)?;
|
| 203 | code_bits >>= count;
|
| 204 |
|
| 205 | code_bit_count = code_bit_count.checked_sub(count)
|
| 206 | .ok_or_else(|| Error::invalid("code" ))?;
|
| 207 |
|
| 208 | while code_bit_count > 0 {
|
| 209 | let index = (code_bits << (DECODE_BITS - code_bit_count)) & DECODE_MASK;
|
| 210 | let code = &decoding_table[u64_to_usize(index)];
|
| 211 |
|
| 212 | if let Code::Short(short_code) = code {
|
| 213 | if short_code.len() > code_bit_count { return Err(Error::invalid("code" )) }; // FIXME why does this happen??
|
| 214 | code_bit_count -= short_code.len(); // FIXME may throw "attempted to subtract with overflow"
|
| 215 |
|
| 216 | read_code_into_vec(
|
| 217 | short_code.value,
|
| 218 | run_length_code,
|
| 219 | &mut code_bits,
|
| 220 | &mut code_bit_count,
|
| 221 | &mut input,
|
| 222 | &mut output,
|
| 223 | expected_output_size,
|
| 224 | )?;
|
| 225 | }
|
| 226 | else {
|
| 227 | return Err(Error::invalid(INVALID_CODE));
|
| 228 | }
|
| 229 | }
|
| 230 |
|
| 231 | if output.len() != expected_output_size {
|
| 232 | return Err(Error::invalid(NOT_ENOUGH_DATA));
|
| 233 | }
|
| 234 |
|
| 235 | Ok(output)
|
| 236 | }
|
| 237 |
|
| 238 | /// Build a decoding hash table based on the encoding table code:
|
| 239 | /// - short codes (<= HUF_DECBITS) are resolved with a single table access;
|
| 240 | /// - long code entry allocations are not optimized, because long codes are
|
| 241 | /// unfrequent;
|
| 242 | /// - decoding tables are used by hufDecode();
|
| 243 | fn build_decoding_table(
|
| 244 | encoding_table: &[u64],
|
| 245 | min_code_index: usize,
|
| 246 | max_code_index: usize,
|
| 247 | ) -> Result<Vec<Code>>
|
| 248 | {
|
| 249 | let mut decoding_table = vec![Code::Empty; DECODING_TABLE_SIZE]; // not an array because of code not being copy
|
| 250 |
|
| 251 | for (code_index, &encoded_code) in encoding_table[..= max_code_index].iter().enumerate().skip(min_code_index) {
|
| 252 | let code_index = u32::try_from(code_index).unwrap();
|
| 253 |
|
| 254 | let code = code(encoded_code);
|
| 255 | let length = length(encoded_code);
|
| 256 |
|
| 257 | if code >> length != 0 {
|
| 258 | return Err(Error::invalid(INVALID_TABLE_ENTRY));
|
| 259 | }
|
| 260 |
|
| 261 | if length > DECODE_BITS {
|
| 262 | let long_code = &mut decoding_table[u64_to_usize(code >> (length - DECODE_BITS))];
|
| 263 |
|
| 264 | match long_code {
|
| 265 | Code::Empty => *long_code = Code::Long(smallvec![code_index]),
|
| 266 | Code::Long(lits) => lits.push(code_index),
|
| 267 | _ => { return Err(Error::invalid(INVALID_TABLE_ENTRY)); }
|
| 268 | }
|
| 269 | }
|
| 270 | else if length != 0 {
|
| 271 | let default_value = Code::Short(ShortCode {
|
| 272 | value: code_index,
|
| 273 | len: length as u8,
|
| 274 | });
|
| 275 |
|
| 276 | let start_index = u64_to_usize(code << (DECODE_BITS - length));
|
| 277 | let count = u64_to_usize(1 << (DECODE_BITS - length));
|
| 278 |
|
| 279 | for value in &mut decoding_table[start_index .. start_index + count] {
|
| 280 | *value = default_value.clone();
|
| 281 | }
|
| 282 | }
|
| 283 | }
|
| 284 |
|
| 285 | Ok(decoding_table)
|
| 286 | }
|
| 287 |
|
| 288 | /// Run-length-decompresses all zero runs from the packed table to the encoding table
|
| 289 | fn read_encoding_table(
|
| 290 | packed: &mut impl Read,
|
| 291 | min_code_index: usize,
|
| 292 | max_code_index: usize,
|
| 293 | ) -> Result<Vec<u64>>
|
| 294 | {
|
| 295 | let mut code_bits = 0_u64;
|
| 296 | let mut code_bit_count = 0_u64;
|
| 297 |
|
| 298 | // TODO push() into encoding table instead of index stuff?
|
| 299 | let mut encoding_table = vec![0_u64; ENCODING_TABLE_SIZE];
|
| 300 | let mut code_index = min_code_index;
|
| 301 | while code_index <= max_code_index {
|
| 302 | let code_len = read_bits(6, &mut code_bits, &mut code_bit_count, packed)?;
|
| 303 | encoding_table[code_index] = code_len;
|
| 304 |
|
| 305 | if code_len == LONG_ZEROCODE_RUN {
|
| 306 | let zerun_bits = read_bits(8, &mut code_bits, &mut code_bit_count, packed)?;
|
| 307 | let zerun = usize::try_from(zerun_bits + SHORTEST_LONG_RUN).unwrap();
|
| 308 |
|
| 309 | if code_index + zerun > max_code_index + 1 {
|
| 310 | return Err(Error::invalid(TABLE_TOO_LONG));
|
| 311 | }
|
| 312 |
|
| 313 | for value in &mut encoding_table[code_index..code_index + zerun] {
|
| 314 | *value = 0;
|
| 315 | }
|
| 316 |
|
| 317 | code_index += zerun;
|
| 318 | }
|
| 319 | else if code_len >= SHORT_ZEROCODE_RUN {
|
| 320 | let duplication_count = usize::try_from(code_len - SHORT_ZEROCODE_RUN + 2).unwrap();
|
| 321 | if code_index + duplication_count > max_code_index + 1 {
|
| 322 | return Err(Error::invalid(TABLE_TOO_LONG));
|
| 323 | }
|
| 324 |
|
| 325 | for value in &mut encoding_table[code_index .. code_index + duplication_count] {
|
| 326 | *value = 0;
|
| 327 | }
|
| 328 |
|
| 329 | code_index += duplication_count;
|
| 330 | }
|
| 331 | else {
|
| 332 | code_index += 1;
|
| 333 | }
|
| 334 | }
|
| 335 |
|
| 336 | build_canonical_table(&mut encoding_table);
|
| 337 | Ok(encoding_table)
|
| 338 | }
|
| 339 |
|
| 340 | // TODO Use BitStreamReader for all the bit reads?!
|
| 341 | #[inline ]
|
| 342 | fn read_bits(
|
| 343 | count: u64,
|
| 344 | code_bits: &mut u64,
|
| 345 | code_bit_count: &mut u64,
|
| 346 | input: &mut impl Read,
|
| 347 | ) -> Result<u64>
|
| 348 | {
|
| 349 | while *code_bit_count < count {
|
| 350 | read_byte(code_bits, code_bit_count, input)?;
|
| 351 | }
|
| 352 |
|
| 353 | *code_bit_count -= count;
|
| 354 | Ok((*code_bits >> *code_bit_count) & ((1 << count) - 1))
|
| 355 | }
|
| 356 |
|
| 357 | #[inline ]
|
| 358 | fn read_byte(code_bits: &mut u64, bit_count: &mut u64, input: &mut impl Read) -> UnitResult {
|
| 359 | *code_bits = (*code_bits << 8) | u8::read(input)? as u64;
|
| 360 | *bit_count += 8;
|
| 361 | Ok(())
|
| 362 | }
|
| 363 |
|
| 364 | #[inline ]
|
| 365 | fn read_code_into_vec(
|
| 366 | code: u32,
|
| 367 | run_length_code: u32,
|
| 368 | code_bits: &mut u64,
|
| 369 | code_bit_count: &mut u64,
|
| 370 | read: &mut impl Read,
|
| 371 | out: &mut Vec<u16>,
|
| 372 | max_len: usize,
|
| 373 | ) -> UnitResult
|
| 374 | {
|
| 375 | if code == run_length_code { // code may be too large for u16
|
| 376 | if *code_bit_count < 8 {
|
| 377 | read_byte(code_bits, code_bit_count, read)?;
|
| 378 | }
|
| 379 |
|
| 380 | *code_bit_count -= 8;
|
| 381 |
|
| 382 | let code_repetitions = usize::from((*code_bits >> *code_bit_count) as u8);
|
| 383 |
|
| 384 | if out.len() + code_repetitions > max_len {
|
| 385 | return Err(Error::invalid(TOO_MUCH_DATA));
|
| 386 | }
|
| 387 | else if out.is_empty() {
|
| 388 | return Err(Error::invalid(NOT_ENOUGH_DATA));
|
| 389 | }
|
| 390 |
|
| 391 | let repeated_code = *out.last().unwrap();
|
| 392 | out.extend(std::iter::repeat(repeated_code).take(code_repetitions));
|
| 393 | }
|
| 394 | else if out.len() < max_len { // implies that code is not larger than u16???
|
| 395 | out.push(u16::try_from(code)?);
|
| 396 | }
|
| 397 | else {
|
| 398 | return Err(Error::invalid(TOO_MUCH_DATA));
|
| 399 | }
|
| 400 |
|
| 401 | Ok(())
|
| 402 | }
|
| 403 |
|
| 404 | fn count_frequencies(data: &[u16]) -> Vec<u64> {
|
| 405 | let mut frequencies: Vec = vec![0_u64; ENCODING_TABLE_SIZE];
|
| 406 |
|
| 407 | for value: &u16 in data {
|
| 408 | frequencies[*value as usize] += 1;
|
| 409 | }
|
| 410 |
|
| 411 | frequencies
|
| 412 | }
|
| 413 |
|
| 414 | fn write_bits(
|
| 415 | count: u64,
|
| 416 | bits: u64,
|
| 417 | code_bits: &mut u64,
|
| 418 | code_bit_count: &mut u64,
|
| 419 | mut out: impl Write,
|
| 420 | ) -> UnitResult
|
| 421 | {
|
| 422 | *code_bits = (*code_bits << count) | bits;
|
| 423 | *code_bit_count += count;
|
| 424 |
|
| 425 | while *code_bit_count >= 8 {
|
| 426 | *code_bit_count -= 8;
|
| 427 | out.write(&[
|
| 428 | (*code_bits >> *code_bit_count) as u8 // TODO make sure never or always wraps?
|
| 429 | ])?;
|
| 430 | }
|
| 431 |
|
| 432 | Ok(())
|
| 433 | }
|
| 434 |
|
| 435 | fn write_code(scode: u64, code_bits: &mut u64, code_bit_count: &mut u64, mut out: impl Write) -> UnitResult {
|
| 436 | write_bits(count:length(scode), bits:code(scode), code_bits, code_bit_count, &mut out)
|
| 437 | }
|
| 438 |
|
| 439 | #[inline (always)]
|
| 440 | fn send_code(
|
| 441 | scode: u64,
|
| 442 | run_count: u64,
|
| 443 | run_code: u64,
|
| 444 | code_bits: &mut u64,
|
| 445 | code_bit_count: &mut u64,
|
| 446 | mut out: impl Write,
|
| 447 | ) -> UnitResult
|
| 448 | {
|
| 449 | // Output a run of runCount instances of the symbol sCount.
|
| 450 | // Output the symbols explicitly, or if that is shorter, output
|
| 451 | // the sCode symbol once followed by a runCode symbol and runCount
|
| 452 | // expressed as an 8-bit number.
|
| 453 | if length(code:scode) + length(run_code) + 8 < length(code:scode) * run_count {
|
| 454 | write_code(scode, code_bits, code_bit_count, &mut out)?;
|
| 455 | write_code(scode:run_code, code_bits, code_bit_count, &mut out)?;
|
| 456 | write_bits(count:8, bits:run_count, code_bits, code_bit_count, &mut out)?;
|
| 457 | }
|
| 458 | else {
|
| 459 | for _ in 0 ..= run_count {
|
| 460 | write_code(scode, code_bits, code_bit_count, &mut out)?;
|
| 461 | }
|
| 462 | }
|
| 463 |
|
| 464 | Ok(())
|
| 465 | }
|
| 466 |
|
| 467 | fn encode_with_frequencies(
|
| 468 | frequencies: &[u64],
|
| 469 | uncompressed: &[u16],
|
| 470 | run_length_code: usize,
|
| 471 | mut out: &mut Cursor<Vec<u8>>,
|
| 472 | ) -> Result<u64>
|
| 473 | {
|
| 474 | let mut code_bits = 0;
|
| 475 | let mut code_bit_count = 0;
|
| 476 |
|
| 477 | let mut run_start_value = uncompressed[0];
|
| 478 | let mut run_length = 0;
|
| 479 |
|
| 480 | let start_position = out.position();
|
| 481 |
|
| 482 | // Loop on input values
|
| 483 | for ¤t_value in &uncompressed[1..] {
|
| 484 | // Count same values or send code
|
| 485 | if run_start_value == current_value && run_length < 255 {
|
| 486 | run_length += 1;
|
| 487 | }
|
| 488 | else {
|
| 489 | send_code(
|
| 490 | frequencies[run_start_value as usize],
|
| 491 | run_length,
|
| 492 | frequencies[run_length_code],
|
| 493 | &mut code_bits,
|
| 494 | &mut code_bit_count,
|
| 495 | &mut out,
|
| 496 | )?;
|
| 497 |
|
| 498 | run_length = 0;
|
| 499 | }
|
| 500 |
|
| 501 | run_start_value = current_value;
|
| 502 | }
|
| 503 |
|
| 504 | // Send remaining code
|
| 505 | send_code(
|
| 506 | frequencies[run_start_value as usize],
|
| 507 | run_length,
|
| 508 | frequencies[run_length_code],
|
| 509 | &mut code_bits,
|
| 510 | &mut code_bit_count,
|
| 511 | &mut out,
|
| 512 | )?;
|
| 513 |
|
| 514 | let data_length = out.position() - start_position; // we shouldn't count the last byte write
|
| 515 |
|
| 516 | if code_bit_count != 0 {
|
| 517 | out.write(&[
|
| 518 | (code_bits << (8 - code_bit_count) & 0xff) as u8
|
| 519 | ])?;
|
| 520 | }
|
| 521 |
|
| 522 | Ok(data_length * 8 + code_bit_count)
|
| 523 | }
|
| 524 |
|
| 525 | ///
|
| 526 | /// Pack an encoding table:
|
| 527 | /// - only code lengths, not actual codes, are stored
|
| 528 | /// - runs of zeroes are compressed as follows:
|
| 529 | ///
|
| 530 | /// unpacked packed
|
| 531 | /// --------------------------------
|
| 532 | /// 1 zero 0 (6 bits)
|
| 533 | /// 2 zeroes 59
|
| 534 | /// 3 zeroes 60
|
| 535 | /// 4 zeroes 61
|
| 536 | /// 5 zeroes 62
|
| 537 | /// n zeroes (6 or more) 63 n-6 (6 + 8 bits)
|
| 538 | ///
|
| 539 | fn pack_encoding_table(
|
| 540 | frequencies: &[u64],
|
| 541 | min_index: usize,
|
| 542 | max_index: usize,
|
| 543 | mut out: &mut Cursor<Vec<u8>>,
|
| 544 | ) -> UnitResult
|
| 545 | {
|
| 546 | let mut code_bits = 0_u64;
|
| 547 | let mut code_bit_count = 0_u64;
|
| 548 |
|
| 549 | let mut frequency_index = min_index;
|
| 550 | while frequency_index <= max_index { // TODO slice iteration?
|
| 551 | let code_length = length(frequencies[frequency_index]);
|
| 552 |
|
| 553 | if code_length == 0 {
|
| 554 | let mut zero_run = 1;
|
| 555 |
|
| 556 | while frequency_index < max_index && zero_run < LONGEST_LONG_RUN {
|
| 557 | if length(frequencies[frequency_index + 1]) > 0 {
|
| 558 | break;
|
| 559 | }
|
| 560 |
|
| 561 | frequency_index += 1;
|
| 562 | zero_run += 1;
|
| 563 | }
|
| 564 |
|
| 565 | if zero_run >= 2 {
|
| 566 | if zero_run >= SHORTEST_LONG_RUN {
|
| 567 | write_bits(6, LONG_ZEROCODE_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
|
| 568 | write_bits(8, zero_run - SHORTEST_LONG_RUN, &mut code_bits, &mut code_bit_count, &mut out)?;
|
| 569 | }
|
| 570 | else {
|
| 571 | write_bits(6, SHORT_ZEROCODE_RUN + zero_run - 2, &mut code_bits, &mut code_bit_count, &mut out)?;
|
| 572 | }
|
| 573 |
|
| 574 | frequency_index += 1; // we must increment or else this may go very wrong
|
| 575 | continue;
|
| 576 | }
|
| 577 | }
|
| 578 |
|
| 579 | write_bits(6, code_length, &mut code_bits, &mut code_bit_count, &mut out)?;
|
| 580 | frequency_index += 1;
|
| 581 | }
|
| 582 |
|
| 583 | if code_bit_count > 0 {
|
| 584 | out.write(&[
|
| 585 | (code_bits << (8 - code_bit_count)) as u8
|
| 586 | ])?;
|
| 587 | }
|
| 588 |
|
| 589 | Ok(())
|
| 590 | }
|
| 591 |
|
| 592 | /// Build a "canonical" Huffman code table:
|
| 593 | /// - for each (uncompressed) symbol, code contains the length
|
| 594 | /// of the corresponding code (in the compressed data)
|
| 595 | /// - canonical codes are computed and stored in code
|
| 596 | /// - the rules for constructing canonical codes are as follows:
|
| 597 | /// * shorter codes (if filled with zeroes to the right)
|
| 598 | /// have a numerically higher value than longer codes
|
| 599 | /// * for codes with the same length, numerical values
|
| 600 | /// increase with numerical symbol values
|
| 601 | /// - because the canonical code table can be constructed from
|
| 602 | /// symbol lengths alone, the code table can be transmitted
|
| 603 | /// without sending the actual code values
|
| 604 | /// - see http://www.compressconsult.com/huffman/
|
| 605 | fn build_canonical_table(code_table: &mut [u64]) {
|
| 606 | debug_assert_eq!(code_table.len(), ENCODING_TABLE_SIZE);
|
| 607 |
|
| 608 | let mut count_per_code = [0_u64; 59];
|
| 609 |
|
| 610 | for &code in code_table.iter() {
|
| 611 | count_per_code[u64_to_usize(code)] += 1;
|
| 612 | }
|
| 613 |
|
| 614 | // For each i from 58 through 1, compute the
|
| 615 | // numerically lowest code with length i, and
|
| 616 | // store that code in n[i].
|
| 617 | {
|
| 618 | let mut code = 0_u64; // TODO use foldr?
|
| 619 | for count in &mut count_per_code.iter_mut().rev() {
|
| 620 | let next_code = (code + *count) >> 1;
|
| 621 | *count = code;
|
| 622 | code = next_code;
|
| 623 | }
|
| 624 | }
|
| 625 |
|
| 626 | // code[i] contains the length, l, of the
|
| 627 | // code for symbol i. Assign the next available
|
| 628 | // code of length l to the symbol and store both
|
| 629 | // l and the code in code[i]. // TODO iter + filter ?
|
| 630 | for symbol_length in code_table.iter_mut() {
|
| 631 | let current_length = *symbol_length;
|
| 632 | let code_index = u64_to_usize(current_length);
|
| 633 | if current_length > 0 {
|
| 634 | *symbol_length = current_length | (count_per_code[code_index] << 6);
|
| 635 | count_per_code[code_index] += 1;
|
| 636 | }
|
| 637 | }
|
| 638 | }
|
| 639 |
|
| 640 |
|
| 641 | /// Compute Huffman codes (based on frq input) and store them in frq:
|
| 642 | /// - code structure is : [63:lsb - 6:msb] | [5-0: bit length];
|
| 643 | /// - max code length is 58 bits;
|
| 644 | /// - codes outside the range [im-iM] have a null length (unused values);
|
| 645 | /// - original frequencies are destroyed;
|
| 646 | /// - encoding tables are used by hufEncode() and hufBuildDecTable();
|
| 647 | ///
|
| 648 | /// NB: The following code "(*a == *b) && (a > b))" was added to ensure
|
| 649 | /// elements in the heap with the same value are sorted by index.
|
| 650 | /// This is to ensure, the STL make_heap()/pop_heap()/push_heap() methods
|
| 651 | /// produced a resultant sorted heap that is identical across OSes.
|
| 652 | fn build_encoding_table(
|
| 653 | frequencies: &mut [u64], // input frequencies, output encoding table
|
| 654 | ) -> (usize, usize) // return frequency max min range
|
| 655 | {
|
| 656 | debug_assert_eq!(frequencies.len(), ENCODING_TABLE_SIZE);
|
| 657 |
|
| 658 | /// Frequency with position, used for MinHeap.
|
| 659 | #[derive (Eq, PartialEq, Copy, Clone)]
|
| 660 | struct HeapFrequency {
|
| 661 | position: usize,
|
| 662 | frequency: u64,
|
| 663 | }
|
| 664 |
|
| 665 | impl Ord for HeapFrequency {
|
| 666 | fn cmp(&self, other: &Self) -> Ordering {
|
| 667 | other.frequency.cmp(&self.frequency)
|
| 668 | .then_with(|| other.position.cmp(&self.position))
|
| 669 | }
|
| 670 | }
|
| 671 |
|
| 672 | impl PartialOrd for HeapFrequency {
|
| 673 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) }
|
| 674 | }
|
| 675 |
|
| 676 | // This function assumes that when it is called, array frq
|
| 677 | // indicates the frequency of all possible symbols in the data
|
| 678 | // that are to be Huffman-encoded. (frq[i] contains the number
|
| 679 | // of occurrences of symbol i in the data.)
|
| 680 | //
|
| 681 | // The loop below does three things:
|
| 682 | //
|
| 683 | // 1) Finds the minimum and maximum indices that point
|
| 684 | // to non-zero entries in frq:
|
| 685 | //
|
| 686 | // frq[im] != 0, and frq[i] == 0 for all i < im
|
| 687 | // frq[iM] != 0, and frq[i] == 0 for all i > iM
|
| 688 | //
|
| 689 | // 2) Fills array fHeap with pointers to all non-zero
|
| 690 | // entries in frq.
|
| 691 | //
|
| 692 | // 3) Initializes array hlink such that hlink[i] == i
|
| 693 | // for all array entries.
|
| 694 |
|
| 695 | // We need to use vec here or we overflow the stack.
|
| 696 | let mut links = vec![0_usize; ENCODING_TABLE_SIZE];
|
| 697 | let mut frequency_heap = vec![0_usize; ENCODING_TABLE_SIZE];
|
| 698 |
|
| 699 | // This is a good solution since we don't have usize::MAX items (no panics or UB),
|
| 700 | // and since this is short-circuit, it stops at the first in order non zero element.
|
| 701 | let min_frequency_index = frequencies.iter().position(|f| *f != 0).unwrap_or(0);
|
| 702 |
|
| 703 | let mut max_frequency_index = 0;
|
| 704 | let mut frequency_count = 0;
|
| 705 |
|
| 706 | // assert bounds check to optimize away bounds check in loops
|
| 707 | assert!(links.len() >= ENCODING_TABLE_SIZE);
|
| 708 | assert!(frequencies.len() >= ENCODING_TABLE_SIZE);
|
| 709 |
|
| 710 | for index in min_frequency_index..ENCODING_TABLE_SIZE {
|
| 711 | links[index] = index; // TODO for x in links.iter().enumerate()
|
| 712 |
|
| 713 | if frequencies[index] != 0 {
|
| 714 | frequency_heap[frequency_count] = index;
|
| 715 | max_frequency_index = index;
|
| 716 | frequency_count += 1;
|
| 717 | }
|
| 718 | }
|
| 719 |
|
| 720 |
|
| 721 | // Add a pseudo-symbol, with a frequency count of 1, to frq;
|
| 722 | // adjust the fHeap and hlink array accordingly. Function
|
| 723 | // hufEncode() uses the pseudo-symbol for run-length encoding.
|
| 724 |
|
| 725 | max_frequency_index += 1;
|
| 726 | frequencies[max_frequency_index] = 1;
|
| 727 | frequency_heap[frequency_count] = max_frequency_index;
|
| 728 | frequency_count += 1;
|
| 729 |
|
| 730 | // Build an array, scode, such that scode[i] contains the number
|
| 731 | // of bits assigned to symbol i. Conceptually this is done by
|
| 732 | // constructing a tree whose leaves are the symbols with non-zero
|
| 733 | // frequency:
|
| 734 | //
|
| 735 | // Make a heap that contains all symbols with a non-zero frequency,
|
| 736 | // with the least frequent symbol on top.
|
| 737 | //
|
| 738 | // Repeat until only one symbol is left on the heap:
|
| 739 | //
|
| 740 | // Take the two least frequent symbols off the top of the heap.
|
| 741 | // Create a new node that has first two nodes as children, and
|
| 742 | // whose frequency is the sum of the frequencies of the first
|
| 743 | // two nodes. Put the new node back into the heap.
|
| 744 | //
|
| 745 | // The last node left on the heap is the root of the tree. For each
|
| 746 | // leaf node, the distance between the root and the leaf is the length
|
| 747 | // of the code for the corresponding symbol.
|
| 748 | //
|
| 749 | // The loop below doesn't actually build the tree; instead we compute
|
| 750 | // the distances of the leaves from the root on the fly. When a new
|
| 751 | // node is added to the heap, then that node's descendants are linked
|
| 752 | // into a single linear list that starts at the new node, and the code
|
| 753 | // lengths of the descendants (that is, their distance from the root
|
| 754 | // of the tree) are incremented by one.
|
| 755 | let mut heap = BinaryHeap::with_capacity(frequency_count);
|
| 756 | for index in frequency_heap.drain(..frequency_count) {
|
| 757 | heap.push(HeapFrequency { position: index, frequency: frequencies[index] });
|
| 758 | }
|
| 759 |
|
| 760 | let mut s_code = vec![0_u64; ENCODING_TABLE_SIZE];
|
| 761 |
|
| 762 | while frequency_count > 1 {
|
| 763 | // Find the indices, mm and m, of the two smallest non-zero frq
|
| 764 | // values in fHeap, add the smallest frq to the second-smallest
|
| 765 | // frq, and remove the smallest frq value from fHeap.
|
| 766 | let (high_position, low_position) = {
|
| 767 | let smallest_frequency = heap.pop().expect("heap empty bug" );
|
| 768 | frequency_count -= 1;
|
| 769 |
|
| 770 | let mut second_smallest_frequency = heap.peek_mut().expect("heap empty bug" );
|
| 771 | second_smallest_frequency.frequency += smallest_frequency.frequency;
|
| 772 |
|
| 773 | (second_smallest_frequency.position, smallest_frequency.position)
|
| 774 | };
|
| 775 |
|
| 776 | // The entries in scode are linked into lists with the
|
| 777 | // entries in hlink serving as "next" pointers and with
|
| 778 | // the end of a list marked by hlink[j] == j.
|
| 779 | //
|
| 780 | // Traverse the lists that start at scode[m] and scode[mm].
|
| 781 | // For each element visited, increment the length of the
|
| 782 | // corresponding code by one bit. (If we visit scode[j]
|
| 783 | // during the traversal, then the code for symbol j becomes
|
| 784 | // one bit longer.)
|
| 785 | //
|
| 786 | // Merge the lists that start at scode[m] and scode[mm]
|
| 787 | // into a single list that starts at scode[m].
|
| 788 |
|
| 789 | // Add a bit to all codes in the first list.
|
| 790 | let mut index = high_position; // TODO fold()
|
| 791 | loop {
|
| 792 | s_code[index] += 1;
|
| 793 | debug_assert!(s_code[index] <= 58);
|
| 794 |
|
| 795 | // merge the two lists
|
| 796 | if links[index] == index {
|
| 797 | links[index] = low_position;
|
| 798 | break;
|
| 799 | }
|
| 800 |
|
| 801 | index = links[index];
|
| 802 | }
|
| 803 |
|
| 804 | // Add a bit to all codes in the second list
|
| 805 | let mut index = low_position; // TODO fold()
|
| 806 | loop {
|
| 807 | s_code[index] += 1;
|
| 808 | debug_assert!(s_code[index] <= 58);
|
| 809 |
|
| 810 | if links[index] == index {
|
| 811 | break;
|
| 812 | }
|
| 813 |
|
| 814 | index = links[index];
|
| 815 | }
|
| 816 | }
|
| 817 |
|
| 818 | // Build a canonical Huffman code table, replacing the code
|
| 819 | // lengths in scode with (code, code length) pairs. Copy the
|
| 820 | // code table from scode into frq.
|
| 821 | build_canonical_table(&mut s_code);
|
| 822 | frequencies.copy_from_slice(&s_code);
|
| 823 |
|
| 824 | (min_frequency_index, max_frequency_index)
|
| 825 | }
|
| 826 |
|
| 827 |
|
| 828 | #[inline ] fn length(code: u64) -> u64 { code & 63 }
|
| 829 | #[inline ] fn code(code: u64) -> u64 { code >> 6 }
|
| 830 |
|
| 831 | const INVALID_BIT_COUNT: &'static str = "invalid number of bits" ;
|
| 832 | const INVALID_TABLE_ENTRY: &'static str = "invalid code table entry" ;
|
| 833 | const NOT_ENOUGH_DATA: &'static str = "decoded data are shorter than expected" ;
|
| 834 | const INVALID_TABLE_SIZE: &'static str = "unexpected end of code table data" ;
|
| 835 | const TABLE_TOO_LONG: &'static str = "code table is longer than expected" ;
|
| 836 | const INVALID_CODE: &'static str = "invalid code" ;
|
| 837 | const TOO_MUCH_DATA: &'static str = "decoded data are longer than expected" ;
|
| 838 |
|
| 839 |
|
| 840 | #[cfg (test)]
|
| 841 | mod test {
|
| 842 | use super::*;
|
| 843 | use rand::{Rng, SeedableRng};
|
| 844 |
|
| 845 | const UNCOMPRESSED_ARRAY: [u16; 100] = [
|
| 846 | 3852, 2432, 33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013, 7715, 59875, 28182,
|
| 847 | 34449, 19983, 20399, 63407, 29486, 4877, 26738, 44815, 14042, 46091, 48228, 25682, 35412,
|
| 848 | 7582, 65069, 6632, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
|
| 849 | 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
|
| 850 | 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
|
| 851 | 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
|
| 852 | 3159, 29002, 14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347, 16991, 21303, 26263,
|
| 853 | 8312, 14017, 41777, 43240, 3500, 60250, 52437, 45715, 61520,
|
| 854 | ];
|
| 855 |
|
| 856 | const UNCOMPRESSED_ARRAY_SPECIAL: [u16; 100] = [
|
| 857 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28182,
|
| 858 | 0, 65534, 0, 65534, 0, 65534, 0, 65534, 0, 0, 0, 0, 0,
|
| 859 | 0, 0, 0, 54124, 13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897,
|
| 860 | 42371, 54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991, 2882, 17039,
|
| 861 | 56389, 20835, 57057, 54081, 3414, 33957, 52584, 10222, 25139, 40002, 44980, 1602, 48021,
|
| 862 | 19703, 6562, 61777, 41582, 201, 31253, 51790, 15888, 40921, 3627, 12184, 16036, 26349,
|
| 863 | 3159, 29002, 14535, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
| 864 | 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534, 65534,
|
| 865 | ];
|
| 866 |
|
| 867 | const COMPRESSED_ARRAY: [u8; 703] = [
|
| 868 | 0xc9, 0x0, 0x0, 0x0, 0x2e, 0xfe, 0x0, 0x0, 0x56, 0x2, 0x0, 0x0, 0xa2, 0x2, 0x0, 0x0, 0x0,
|
| 869 | 0x0, 0x0, 0x0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x47,
|
| 870 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x28, 0x1f, 0xff, 0xff, 0xed, 0x87, 0xff, 0xff, 0xf0,
|
| 871 | 0x91, 0xff, 0xf8, 0x1f, 0xf4, 0xf1, 0xff, 0x78, 0x1f, 0xfd, 0xa1, 0xff, 0xff, 0xff, 0xff,
|
| 872 | 0xff, 0xff, 0xfa, 0xc7, 0xfe, 0x4, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 873 | 0xff, 0xed, 0x1f, 0xf3, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe8, 0x7, 0xfd, 0xf8,
|
| 874 | 0x7f, 0xff, 0xff, 0xff, 0xfd, 0x10, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x51, 0xff,
|
| 875 | 0xff, 0xff, 0xff, 0xfe, 0x1, 0xff, 0x73, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 876 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0x0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 877 | 0xff, 0xff, 0xff, 0xfc, 0xa4, 0x7f, 0xf5, 0x7, 0xfc, 0x48, 0x7f, 0xe0, 0x47, 0xff, 0xff,
|
| 878 | 0xf5, 0x91, 0xff, 0xff, 0xff, 0xff, 0xf1, 0xf1, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x21, 0xff,
|
| 879 | 0x7f, 0x1f, 0xf8, 0xd1, 0xff, 0xe7, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x1f, 0xf2, 0x91,
|
| 880 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1c, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xe7,
|
| 881 | 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x8c, 0x7f, 0xff, 0xff, 0xc, 0x1f, 0xff, 0xff,
|
| 882 | 0xe5, 0x7, 0xff, 0xff, 0xfa, 0x81, 0xff, 0xff, 0xff, 0x20, 0x7f, 0xff, 0xff, 0xff, 0xff,
|
| 883 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 884 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xbc, 0x7f, 0xff, 0xff, 0xff, 0xfc, 0x38, 0x7f, 0xff,
|
| 885 | 0xff, 0xff, 0xfc, 0xd0, 0x7f, 0xd3, 0xc7, 0xff, 0xff, 0xf7, 0x91, 0xff, 0xff, 0xff, 0xff,
|
| 886 | 0xfe, 0xc1, 0xff, 0xff, 0xff, 0xff, 0xf9, 0x61, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc7,
|
| 887 | 0x87, 0xff, 0xff, 0xfd, 0x81, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x87, 0xff, 0xff,
|
| 888 | 0xff, 0xff, 0xfe, 0x87, 0xff, 0x58, 0x7f, 0xff, 0xff, 0xff, 0xfd, 0xec, 0x7f, 0xff, 0xff,
|
| 889 | 0xff, 0xfe, 0xd0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0x6c, 0x7f, 0xcb, 0x47, 0xff, 0xff, 0xf3,
|
| 890 | 0x61, 0xff, 0xff, 0xff, 0x80, 0x7f, 0xe1, 0xc7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1f,
|
| 891 | 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 892 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x18, 0x1f, 0xff, 0xff,
|
| 893 | 0xff, 0xff, 0xff, 0xfd, 0xcc, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x11, 0xff, 0xff,
|
| 894 | 0xff, 0xff, 0xf8, 0x41, 0xff, 0xbc, 0x1f, 0xff, 0xff, 0xc4, 0x47, 0xff, 0xff, 0xf2, 0x91,
|
| 895 | 0xff, 0xe0, 0x1f, 0xff, 0xff, 0xff, 0xff, 0x6d, 0x1f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 896 | 0xff, 0xff, 0xff, 0xff, 0xff, 0x2, 0x1f, 0xf9, 0xe1, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xe1,
|
| 897 | 0xff, 0xff, 0xfd, 0xb0, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe1, 0xff, 0xff, 0xff, 0xff,
|
| 898 | 0xff, 0xff, 0xff, 0xff, 0x5a, 0x1f, 0xfc, 0x81, 0xbf, 0x29, 0x1b, 0xff, 0xff, 0xff, 0xff,
|
| 899 | 0xff, 0xff, 0xff, 0xf3, 0x61, 0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x1b,
|
| 900 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0xb1, 0xbf, 0xff, 0xfd, 0x80, 0x6f, 0xff,
|
| 901 | 0xff, 0xf, 0x1b, 0xf8, 0xc1, 0xbf, 0xff, 0xfc, 0xb4, 0x6f, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 902 | 0xff, 0xff, 0xda, 0x46, 0xfc, 0x54, 0x6f, 0xc9, 0x6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 903 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x21, 0x1b, 0xff, 0xff, 0xe0, 0x86, 0xff, 0xff,
|
| 904 | 0xff, 0xff, 0xe2, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
|
| 905 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xf3, 0x91, 0xbf, 0xff, 0xfe, 0x24, 0x6f, 0xff, 0xff, 0x6b,
|
| 906 | 0x1b, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfd, 0xb1, 0xbf, 0xfa, 0x1b, 0xfb, 0x11,
|
| 907 | 0xbf, 0xff, 0xfe, 0x8, 0x6f, 0xff, 0xff, 0x42, 0x1b, 0xff, 0xff, 0xff, 0xff, 0xb9, 0x1b,
|
| 908 | 0xff, 0xff, 0xcf, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf1, 0x31,
|
| 909 | 0x86, 0x10, 0x9, 0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2, 0x34, 0x9e,
|
| 910 | 0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1, 0x98, 0x14, 0x59, 0x57, 0x2c,
|
| 911 | 0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc, 0xe6, 0x13, 0x38, 0xc, 0x8e, 0xe2, 0xc, 0xfe,
|
| 912 | 0x49, 0x73, 0xbc, 0x2b, 0x7b, 0x9, 0x27, 0x79, 0x14, 0xc, 0x94, 0x42, 0xf8, 0x7c, 0x1,
|
| 913 | 0x8d, 0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe, 0x8d, 0x8,
|
| 914 | 0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d, 0xc0,
|
| 915 | ];
|
| 916 |
|
| 917 | fn fill(rng: &mut impl Rng, size: usize) -> Vec<u16> {
|
| 918 | if rng.gen_bool(0.2) {
|
| 919 | let value = if rng.gen_bool(0.5) { 0 } else { u16::MAX };
|
| 920 | return vec![ value; size ];
|
| 921 | }
|
| 922 |
|
| 923 | let mut data = vec![0_u16; size];
|
| 924 |
|
| 925 | data.iter_mut().for_each(|v| {
|
| 926 | *v = rng.gen_range(0_u16 .. u16::MAX);
|
| 927 | });
|
| 928 |
|
| 929 | data
|
| 930 | }
|
| 931 |
|
| 932 | /// Test using both input and output from a custom ILM OpenEXR test.
|
| 933 | #[test ]
|
| 934 | fn compression_comparation() {
|
| 935 | let raw = compress(&UNCOMPRESSED_ARRAY).unwrap();
|
| 936 | assert_eq!(raw, COMPRESSED_ARRAY.to_vec());
|
| 937 | }
|
| 938 |
|
| 939 | #[test ]
|
| 940 | fn round_trip() {
|
| 941 | let mut random = rand::rngs::StdRng::from_seed(SEED);
|
| 942 | let raw = fill(&mut random, u16::MAX as usize);
|
| 943 |
|
| 944 | let compressed = compress(&raw).unwrap();
|
| 945 | let uncompressed = decompress(&compressed, raw.len()).unwrap();
|
| 946 |
|
| 947 | assert_eq!(uncompressed, raw);
|
| 948 | }
|
| 949 |
|
| 950 | #[test ]
|
| 951 | fn repetitions_special() {
|
| 952 | let raw = UNCOMPRESSED_ARRAY_SPECIAL;
|
| 953 |
|
| 954 | let compressed = compress(&raw).unwrap();
|
| 955 | let uncompressed = decompress(&compressed, raw.len()).unwrap();
|
| 956 |
|
| 957 | assert_eq!(uncompressed, raw.to_vec());
|
| 958 | }
|
| 959 |
|
| 960 | #[test ]
|
| 961 | fn round_trip100() {
|
| 962 | let mut random = rand::rngs::StdRng::from_seed(SEED);
|
| 963 |
|
| 964 | for size_multiplier in 1..10 {
|
| 965 | let raw = fill(&mut random, size_multiplier * 50_000);
|
| 966 |
|
| 967 | let compressed = compress(&raw).unwrap();
|
| 968 | let uncompressed = decompress(&compressed, raw.len()).unwrap();
|
| 969 |
|
| 970 | assert_eq!(uncompressed, raw);
|
| 971 | }
|
| 972 | }
|
| 973 |
|
| 974 | #[test ]
|
| 975 | fn test_zeroes(){
|
| 976 | let uncompressed: &[u16] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ];
|
| 977 |
|
| 978 | let compressed = compress(uncompressed).unwrap();
|
| 979 | let decompressed = decompress(&compressed, uncompressed.len()).unwrap();
|
| 980 |
|
| 981 | assert_eq!(uncompressed, decompressed.as_slice());
|
| 982 | }
|
| 983 |
|
| 984 | const SEED: [u8; 32] = [
|
| 985 | 12,155,32,34,112,109,98,54,
|
| 986 | 12,255,32,34,112,109,98,55,
|
| 987 | 12,155,32,34,12,109,98,54,
|
| 988 | 12,35,32,34,112,109,48,54,
|
| 989 | ];
|
| 990 | }
|
| 991 | |