1 | use super::super::blocks::sequence_section::ModeType; |
2 | use super::super::blocks::sequence_section::Sequence; |
3 | use super::super::blocks::sequence_section::SequencesHeader; |
4 | use super::bit_reader_reverse::{BitReaderReversed, GetBitsError}; |
5 | use super::scratch::FSEScratch; |
6 | use crate::fse::{FSEDecoder, FSEDecoderError, FSETableError}; |
7 | use alloc::vec::Vec; |
8 | |
9 | #[derive (Debug, derive_more::Display, derive_more::From)] |
10 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
11 | #[non_exhaustive ] |
12 | pub enum DecodeSequenceError { |
13 | #[display(fmt = "{_0:?}" )] |
14 | #[from] |
15 | GetBitsError(GetBitsError), |
16 | #[display(fmt = "{_0:?}" )] |
17 | #[from] |
18 | FSEDecoderError(FSEDecoderError), |
19 | #[display(fmt = "{_0:?}" )] |
20 | #[from] |
21 | FSETableError(FSETableError), |
22 | #[display( |
23 | fmt = "Padding at the end of the sequence_section was more than a byte long: {skipped_bits} bits. Probably caused by data corruption" |
24 | )] |
25 | ExtraPadding { skipped_bits: i32 }, |
26 | #[display(fmt = "Do not support offsets bigger than 1<<32; got: {offset_code}" )] |
27 | UnsupportedOffset { offset_code: u8 }, |
28 | #[display(fmt = "Read an offset == 0. That is an illegal value for offsets" )] |
29 | ZeroOffset, |
30 | #[display(fmt = "Bytestream did not contain enough bytes to decode num_sequences" )] |
31 | NotEnoughBytesForNumSequences, |
32 | #[display(fmt = "Did not use full bitstream. Bits left: {bits_remaining} ({} bytes)" , bits_remaining / 8)] |
33 | ExtraBits { bits_remaining: isize }, |
34 | #[display(fmt = "compression modes are none but they must be set to something" )] |
35 | MissingCompressionMode, |
36 | #[display(fmt = "Need a byte to read for RLE ll table" )] |
37 | MissingByteForRleLlTable, |
38 | #[display(fmt = "Need a byte to read for RLE of table" )] |
39 | MissingByteForRleOfTable, |
40 | #[display(fmt = "Need a byte to read for RLE ml table" )] |
41 | MissingByteForRleMlTable, |
42 | } |
43 | |
44 | pub fn decode_sequences( |
45 | section: &SequencesHeader, |
46 | source: &[u8], |
47 | scratch: &mut FSEScratch, |
48 | target: &mut Vec<Sequence>, |
49 | ) -> Result<(), DecodeSequenceError> { |
50 | let bytes_read = maybe_update_fse_tables(section, source, scratch)?; |
51 | |
52 | vprintln!("Updating tables used {} bytes" , bytes_read); |
53 | |
54 | let bit_stream = &source[bytes_read..]; |
55 | |
56 | let mut br = BitReaderReversed::new(bit_stream); |
57 | |
58 | //skip the 0 padding at the end of the last byte of the bit stream and throw away the first 1 found |
59 | let mut skipped_bits = 0; |
60 | loop { |
61 | let val = br.get_bits(1)?; |
62 | skipped_bits += 1; |
63 | if val == 1 || skipped_bits > 8 { |
64 | break; |
65 | } |
66 | } |
67 | if skipped_bits > 8 { |
68 | //if more than 7 bits are 0, this is not the correct end of the bitstream. Either a bug or corrupted data |
69 | return Err(DecodeSequenceError::ExtraPadding { skipped_bits }); |
70 | } |
71 | |
72 | if scratch.ll_rle.is_some() || scratch.ml_rle.is_some() || scratch.of_rle.is_some() { |
73 | decode_sequences_with_rle(section, &mut br, scratch, target) |
74 | } else { |
75 | decode_sequences_without_rle(section, &mut br, scratch, target) |
76 | } |
77 | } |
78 | |
79 | fn decode_sequences_with_rle( |
80 | section: &SequencesHeader, |
81 | br: &mut BitReaderReversed<'_>, |
82 | scratch: &FSEScratch, |
83 | target: &mut Vec<Sequence>, |
84 | ) -> Result<(), DecodeSequenceError> { |
85 | let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths); |
86 | let mut ml_dec = FSEDecoder::new(&scratch.match_lengths); |
87 | let mut of_dec = FSEDecoder::new(&scratch.offsets); |
88 | |
89 | if scratch.ll_rle.is_none() { |
90 | ll_dec.init_state(br)?; |
91 | } |
92 | if scratch.of_rle.is_none() { |
93 | of_dec.init_state(br)?; |
94 | } |
95 | if scratch.ml_rle.is_none() { |
96 | ml_dec.init_state(br)?; |
97 | } |
98 | |
99 | target.clear(); |
100 | target.reserve(section.num_sequences as usize); |
101 | |
102 | for _seq_idx in 0..section.num_sequences { |
103 | //get the codes from either the RLE byte or from the decoder |
104 | let ll_code = if scratch.ll_rle.is_some() { |
105 | scratch.ll_rle.unwrap() |
106 | } else { |
107 | ll_dec.decode_symbol() |
108 | }; |
109 | let ml_code = if scratch.ml_rle.is_some() { |
110 | scratch.ml_rle.unwrap() |
111 | } else { |
112 | ml_dec.decode_symbol() |
113 | }; |
114 | let of_code = if scratch.of_rle.is_some() { |
115 | scratch.of_rle.unwrap() |
116 | } else { |
117 | of_dec.decode_symbol() |
118 | }; |
119 | |
120 | let (ll_value, ll_num_bits) = lookup_ll_code(ll_code); |
121 | let (ml_value, ml_num_bits) = lookup_ml_code(ml_code); |
122 | |
123 | //println!("Sequence: {}", i); |
124 | //println!("of stat: {}", of_dec.state); |
125 | //println!("of Code: {}", of_code); |
126 | //println!("ll stat: {}", ll_dec.state); |
127 | //println!("ll bits: {}", ll_num_bits); |
128 | //println!("ll Code: {}", ll_value); |
129 | //println!("ml stat: {}", ml_dec.state); |
130 | //println!("ml bits: {}", ml_num_bits); |
131 | //println!("ml Code: {}", ml_value); |
132 | //println!(""); |
133 | |
134 | if of_code >= 32 { |
135 | return Err(DecodeSequenceError::UnsupportedOffset { |
136 | offset_code: of_code, |
137 | }); |
138 | } |
139 | |
140 | let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?; |
141 | let offset = obits as u32 + (1u32 << of_code); |
142 | |
143 | if offset == 0 { |
144 | return Err(DecodeSequenceError::ZeroOffset); |
145 | } |
146 | |
147 | target.push(Sequence { |
148 | ll: ll_value + ll_add as u32, |
149 | ml: ml_value + ml_add as u32, |
150 | of: offset, |
151 | }); |
152 | |
153 | if target.len() < section.num_sequences as usize { |
154 | //println!( |
155 | // "Bits left: {} ({} bytes)", |
156 | // br.bits_remaining(), |
157 | // br.bits_remaining() / 8, |
158 | //); |
159 | if scratch.ll_rle.is_none() { |
160 | ll_dec.update_state(br)?; |
161 | } |
162 | if scratch.ml_rle.is_none() { |
163 | ml_dec.update_state(br)?; |
164 | } |
165 | if scratch.of_rle.is_none() { |
166 | of_dec.update_state(br)?; |
167 | } |
168 | } |
169 | |
170 | if br.bits_remaining() < 0 { |
171 | return Err(DecodeSequenceError::NotEnoughBytesForNumSequences); |
172 | } |
173 | } |
174 | |
175 | if br.bits_remaining() > 0 { |
176 | Err(DecodeSequenceError::ExtraBits { |
177 | bits_remaining: br.bits_remaining(), |
178 | }) |
179 | } else { |
180 | Ok(()) |
181 | } |
182 | } |
183 | |
184 | fn decode_sequences_without_rle( |
185 | section: &SequencesHeader, |
186 | br: &mut BitReaderReversed<'_>, |
187 | scratch: &FSEScratch, |
188 | target: &mut Vec<Sequence>, |
189 | ) -> Result<(), DecodeSequenceError> { |
190 | let mut ll_dec = FSEDecoder::new(&scratch.literal_lengths); |
191 | let mut ml_dec = FSEDecoder::new(&scratch.match_lengths); |
192 | let mut of_dec = FSEDecoder::new(&scratch.offsets); |
193 | |
194 | ll_dec.init_state(br)?; |
195 | of_dec.init_state(br)?; |
196 | ml_dec.init_state(br)?; |
197 | |
198 | target.clear(); |
199 | target.reserve(section.num_sequences as usize); |
200 | |
201 | for _seq_idx in 0..section.num_sequences { |
202 | let ll_code = ll_dec.decode_symbol(); |
203 | let ml_code = ml_dec.decode_symbol(); |
204 | let of_code = of_dec.decode_symbol(); |
205 | |
206 | let (ll_value, ll_num_bits) = lookup_ll_code(ll_code); |
207 | let (ml_value, ml_num_bits) = lookup_ml_code(ml_code); |
208 | |
209 | if of_code >= 32 { |
210 | return Err(DecodeSequenceError::UnsupportedOffset { |
211 | offset_code: of_code, |
212 | }); |
213 | } |
214 | |
215 | let (obits, ml_add, ll_add) = br.get_bits_triple(of_code, ml_num_bits, ll_num_bits)?; |
216 | let offset = obits as u32 + (1u32 << of_code); |
217 | |
218 | if offset == 0 { |
219 | return Err(DecodeSequenceError::ZeroOffset); |
220 | } |
221 | |
222 | target.push(Sequence { |
223 | ll: ll_value + ll_add as u32, |
224 | ml: ml_value + ml_add as u32, |
225 | of: offset, |
226 | }); |
227 | |
228 | if target.len() < section.num_sequences as usize { |
229 | //println!( |
230 | // "Bits left: {} ({} bytes)", |
231 | // br.bits_remaining(), |
232 | // br.bits_remaining() / 8, |
233 | //); |
234 | ll_dec.update_state(br)?; |
235 | ml_dec.update_state(br)?; |
236 | of_dec.update_state(br)?; |
237 | } |
238 | |
239 | if br.bits_remaining() < 0 { |
240 | return Err(DecodeSequenceError::NotEnoughBytesForNumSequences); |
241 | } |
242 | } |
243 | |
244 | if br.bits_remaining() > 0 { |
245 | Err(DecodeSequenceError::ExtraBits { |
246 | bits_remaining: br.bits_remaining(), |
247 | }) |
248 | } else { |
249 | Ok(()) |
250 | } |
251 | } |
252 | |
253 | fn lookup_ll_code(code: u8) -> (u32, u8) { |
254 | match code { |
255 | 0..=15 => (u32::from(code), 0), |
256 | 16 => (16, 1), |
257 | 17 => (18, 1), |
258 | 18 => (20, 1), |
259 | 19 => (22, 1), |
260 | 20 => (24, 2), |
261 | 21 => (28, 2), |
262 | 22 => (32, 3), |
263 | 23 => (40, 3), |
264 | 24 => (48, 4), |
265 | 25 => (64, 6), |
266 | 26 => (128, 7), |
267 | 27 => (256, 8), |
268 | 28 => (512, 9), |
269 | 29 => (1024, 10), |
270 | 30 => (2048, 11), |
271 | 31 => (4096, 12), |
272 | 32 => (8192, 13), |
273 | 33 => (16384, 14), |
274 | 34 => (32768, 15), |
275 | 35 => (65536, 16), |
276 | _ => (0, 255), |
277 | } |
278 | } |
279 | |
280 | fn lookup_ml_code(code: u8) -> (u32, u8) { |
281 | match code { |
282 | 0..=31 => (u32::from(code) + 3, 0), |
283 | 32 => (35, 1), |
284 | 33 => (37, 1), |
285 | 34 => (39, 1), |
286 | 35 => (41, 1), |
287 | 36 => (43, 2), |
288 | 37 => (47, 2), |
289 | 38 => (51, 3), |
290 | 39 => (59, 3), |
291 | 40 => (67, 4), |
292 | 41 => (83, 4), |
293 | 42 => (99, 5), |
294 | 43 => (131, 7), |
295 | 44 => (259, 8), |
296 | 45 => (515, 9), |
297 | 46 => (1027, 10), |
298 | 47 => (2051, 11), |
299 | 48 => (4099, 12), |
300 | 49 => (8195, 13), |
301 | 50 => (16387, 14), |
302 | 51 => (32771, 15), |
303 | 52 => (65539, 16), |
304 | _ => (0, 255), |
305 | } |
306 | } |
307 | |
308 | pub const LL_MAX_LOG: u8 = 9; |
309 | pub const ML_MAX_LOG: u8 = 9; |
310 | pub const OF_MAX_LOG: u8 = 8; |
311 | |
312 | fn maybe_update_fse_tables( |
313 | section: &SequencesHeader, |
314 | source: &[u8], |
315 | scratch: &mut FSEScratch, |
316 | ) -> Result<usize, DecodeSequenceError> { |
317 | let modes = section |
318 | .modes |
319 | .ok_or(DecodeSequenceError::MissingCompressionMode)?; |
320 | |
321 | let mut bytes_read = 0; |
322 | |
323 | match modes.ll_mode() { |
324 | ModeType::FSECompressed => { |
325 | let bytes = scratch.literal_lengths.build_decoder(source, LL_MAX_LOG)?; |
326 | bytes_read += bytes; |
327 | |
328 | vprintln!("Updating ll table" ); |
329 | vprintln!("Used bytes: {}" , bytes); |
330 | scratch.ll_rle = None; |
331 | } |
332 | ModeType::RLE => { |
333 | vprintln!("Use RLE ll table" ); |
334 | if source.is_empty() { |
335 | return Err(DecodeSequenceError::MissingByteForRleLlTable); |
336 | } |
337 | bytes_read += 1; |
338 | scratch.ll_rle = Some(source[0]); |
339 | } |
340 | ModeType::Predefined => { |
341 | vprintln!("Use predefined ll table" ); |
342 | scratch.literal_lengths.build_from_probabilities( |
343 | LL_DEFAULT_ACC_LOG, |
344 | &Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]), |
345 | )?; |
346 | scratch.ll_rle = None; |
347 | } |
348 | ModeType::Repeat => { |
349 | vprintln!("Repeat ll table" ); |
350 | /* Nothing to do */ |
351 | } |
352 | }; |
353 | |
354 | let of_source = &source[bytes_read..]; |
355 | |
356 | match modes.of_mode() { |
357 | ModeType::FSECompressed => { |
358 | let bytes = scratch.offsets.build_decoder(of_source, OF_MAX_LOG)?; |
359 | vprintln!("Updating of table" ); |
360 | vprintln!("Used bytes: {}" , bytes); |
361 | bytes_read += bytes; |
362 | scratch.of_rle = None; |
363 | } |
364 | ModeType::RLE => { |
365 | vprintln!("Use RLE of table" ); |
366 | if of_source.is_empty() { |
367 | return Err(DecodeSequenceError::MissingByteForRleOfTable); |
368 | } |
369 | bytes_read += 1; |
370 | scratch.of_rle = Some(of_source[0]); |
371 | } |
372 | ModeType::Predefined => { |
373 | vprintln!("Use predefined of table" ); |
374 | scratch.offsets.build_from_probabilities( |
375 | OF_DEFAULT_ACC_LOG, |
376 | &Vec::from(&OFFSET_DEFAULT_DISTRIBUTION[..]), |
377 | )?; |
378 | scratch.of_rle = None; |
379 | } |
380 | ModeType::Repeat => { |
381 | vprintln!("Repeat of table" ); |
382 | /* Nothing to do */ |
383 | } |
384 | }; |
385 | |
386 | let ml_source = &source[bytes_read..]; |
387 | |
388 | match modes.ml_mode() { |
389 | ModeType::FSECompressed => { |
390 | let bytes = scratch.match_lengths.build_decoder(ml_source, ML_MAX_LOG)?; |
391 | bytes_read += bytes; |
392 | vprintln!("Updating ml table" ); |
393 | vprintln!("Used bytes: {}" , bytes); |
394 | scratch.ml_rle = None; |
395 | } |
396 | ModeType::RLE => { |
397 | vprintln!("Use RLE ml table" ); |
398 | if ml_source.is_empty() { |
399 | return Err(DecodeSequenceError::MissingByteForRleMlTable); |
400 | } |
401 | bytes_read += 1; |
402 | scratch.ml_rle = Some(ml_source[0]); |
403 | } |
404 | ModeType::Predefined => { |
405 | vprintln!("Use predefined ml table" ); |
406 | scratch.match_lengths.build_from_probabilities( |
407 | ML_DEFAULT_ACC_LOG, |
408 | &Vec::from(&MATCH_LENGTH_DEFAULT_DISTRIBUTION[..]), |
409 | )?; |
410 | scratch.ml_rle = None; |
411 | } |
412 | ModeType::Repeat => { |
413 | vprintln!("Repeat ml table" ); |
414 | /* Nothing to do */ |
415 | } |
416 | }; |
417 | |
418 | Ok(bytes_read) |
419 | } |
420 | |
421 | const LL_DEFAULT_ACC_LOG: u8 = 6; |
422 | const LITERALS_LENGTH_DEFAULT_DISTRIBUTION: [i32; 36] = [ |
423 | 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, |
424 | -1, -1, -1, -1, |
425 | ]; |
426 | |
427 | const ML_DEFAULT_ACC_LOG: u8 = 6; |
428 | const MATCH_LENGTH_DEFAULT_DISTRIBUTION: [i32; 53] = [ |
429 | 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
430 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, |
431 | ]; |
432 | |
433 | const OF_DEFAULT_ACC_LOG: u8 = 5; |
434 | const OFFSET_DEFAULT_DISTRIBUTION: [i32; 29] = [ |
435 | 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, |
436 | ]; |
437 | |
438 | #[test ] |
439 | fn test_ll_default() { |
440 | let mut table = crate::fse::FSETable::new(); |
441 | table |
442 | .build_from_probabilities( |
443 | LL_DEFAULT_ACC_LOG, |
444 | &Vec::from(&LITERALS_LENGTH_DEFAULT_DISTRIBUTION[..]), |
445 | ) |
446 | .unwrap(); |
447 | |
448 | #[cfg (feature = "std" )] |
449 | for idx in 0..table.decode.len() { |
450 | std::println!( |
451 | " {:3}: {:3} {:3} {:3}" , |
452 | idx, |
453 | table.decode[idx].symbol, |
454 | table.decode[idx].num_bits, |
455 | table.decode[idx].base_line |
456 | ); |
457 | } |
458 | |
459 | assert!(table.decode.len() == 64); |
460 | |
461 | //just test a few values. TODO test all values |
462 | assert!(table.decode[0].symbol == 0); |
463 | assert!(table.decode[0].num_bits == 4); |
464 | assert!(table.decode[0].base_line == 0); |
465 | |
466 | assert!(table.decode[19].symbol == 27); |
467 | assert!(table.decode[19].num_bits == 6); |
468 | assert!(table.decode[19].base_line == 0); |
469 | |
470 | assert!(table.decode[39].symbol == 25); |
471 | assert!(table.decode[39].num_bits == 4); |
472 | assert!(table.decode[39].base_line == 16); |
473 | |
474 | assert!(table.decode[60].symbol == 35); |
475 | assert!(table.decode[60].num_bits == 6); |
476 | assert!(table.decode[60].base_line == 0); |
477 | |
478 | assert!(table.decode[59].symbol == 24); |
479 | assert!(table.decode[59].num_bits == 5); |
480 | assert!(table.decode[59].base_line == 32); |
481 | } |
482 | |