1 | use super::super::blocks::block::BlockHeader; |
2 | use super::super::blocks::block::BlockType; |
3 | use super::super::blocks::literals_section::LiteralsSection; |
4 | use super::super::blocks::literals_section::LiteralsSectionType; |
5 | use super::super::blocks::sequence_section::SequencesHeader; |
6 | use super::literals_section_decoder::{decode_literals, DecompressLiteralsError}; |
7 | use super::sequence_execution::ExecuteSequencesError; |
8 | use super::sequence_section_decoder::decode_sequences; |
9 | use super::sequence_section_decoder::DecodeSequenceError; |
10 | use crate::blocks::literals_section::LiteralsSectionParseError; |
11 | use crate::blocks::sequence_section::SequencesHeaderParseError; |
12 | use crate::decoding::scratch::DecoderScratch; |
13 | use crate::decoding::sequence_execution::execute_sequences; |
14 | use crate::io::{self, Read}; |
15 | |
16 | pub struct BlockDecoder { |
17 | header_buffer: [u8; 3], |
18 | internal_state: DecoderState, |
19 | } |
20 | |
/// The step a `BlockDecoder` expects to perform next.
enum DecoderState {
    /// A block header must be read before any body can be decoded.
    ReadyToDecodeNextHeader,
    /// A header has been read; the matching block body comes next.
    ReadyToDecodeNextBody,
    // Matched on but not constructed yet (see the TODO), hence the lint exemption.
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
27 | |
28 | #[derive (Debug)] |
29 | #[non_exhaustive ] |
30 | pub enum BlockHeaderReadError { |
31 | ReadError(io::Error), |
32 | FoundReservedBlock, |
33 | BlockTypeError(BlockTypeError), |
34 | BlockSizeError(BlockSizeError), |
35 | } |
36 | |
#[cfg(feature = "std")]
impl std::error::Error for BlockHeaderReadError {
    /// Returns the wrapped lower-level error, or `None` for
    /// [`BlockHeaderReadError::FoundReservedBlock`], which wraps nothing.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            BlockHeaderReadError::ReadError(source) => Some(source),
            BlockHeaderReadError::BlockTypeError(source) => Some(source),
            BlockHeaderReadError::BlockSizeError(source) => Some(source),
            BlockHeaderReadError::FoundReservedBlock => None,
        }
    }
}
48 | |
49 | impl ::core::fmt::Display for BlockHeaderReadError { |
50 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> ::core::fmt::Result { |
51 | match self { |
52 | BlockHeaderReadError::ReadError(_) => write!(f, "Error while reading the block header" ), |
53 | BlockHeaderReadError::FoundReservedBlock => write!( |
54 | f, |
55 | "Reserved block occured. This is considered corruption by the documentation" |
56 | ), |
57 | BlockHeaderReadError::BlockTypeError(e: &BlockTypeError) => write!(f, "Error getting block type: {}" , e), |
58 | BlockHeaderReadError::BlockSizeError(e: &BlockSizeError) => { |
59 | write!(f, "Error getting block content size: {}" , e) |
60 | } |
61 | } |
62 | } |
63 | } |
64 | |
65 | impl From<io::Error> for BlockHeaderReadError { |
66 | fn from(val: io::Error) -> Self { |
67 | Self::ReadError(val) |
68 | } |
69 | } |
70 | |
71 | impl From<BlockTypeError> for BlockHeaderReadError { |
72 | fn from(val: BlockTypeError) -> Self { |
73 | Self::BlockTypeError(val) |
74 | } |
75 | } |
76 | |
77 | impl From<BlockSizeError> for BlockHeaderReadError { |
78 | fn from(val: BlockSizeError) -> Self { |
79 | Self::BlockSizeError(val) |
80 | } |
81 | } |
82 | |
/// Error raised when the block type stored in a block header cannot be interpreted.
#[derive(Debug)]
#[non_exhaustive]
pub enum BlockTypeError {
    /// `num` is the raw block type value that has no matching block type.
    InvalidBlocktypeNumber { num: u8 },
}

#[cfg(feature = "std")]
impl std::error::Error for BlockTypeError {}

impl core::fmt::Display for BlockTypeError {
    /// Writes the offending block type number together with the accepted values.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            // Bind the field by name; a `&u8` sub-pattern would bind a variable
            // called `u8` and leave `num` undefined.
            BlockTypeError::InvalidBlocktypeNumber { num } => {
                write!(f,
                    "Invalid Blocktype number. Is: {} Should be one of: 0, 1, 2, 3 (3 is reserved though",
                    num,
                )
            }
        }
    }
}
104 | |
105 | #[derive (Debug)] |
106 | #[non_exhaustive ] |
107 | pub enum BlockSizeError { |
108 | BlockSizeTooLarge { size: u32 }, |
109 | } |
110 | |
111 | #[cfg (feature = "std" )] |
112 | impl std::error::Error for BlockSizeError {} |
113 | |
114 | impl core::fmt::Display for BlockSizeError { |
115 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
116 | match self { |
117 | BlockSizeError::BlockSizeTooLarge { size: &u32 } => { |
118 | write!( |
119 | f, |
120 | "Blocksize was bigger than the absolute maximum {} (128kb). Is: {}" , |
121 | ABSOLUTE_MAXIMUM_BLOCK_SIZE, size, |
122 | ) |
123 | } |
124 | } |
125 | } |
126 | } |
127 | |
128 | #[derive (Debug)] |
129 | #[non_exhaustive ] |
130 | pub enum DecompressBlockError { |
131 | BlockContentReadError(io::Error), |
132 | MalformedSectionHeader { |
133 | expected_len: usize, |
134 | remaining_bytes: usize, |
135 | }, |
136 | DecompressLiteralsError(DecompressLiteralsError), |
137 | LiteralsSectionParseError(LiteralsSectionParseError), |
138 | SequencesHeaderParseError(SequencesHeaderParseError), |
139 | DecodeSequenceError(DecodeSequenceError), |
140 | ExecuteSequencesError(ExecuteSequencesError), |
141 | } |
142 | |
#[cfg(feature = "std")]
impl std::error::Error for DecompressBlockError {
    /// Returns the wrapped lower-level error, or `None` for
    /// [`DecompressBlockError::MalformedSectionHeader`], which wraps nothing.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            DecompressBlockError::BlockContentReadError(source) => Some(source),
            DecompressBlockError::DecompressLiteralsError(source) => Some(source),
            DecompressBlockError::LiteralsSectionParseError(source) => Some(source),
            DecompressBlockError::SequencesHeaderParseError(source) => Some(source),
            DecompressBlockError::DecodeSequenceError(source) => Some(source),
            DecompressBlockError::ExecuteSequencesError(source) => Some(source),
            // Listed explicitly so a newly added variant forces a decision here.
            DecompressBlockError::MalformedSectionHeader { .. } => None,
        }
    }
}
157 | |
158 | impl core::fmt::Display for DecompressBlockError { |
159 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
160 | match self { |
161 | DecompressBlockError::BlockContentReadError(e: &Error) => { |
162 | write!(f, "Error while reading the block content: {}" , e) |
163 | } |
164 | DecompressBlockError::MalformedSectionHeader { |
165 | expected_len: &usize, |
166 | remaining_bytes: &usize, |
167 | } => { |
168 | write!(f, |
169 | "Malformed section header. Says literals would be this long: {} but there are only {} bytes left" , |
170 | expected_len, |
171 | remaining_bytes, |
172 | ) |
173 | } |
174 | DecompressBlockError::DecompressLiteralsError(e: &DecompressLiteralsError) => write!(f, " {:?}" , e), |
175 | DecompressBlockError::LiteralsSectionParseError(e: &LiteralsSectionParseError) => write!(f, " {:?}" , e), |
176 | DecompressBlockError::SequencesHeaderParseError(e: &SequencesHeaderParseError) => write!(f, " {:?}" , e), |
177 | DecompressBlockError::DecodeSequenceError(e: &DecodeSequenceError) => write!(f, " {:?}" , e), |
178 | DecompressBlockError::ExecuteSequencesError(e: &ExecuteSequencesError) => write!(f, " {:?}" , e), |
179 | } |
180 | } |
181 | } |
182 | |
183 | impl From<io::Error> for DecompressBlockError { |
184 | fn from(val: io::Error) -> Self { |
185 | Self::BlockContentReadError(val) |
186 | } |
187 | } |
188 | |
189 | impl From<DecompressLiteralsError> for DecompressBlockError { |
190 | fn from(val: DecompressLiteralsError) -> Self { |
191 | Self::DecompressLiteralsError(val) |
192 | } |
193 | } |
194 | |
195 | impl From<LiteralsSectionParseError> for DecompressBlockError { |
196 | fn from(val: LiteralsSectionParseError) -> Self { |
197 | Self::LiteralsSectionParseError(val) |
198 | } |
199 | } |
200 | |
201 | impl From<SequencesHeaderParseError> for DecompressBlockError { |
202 | fn from(val: SequencesHeaderParseError) -> Self { |
203 | Self::SequencesHeaderParseError(val) |
204 | } |
205 | } |
206 | |
207 | impl From<DecodeSequenceError> for DecompressBlockError { |
208 | fn from(val: DecodeSequenceError) -> Self { |
209 | Self::DecodeSequenceError(val) |
210 | } |
211 | } |
212 | |
213 | impl From<ExecuteSequencesError> for DecompressBlockError { |
214 | fn from(val: ExecuteSequencesError) -> Self { |
215 | Self::ExecuteSequencesError(val) |
216 | } |
217 | } |
218 | |
219 | #[derive (Debug)] |
220 | #[non_exhaustive ] |
221 | pub enum DecodeBlockContentError { |
222 | DecoderStateIsFailed, |
223 | ExpectedHeaderOfPreviousBlock, |
224 | ReadError { step: BlockType, source: io::Error }, |
225 | DecompressBlockError(DecompressBlockError), |
226 | } |
227 | |
#[cfg(feature = "std")]
impl std::error::Error for DecodeBlockContentError {
    /// Returns the wrapped lower-level error; the two state errors wrap
    /// nothing and yield `None`.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            DecodeBlockContentError::ReadError { source, .. } => Some(source),
            DecodeBlockContentError::DecompressBlockError(source) => Some(source),
            // Listed explicitly so a newly added variant forces a decision here.
            DecodeBlockContentError::DecoderStateIsFailed
            | DecodeBlockContentError::ExpectedHeaderOfPreviousBlock => None,
        }
    }
}
238 | |
239 | impl core::fmt::Display for DecodeBlockContentError { |
240 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
241 | match self { |
242 | DecodeBlockContentError::DecoderStateIsFailed => { |
243 | write!( |
244 | f, |
245 | "Can't decode next block if failed along the way. Results will be nonsense" , |
246 | ) |
247 | } |
248 | DecodeBlockContentError::ExpectedHeaderOfPreviousBlock => { |
249 | write!(f, |
250 | "Can't decode next block body, while expecting to decode the header of the previous block. Results will be nonsense" , |
251 | ) |
252 | } |
253 | DecodeBlockContentError::ReadError { step: &BlockType, source: &Error } => { |
254 | write!(f, "Error while reading bytes for {}: {}" , step, source,) |
255 | } |
256 | DecodeBlockContentError::DecompressBlockError(e: &DecompressBlockError) => write!(f, " {:?}" , e), |
257 | } |
258 | } |
259 | } |
260 | |
261 | impl From<DecompressBlockError> for DecodeBlockContentError { |
262 | fn from(val: DecompressBlockError) -> Self { |
263 | Self::DecompressBlockError(val) |
264 | } |
265 | } |
266 | |
267 | /// Create a new [BlockDecoder]. |
268 | pub fn new() -> BlockDecoder { |
269 | BlockDecoder { |
270 | internal_state: DecoderState::ReadyToDecodeNextHeader, |
271 | header_buffer: [0u8; 3], |
272 | } |
273 | } |
274 | |
/// Largest block content size, in bytes, that a block header may declare (128 KiB).
const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 1024 * 128;
276 | |
277 | impl BlockDecoder { |
278 | pub fn decode_block_content( |
279 | &mut self, |
280 | header: &BlockHeader, |
281 | workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees |
282 | mut source: impl Read, |
283 | ) -> Result<u64, DecodeBlockContentError> { |
284 | match self.internal_state { |
285 | DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ } |
286 | DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed), |
287 | DecoderState::ReadyToDecodeNextHeader => { |
288 | return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock) |
289 | } |
290 | } |
291 | |
292 | let block_type = header.block_type; |
293 | match block_type { |
294 | BlockType::RLE => { |
295 | const BATCH_SIZE: usize = 512; |
296 | let mut buf = [0u8; BATCH_SIZE]; |
297 | let full_reads = header.decompressed_size / BATCH_SIZE as u32; |
298 | let single_read_size = header.decompressed_size % BATCH_SIZE as u32; |
299 | |
300 | source.read_exact(&mut buf[0..1]).map_err(|err| { |
301 | DecodeBlockContentError::ReadError { |
302 | step: block_type, |
303 | source: err, |
304 | } |
305 | })?; |
306 | self.internal_state = DecoderState::ReadyToDecodeNextHeader; |
307 | |
308 | for i in 1..BATCH_SIZE { |
309 | buf[i] = buf[0]; |
310 | } |
311 | |
312 | for _ in 0..full_reads { |
313 | workspace.buffer.push(&buf[..]); |
314 | } |
315 | let smaller = &mut buf[..single_read_size as usize]; |
316 | workspace.buffer.push(smaller); |
317 | |
318 | Ok(1) |
319 | } |
320 | BlockType::Raw => { |
321 | const BATCH_SIZE: usize = 128 * 1024; |
322 | let mut buf = [0u8; BATCH_SIZE]; |
323 | let full_reads = header.decompressed_size / BATCH_SIZE as u32; |
324 | let single_read_size = header.decompressed_size % BATCH_SIZE as u32; |
325 | |
326 | for _ in 0..full_reads { |
327 | source.read_exact(&mut buf[..]).map_err(|err| { |
328 | DecodeBlockContentError::ReadError { |
329 | step: block_type, |
330 | source: err, |
331 | } |
332 | })?; |
333 | workspace.buffer.push(&buf[..]); |
334 | } |
335 | |
336 | let smaller = &mut buf[..single_read_size as usize]; |
337 | source |
338 | .read_exact(smaller) |
339 | .map_err(|err| DecodeBlockContentError::ReadError { |
340 | step: block_type, |
341 | source: err, |
342 | })?; |
343 | workspace.buffer.push(smaller); |
344 | |
345 | self.internal_state = DecoderState::ReadyToDecodeNextHeader; |
346 | Ok(u64::from(header.decompressed_size)) |
347 | } |
348 | |
349 | BlockType::Reserved => { |
350 | panic!("How did you even get this. The decoder should error out if it detects a reserved-type block" ); |
351 | } |
352 | |
353 | BlockType::Compressed => { |
354 | self.decompress_block(header, workspace, source)?; |
355 | |
356 | self.internal_state = DecoderState::ReadyToDecodeNextHeader; |
357 | Ok(u64::from(header.content_size)) |
358 | } |
359 | } |
360 | } |
361 | |
362 | fn decompress_block( |
363 | &mut self, |
364 | header: &BlockHeader, |
365 | workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees |
366 | mut source: impl Read, |
367 | ) -> Result<(), DecompressBlockError> { |
368 | workspace |
369 | .block_content_buffer |
370 | .resize(header.content_size as usize, 0); |
371 | |
372 | source.read_exact(workspace.block_content_buffer.as_mut_slice())?; |
373 | let raw = workspace.block_content_buffer.as_slice(); |
374 | |
375 | let mut section = LiteralsSection::new(); |
376 | let bytes_in_literals_header = section.parse_from_header(raw)?; |
377 | let raw = &raw[bytes_in_literals_header as usize..]; |
378 | vprintln!( |
379 | "Found {} literalssection with regenerated size: {}, and compressed size: {:?}" , |
380 | section.ls_type, |
381 | section.regenerated_size, |
382 | section.compressed_size |
383 | ); |
384 | |
385 | let upper_limit_for_literals = match section.compressed_size { |
386 | Some(x) => x as usize, |
387 | None => match section.ls_type { |
388 | LiteralsSectionType::RLE => 1, |
389 | LiteralsSectionType::Raw => section.regenerated_size as usize, |
390 | _ => panic!("Bug in this library" ), |
391 | }, |
392 | }; |
393 | |
394 | if raw.len() < upper_limit_for_literals { |
395 | return Err(DecompressBlockError::MalformedSectionHeader { |
396 | expected_len: upper_limit_for_literals, |
397 | remaining_bytes: raw.len(), |
398 | }); |
399 | } |
400 | |
401 | let raw_literals = &raw[..upper_limit_for_literals]; |
402 | vprintln!("Slice for literals: {}" , raw_literals.len()); |
403 | |
404 | workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here |
405 | let bytes_used_in_literals_section = decode_literals( |
406 | §ion, |
407 | &mut workspace.huf, |
408 | raw_literals, |
409 | &mut workspace.literals_buffer, |
410 | )?; |
411 | assert!( |
412 | section.regenerated_size == workspace.literals_buffer.len() as u32, |
413 | "Wrong number of literals: {}, Should have been: {}" , |
414 | workspace.literals_buffer.len(), |
415 | section.regenerated_size |
416 | ); |
417 | assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32); |
418 | |
419 | let raw = &raw[upper_limit_for_literals..]; |
420 | vprintln!("Slice for sequences with headers: {}" , raw.len()); |
421 | |
422 | let mut seq_section = SequencesHeader::new(); |
423 | let bytes_in_sequence_header = seq_section.parse_from_header(raw)?; |
424 | let raw = &raw[bytes_in_sequence_header as usize..]; |
425 | vprintln!( |
426 | "Found sequencessection with sequences: {} and size: {}" , |
427 | seq_section.num_sequences, |
428 | raw.len() |
429 | ); |
430 | |
431 | assert!( |
432 | u32::from(bytes_in_literals_header) |
433 | + bytes_used_in_literals_section |
434 | + u32::from(bytes_in_sequence_header) |
435 | + raw.len() as u32 |
436 | == header.content_size |
437 | ); |
438 | vprintln!("Slice for sequences: {}" , raw.len()); |
439 | |
440 | if seq_section.num_sequences != 0 { |
441 | decode_sequences( |
442 | &seq_section, |
443 | raw, |
444 | &mut workspace.fse, |
445 | &mut workspace.sequences, |
446 | )?; |
447 | vprintln!("Executing sequences" ); |
448 | execute_sequences(workspace)?; |
449 | } else { |
450 | workspace.buffer.push(&workspace.literals_buffer); |
451 | workspace.sequences.clear(); |
452 | } |
453 | |
454 | Ok(()) |
455 | } |
456 | |
457 | pub fn read_block_header( |
458 | &mut self, |
459 | mut r: impl Read, |
460 | ) -> Result<(BlockHeader, u8), BlockHeaderReadError> { |
461 | //match self.internal_state { |
462 | // DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */}, |
463 | // DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")), |
464 | // DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")), |
465 | //} |
466 | |
467 | r.read_exact(&mut self.header_buffer[0..3])?; |
468 | |
469 | let btype = self.block_type()?; |
470 | if let BlockType::Reserved = btype { |
471 | return Err(BlockHeaderReadError::FoundReservedBlock); |
472 | } |
473 | |
474 | let block_size = self.block_content_size()?; |
475 | let decompressed_size = match btype { |
476 | BlockType::Raw => block_size, |
477 | BlockType::RLE => block_size, |
478 | BlockType::Reserved => 0, //should be caught above, this is an error state |
479 | BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb) |
480 | }; |
481 | let content_size = match btype { |
482 | BlockType::Raw => block_size, |
483 | BlockType::Compressed => block_size, |
484 | BlockType::RLE => 1, |
485 | BlockType::Reserved => 0, //should be caught above, this is an error state |
486 | }; |
487 | |
488 | let last_block = self.is_last(); |
489 | |
490 | self.reset_buffer(); |
491 | self.internal_state = DecoderState::ReadyToDecodeNextBody; |
492 | |
493 | //just return 3. Blockheaders always take 3 bytes |
494 | Ok(( |
495 | BlockHeader { |
496 | last_block, |
497 | block_type: btype, |
498 | decompressed_size, |
499 | content_size, |
500 | }, |
501 | 3, |
502 | )) |
503 | } |
504 | |
505 | fn reset_buffer(&mut self) { |
506 | self.header_buffer[0] = 0; |
507 | self.header_buffer[1] = 0; |
508 | self.header_buffer[2] = 0; |
509 | } |
510 | |
511 | fn is_last(&self) -> bool { |
512 | self.header_buffer[0] & 0x1 == 1 |
513 | } |
514 | |
515 | fn block_type(&self) -> Result<BlockType, BlockTypeError> { |
516 | let t = (self.header_buffer[0] >> 1) & 0x3; |
517 | match t { |
518 | 0 => Ok(BlockType::Raw), |
519 | 1 => Ok(BlockType::RLE), |
520 | 2 => Ok(BlockType::Compressed), |
521 | 3 => Ok(BlockType::Reserved), |
522 | other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }), |
523 | } |
524 | } |
525 | |
526 | fn block_content_size(&self) -> Result<u32, BlockSizeError> { |
527 | let val = self.block_content_size_unchecked(); |
528 | if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE { |
529 | Err(BlockSizeError::BlockSizeTooLarge { size: val }) |
530 | } else { |
531 | Ok(val) |
532 | } |
533 | } |
534 | |
535 | fn block_content_size_unchecked(&self) -> u32 { |
536 | u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit |
537 | | (u32::from(self.header_buffer[1]) << 5) |
538 | | (u32::from(self.header_buffer[2]) << 13) |
539 | } |
540 | } |
541 | |