1 | use super::super::blocks::block::BlockHeader; |
2 | use super::super::blocks::block::BlockType; |
3 | use super::super::blocks::literals_section::LiteralsSection; |
4 | use super::super::blocks::literals_section::LiteralsSectionType; |
5 | use super::super::blocks::sequence_section::SequencesHeader; |
6 | use super::literals_section_decoder::{decode_literals, DecompressLiteralsError}; |
7 | use super::sequence_execution::ExecuteSequencesError; |
8 | use super::sequence_section_decoder::decode_sequences; |
9 | use super::sequence_section_decoder::DecodeSequenceError; |
10 | use crate::blocks::literals_section::LiteralsSectionParseError; |
11 | use crate::blocks::sequence_section::SequencesHeaderParseError; |
12 | use crate::decoding::scratch::DecoderScratch; |
13 | use crate::decoding::sequence_execution::execute_sequences; |
14 | use crate::io::{self, Read}; |
15 | |
/// Decodes a single zstd block (header, then body) from a frame.
///
/// Stateful: `read_block_header` must be called before the matching
/// `decode_block_content`; the internal state enforces that ordering.
pub struct BlockDecoder {
    // Scratch space for the 3 raw bytes of the current block header.
    header_buffer: [u8; 3],
    // Tracks whether a header or a body is expected next (or a failed state).
    internal_state: DecoderState,
}
20 | |
/// Internal state machine of [`BlockDecoder`], alternating between
/// header decoding and body decoding.
enum DecoderState {
    // The next call must be `read_block_header`.
    ReadyToDecodeNextHeader,
    // A header has been read; the next call must be `decode_block_content`.
    ReadyToDecodeNextBody,
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
27 | |
28 | #[derive (Debug, derive_more::Display, derive_more::From)] |
29 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
30 | #[non_exhaustive ] |
31 | pub enum BlockHeaderReadError { |
32 | #[display(fmt = "Error while reading the block header" )] |
33 | #[from] |
34 | ReadError(io::Error), |
35 | #[display(fmt = "Reserved block occured. This is considered corruption by the documentation" )] |
36 | FoundReservedBlock, |
37 | #[display(fmt = "Error getting block type: {_0}" )] |
38 | #[from] |
39 | BlockTypeError(BlockTypeError), |
40 | #[display(fmt = "Error getting block content size: {_0}" )] |
41 | #[from] |
42 | BlockSizeError(BlockSizeError), |
43 | } |
44 | |
45 | #[derive (Debug, derive_more::Display)] |
46 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
47 | #[non_exhaustive ] |
48 | pub enum BlockTypeError { |
49 | #[display( |
50 | fmt = "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though" |
51 | )] |
52 | InvalidBlocktypeNumber { num: u8 }, |
53 | } |
54 | |
/// Error returned when a block header declares a size larger than the
/// absolute maximum allowed by the format (128 KiB).
#[derive(Debug, derive_more::Display)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum BlockSizeError {
    #[display(
        fmt = "Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}"
    )]
    /// `size` is the value decoded from the 21-bit block-size field.
    BlockSizeTooLarge { size: u32 },
}
64 | |
/// Errors that can occur while decompressing the body of a
/// `Compressed`-type block (literals section + sequences section).
#[derive(Debug, derive_more::Display, derive_more::From)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum DecompressBlockError {
    /// Reading the raw block content from the source failed.
    #[display(fmt = "Error while reading the block content: {_0}")]
    #[from]
    BlockContentReadError(io::Error),
    /// The literals-section header promised more bytes than the block contains.
    #[display(
        fmt = "Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left"
    )]
    MalformedSectionHeader {
        expected_len: usize,
        remaining_bytes: usize,
    },
    /// Decoding the literals payload (e.g. Huffman data) failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecompressLiteralsError(DecompressLiteralsError),
    /// Parsing the literals-section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    LiteralsSectionParseError(LiteralsSectionParseError),
    /// Parsing the sequences-section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    SequencesHeaderParseError(SequencesHeaderParseError),
    /// Decoding the FSE-coded sequences failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecodeSequenceError(DecodeSequenceError),
    /// Executing the decoded sequences against the literals failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    ExecuteSequencesError(ExecuteSequencesError),
}
95 | |
96 | #[derive (Debug, derive_more::Display, derive_more::From)] |
97 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
98 | #[non_exhaustive ] |
99 | pub enum DecodeBlockContentError { |
100 | #[display(fmt = "Can't decode next block if failed along the way. Results will be nonsense" )] |
101 | DecoderStateIsFailed, |
102 | #[display( |
103 | fmt = "Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense" |
104 | )] |
105 | ExpectedHeaderOfPreviousBlock, |
106 | #[display(fmt = "Error while reading bytes for {step}: {source}" )] |
107 | ReadError { step: BlockType, source: io::Error }, |
108 | #[display(fmt = "{_0:?}" )] |
109 | #[from] |
110 | DecompressBlockError(DecompressBlockError), |
111 | } |
112 | |
113 | pub fn new() -> BlockDecoder { |
114 | BlockDecoder { |
115 | internal_state: DecoderState::ReadyToDecodeNextHeader, |
116 | header_buffer: [0u8; 3], |
117 | } |
118 | } |
119 | |
/// Hard upper bound on the size of any single block: 128 KiB.
const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
121 | |
impl BlockDecoder {
    /// Decode the body of a block whose header was just read via
    /// [`Self::read_block_header`]. Decoded output is pushed into
    /// `workspace.buffer`.
    ///
    /// Returns the number of bytes consumed from `source`
    /// (1 for RLE blocks, `decompressed_size` for Raw blocks,
    /// `content_size` for Compressed blocks).
    ///
    /// Errors if called while a header read is expected, or if reading
    /// from `source` / decompressing the block fails.
    pub fn decode_block_content(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<u64, DecodeBlockContentError> {
        // A body may only be decoded directly after its header was read.
        match self.internal_state {
            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
            DecoderState::ReadyToDecodeNextHeader => {
                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
            }
        }

        let block_type = header.block_type;
        match block_type {
            BlockType::RLE => {
                // One byte from the source, repeated `decompressed_size` times.
                // Replicate it into a fixed batch buffer so the output can be
                // pushed in chunks instead of byte-by-byte.
                const BATCH_SIZE: usize = 512;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                // Read the single byte that gets repeated.
                source.read_exact(&mut buf[0..1]).map_err(|err| {
                    DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    }
                })?;
                self.internal_state = DecoderState::ReadyToDecodeNextHeader;

                // Fill the rest of the batch buffer with copies of that byte.
                for i in 1..BATCH_SIZE {
                    buf[i] = buf[0];
                }

                // Push full batches, then the remainder.
                for _ in 0..full_reads {
                    workspace.buffer.push(&buf[..]);
                }
                let smaller = &mut buf[..single_read_size as usize];
                workspace.buffer.push(smaller);

                // Exactly one byte was consumed from the source.
                Ok(1)
            }
            BlockType::Raw => {
                // Copy `decompressed_size` bytes verbatim from the source,
                // in batches of at most the maximum block size.
                const BATCH_SIZE: usize = 128 * 1024;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                for _ in 0..full_reads {
                    source.read_exact(&mut buf[..]).map_err(|err| {
                        DecodeBlockContentError::ReadError {
                            step: block_type,
                            source: err,
                        }
                    })?;
                    workspace.buffer.push(&buf[..]);
                }

                // Read and push the remainder that doesn't fill a whole batch.
                let smaller = &mut buf[..single_read_size as usize];
                source
                    .read_exact(smaller)
                    .map_err(|err| DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    })?;
                workspace.buffer.push(smaller);

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                // For raw blocks, bytes consumed equals bytes produced.
                Ok(u64::from(header.decompressed_size))
            }

            BlockType::Reserved => {
                // `read_block_header` rejects reserved blocks, so reaching this
                // arm means a header was constructed outside that path — a bug.
                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
            }

            BlockType::Compressed => {
                self.decompress_block(header, workspace, source)?;

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                // `content_size` bytes of compressed payload were consumed.
                Ok(u64::from(header.content_size))
            }
        }
    }

    /// Decompress a `Compressed`-type block: read `content_size` bytes,
    /// decode the literals section, then decode and execute the sequences
    /// section. Output lands in `workspace.buffer`.
    fn decompress_block(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<(), DecompressBlockError> {
        // Pull the whole compressed payload into the scratch buffer.
        workspace
            .block_content_buffer
            .resize(header.content_size as usize, 0);

        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
        let raw = workspace.block_content_buffer.as_slice();

        // --- Literals section ---
        let mut section = LiteralsSection::new();
        let bytes_in_literals_header = section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_literals_header as usize..];
        vprintln!(
            "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
            section.ls_type,
            section.regenerated_size,
            section.compressed_size
        );

        // How many payload bytes the literals section occupies in the block:
        // the compressed size if given, else 1 byte for RLE or
        // `regenerated_size` bytes for raw literals.
        let upper_limit_for_literals = match section.compressed_size {
            Some(x) => x as usize,
            None => match section.ls_type {
                LiteralsSectionType::RLE => 1,
                LiteralsSectionType::Raw => section.regenerated_size as usize,
                // Compressed/Treeless always carry a compressed_size, so
                // `None` here cannot happen unless the parser is broken.
                _ => panic!("Bug in this library"),
            },
        };

        if raw.len() < upper_limit_for_literals {
            return Err(DecompressBlockError::MalformedSectionHeader {
                expected_len: upper_limit_for_literals,
                remaining_bytes: raw.len(),
            });
        }

        let raw_literals = &raw[..upper_limit_for_literals];
        vprintln!("Slice for literals: {}", raw_literals.len());

        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
        let bytes_used_in_literals_section = decode_literals(
            &section,
            &mut workspace.huf,
            raw_literals,
            &mut workspace.literals_buffer,
        )?;
        // Sanity checks: the decoder must produce exactly the promised amount
        // of literals and consume exactly the literals payload.
        assert!(
            section.regenerated_size == workspace.literals_buffer.len() as u32,
            "Wrong number of literals: {}, Should have been: {}",
            workspace.literals_buffer.len(),
            section.regenerated_size
        );
        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);

        // --- Sequences section (everything after the literals) ---
        let raw = &raw[upper_limit_for_literals..];
        vprintln!("Slice for sequences with headers: {}", raw.len());

        let mut seq_section = SequencesHeader::new();
        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_sequence_header as usize..];
        vprintln!(
            "Found sequencessection with sequences: {} and size: {}",
            seq_section.num_sequences,
            raw.len()
        );

        // All sections together must account for the whole block content.
        assert!(
            u32::from(bytes_in_literals_header)
                + bytes_used_in_literals_section
                + u32::from(bytes_in_sequence_header)
                + raw.len() as u32
                == header.content_size
        );
        vprintln!("Slice for sequences: {}", raw.len());

        if seq_section.num_sequences != 0 {
            decode_sequences(
                &seq_section,
                raw,
                &mut workspace.fse,
                &mut workspace.sequences,
            )?;
            vprintln!("Executing sequences");
            execute_sequences(workspace)?;
        } else {
            // No sequences: the decoded literals are the whole block output.
            workspace.buffer.push(&workspace.literals_buffer);
            workspace.sequences.clear();
        }

        Ok(())
    }

    /// Read and parse the 3-byte block header from `r`.
    ///
    /// Returns the parsed [`BlockHeader`] and the number of bytes consumed
    /// (always 3). Errors on I/O failure, a reserved block type, or an
    /// over-large block size.
    pub fn read_block_header(
        &mut self,
        mut r: impl Read,
    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
        //match self.internal_state {
        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
        //}

        r.read_exact(&mut self.header_buffer[0..3])?;

        let btype = self.block_type()?;
        // Reserved blocks are corruption per the spec; refuse immediately.
        if let BlockType::Reserved = btype {
            return Err(BlockHeaderReadError::FoundReservedBlock);
        }

        let block_size = self.block_content_size()?;
        // For Raw/RLE blocks the header size field is the decompressed size;
        // for Compressed blocks the decompressed size is unknown up front.
        let decompressed_size = match btype {
            BlockType::Raw => block_size,
            BlockType::RLE => block_size,
            BlockType::Reserved => 0, //should be catched above, this is an error state
            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
        };
        // `content_size` is how many bytes the block occupies in the source:
        // an RLE block stores just the single repeated byte.
        let content_size = match btype {
            BlockType::Raw => block_size,
            BlockType::Compressed => block_size,
            BlockType::RLE => 1,
            BlockType::Reserved => 0, //should be catched above, this is an error state
        };

        let last_block = self.is_last();

        self.reset_buffer();
        self.internal_state = DecoderState::ReadyToDecodeNextBody;

        //just return 3. Blockheaders always take 3 bytes
        Ok((
            BlockHeader {
                last_block,
                block_type: btype,
                decompressed_size,
                content_size,
            },
            3,
        ))
    }

    /// Zero the 3-byte header scratch buffer.
    fn reset_buffer(&mut self) {
        self.header_buffer[0] = 0;
        self.header_buffer[1] = 0;
        self.header_buffer[2] = 0;
    }

    /// Bit 0 of the header marks the last block of the frame.
    fn is_last(&self) -> bool {
        self.header_buffer[0] & 0x1 == 1
    }

    /// Decode the 2-bit block-type field (bits 1-2 of the first header byte).
    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
        let t = (self.header_buffer[0] >> 1) & 0x3;
        match t {
            0 => Ok(BlockType::Raw),
            1 => Ok(BlockType::RLE),
            2 => Ok(BlockType::Compressed),
            3 => Ok(BlockType::Reserved),
            // Unreachable: `t` is masked to two bits. Kept for exhaustiveness.
            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
        }
    }

    /// Decode the block-size field and reject sizes above the 128 KiB maximum.
    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
        let val = self.block_content_size_unchecked();
        if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
            Err(BlockSizeError::BlockSizeTooLarge { size: val })
        } else {
            Ok(val)
        }
    }

    /// Assemble the 21-bit little-endian block size from the 3 header bytes.
    fn block_content_size_unchecked(&self) -> u32 {
        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
            | (u32::from(self.header_buffer[1]) << 5)
            | (u32::from(self.header_buffer[2]) << 13)
    }
}
386 | |