1use super::super::blocks::block::BlockHeader;
2use super::super::blocks::block::BlockType;
3use super::super::blocks::literals_section::LiteralsSection;
4use super::super::blocks::literals_section::LiteralsSectionType;
5use super::super::blocks::sequence_section::SequencesHeader;
6use super::literals_section_decoder::{decode_literals, DecompressLiteralsError};
7use super::sequence_execution::ExecuteSequencesError;
8use super::sequence_section_decoder::decode_sequences;
9use super::sequence_section_decoder::DecodeSequenceError;
10use crate::blocks::literals_section::LiteralsSectionParseError;
11use crate::blocks::sequence_section::SequencesHeaderParseError;
12use crate::decoding::scratch::DecoderScratch;
13use crate::decoding::sequence_execution::execute_sequences;
14use crate::io::{self, Read};
15
/// Decodes zstd blocks one at a time: call `read_block_header`, then
/// `decode_block_content`, in strict alternation (enforced via `internal_state`).
pub struct BlockDecoder {
    // Scratch space for the 3-byte block header read by `read_block_header`.
    header_buffer: [u8; 3],
    // Tracks whether a header or a body is expected next.
    internal_state: DecoderState,
}
20
/// State machine that enforces the header/body alternation of `BlockDecoder`.
enum DecoderState {
    // The previous body (if any) is finished; `read_block_header` may run.
    ReadyToDecodeNextHeader,
    // A header has been read; `decode_block_content` may run.
    ReadyToDecodeNextBody,
    // Terminal error state. Currently never assigned anywhere (see TODO).
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
27
28#[derive(Debug, derive_more::Display, derive_more::From)]
29#[cfg_attr(feature = "std", derive(derive_more::Error))]
30#[non_exhaustive]
31pub enum BlockHeaderReadError {
32 #[display(fmt = "Error while reading the block header")]
33 #[from]
34 ReadError(io::Error),
35 #[display(fmt = "Reserved block occured. This is considered corruption by the documentation")]
36 FoundReservedBlock,
37 #[display(fmt = "Error getting block type: {_0}")]
38 #[from]
39 BlockTypeError(BlockTypeError),
40 #[display(fmt = "Error getting block content size: {_0}")]
41 #[from]
42 BlockSizeError(BlockSizeError),
43}
44
45#[derive(Debug, derive_more::Display)]
46#[cfg_attr(feature = "std", derive(derive_more::Error))]
47#[non_exhaustive]
48pub enum BlockTypeError {
49 #[display(
50 fmt = "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though"
51 )]
52 InvalidBlocktypeNumber { num: u8 },
53}
54
/// Error produced when a header declares a block size above
/// `ABSOLUTE_MAXIMUM_BLOCK_SIZE` (128 KiB).
#[derive(Debug, derive_more::Display)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum BlockSizeError {
    #[display(
        fmt = "Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}"
    )]
    BlockSizeTooLarge { size: u32 },
}
64
/// Errors that can occur while decompressing a `Compressed` block body
/// (see `BlockDecoder::decompress_block`).
#[derive(Debug, derive_more::Display, derive_more::From)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum DecompressBlockError {
    /// The reader failed while filling the block content buffer.
    #[display(fmt = "Error while reading the block content: {_0}")]
    #[from]
    BlockContentReadError(io::Error),
    /// The literals section header promised more bytes than remain in the block.
    #[display(
        fmt = "Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left"
    )]
    MalformedSectionHeader {
        expected_len: usize,
        remaining_bytes: usize,
    },
    /// Decoding the literals section payload failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecompressLiteralsError(DecompressLiteralsError),
    /// Parsing the literals section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    LiteralsSectionParseError(LiteralsSectionParseError),
    /// Parsing the sequences section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    SequencesHeaderParseError(SequencesHeaderParseError),
    /// Decoding the sequences themselves failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecodeSequenceError(DecodeSequenceError),
    /// Executing the decoded sequences against the workspace buffers failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    ExecuteSequencesError(ExecuteSequencesError),
}
95
/// Errors that can occur in `BlockDecoder::decode_block_content`.
#[derive(Debug, derive_more::Display, derive_more::From)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum DecodeBlockContentError {
    /// The decoder is in the `Failed` state; no further decoding is allowed.
    #[display(fmt = "Can't decode next block if failed along the way. Results will be nonsense")]
    DecoderStateIsFailed,
    /// `decode_block_content` was called before `read_block_header`.
    #[display(
        fmt = "Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense"
    )]
    ExpectedHeaderOfPreviousBlock,
    /// The reader failed while decoding a block of the given type.
    #[display(fmt = "Error while reading bytes for {step}: {source}")]
    ReadError { step: BlockType, source: io::Error },
    /// Decompressing a `Compressed` block failed; see the wrapped error.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecompressBlockError(DecompressBlockError),
}
112
113pub fn new() -> BlockDecoder {
114 BlockDecoder {
115 internal_state: DecoderState::ReadyToDecodeNextHeader,
116 header_buffer: [0u8; 3],
117 }
118}
119
/// Upper bound on a block's declared content size: 128 KiB.
/// Sizes above this are rejected in `block_content_size` with `BlockSizeError`.
const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
121
impl BlockDecoder {
    /// Decodes the body of the block described by `header`, pushing the
    /// decompressed bytes into `workspace.buffer`.
    ///
    /// Must be called in state `ReadyToDecodeNextBody`, i.e. directly after a
    /// successful `read_block_header`. On success the state flips back to
    /// `ReadyToDecodeNextHeader` and the returned value is the number of bytes
    /// consumed from `source` (RLE: 1, Raw: `decompressed_size`,
    /// Compressed: `content_size`).
    pub fn decode_block_content(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<u64, DecodeBlockContentError> {
        // Enforce the header/body alternation before touching the source.
        match self.internal_state {
            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
            DecoderState::ReadyToDecodeNextHeader => {
                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
            }
        }

        let block_type = header.block_type;
        match block_type {
            BlockType::RLE => {
                // One source byte repeated `decompressed_size` times. Replicate
                // it into a batch buffer and push full batches, then the
                // remainder, instead of pushing byte-by-byte.
                const BATCH_SIZE: usize = 512;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                // Only a single byte is consumed from the source for an RLE block.
                source.read_exact(&mut buf[0..1]).map_err(|err| {
                    DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    }
                })?;
                self.internal_state = DecoderState::ReadyToDecodeNextHeader;

                // Fill the rest of the batch with copies of the RLE byte.
                for i in 1..BATCH_SIZE {
                    buf[i] = buf[0];
                }

                for _ in 0..full_reads {
                    workspace.buffer.push(&buf[..]);
                }
                // Push the leftover partial batch (possibly empty).
                let smaller = &mut buf[..single_read_size as usize];
                workspace.buffer.push(smaller);

                // Exactly one byte was read from the source.
                Ok(1)
            }
            BlockType::Raw => {
                // Copy `decompressed_size` bytes verbatim from source to the
                // decode buffer, in fixed-size batches to bound stack usage.
                const BATCH_SIZE: usize = 128 * 1024;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                for _ in 0..full_reads {
                    source.read_exact(&mut buf[..]).map_err(|err| {
                        DecodeBlockContentError::ReadError {
                            step: block_type,
                            source: err,
                        }
                    })?;
                    workspace.buffer.push(&buf[..]);
                }

                // Read and push the remaining partial batch (possibly empty).
                let smaller = &mut buf[..single_read_size as usize];
                source
                    .read_exact(smaller)
                    .map_err(|err| DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    })?;
                workspace.buffer.push(smaller);

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                Ok(u64::from(header.decompressed_size))
            }

            BlockType::Reserved => {
                // `read_block_header` already rejects reserved blocks with
                // `FoundReservedBlock`, so this branch is unreachable through
                // the public API.
                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
            }

            BlockType::Compressed => {
                self.decompress_block(header, workspace, source)?;

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                // `decompress_block` read exactly `content_size` bytes.
                Ok(u64::from(header.content_size))
            }
        }
    }

    /// Decompresses a `Compressed` block: reads `content_size` bytes from
    /// `source`, parses and decodes the literals section, parses the sequences
    /// header, then either decodes+executes the sequences or (with zero
    /// sequences) emits the literals directly.
    ///
    /// NOTE(review): the `assert!`s below panic instead of returning an error
    /// when decoded section sizes are inconsistent — presumably reachable with
    /// corrupted input; verify whether these should become errors.
    fn decompress_block(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<(), DecompressBlockError> {
        // Slurp the whole block content; all further parsing works on slices of it.
        workspace
            .block_content_buffer
            .resize(header.content_size as usize, 0);

        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
        let raw = workspace.block_content_buffer.as_slice();

        // --- Literals section ---
        let mut section = LiteralsSection::new();
        let bytes_in_literals_header = section.parse_from_header(raw)?;
        // Advance past the literals section header.
        let raw = &raw[bytes_in_literals_header as usize..];
        vprintln!(
            "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
            section.ls_type,
            section.regenerated_size,
            section.compressed_size
        );

        // How many bytes the literals payload occupies in the block:
        // the compressed size when given, else 1 for RLE or the regenerated
        // size for Raw (both stored uncompressed).
        let upper_limit_for_literals = match section.compressed_size {
            Some(x) => x as usize,
            None => match section.ls_type {
                LiteralsSectionType::RLE => 1,
                LiteralsSectionType::Raw => section.regenerated_size as usize,
                _ => panic!("Bug in this library"),
            },
        };

        if raw.len() < upper_limit_for_literals {
            return Err(DecompressBlockError::MalformedSectionHeader {
                expected_len: upper_limit_for_literals,
                remaining_bytes: raw.len(),
            });
        }

        let raw_literals = &raw[..upper_limit_for_literals];
        vprintln!("Slice for literals: {}", raw_literals.len());

        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
        let bytes_used_in_literals_section = decode_literals(
            &section,
            &mut workspace.huf,
            raw_literals,
            &mut workspace.literals_buffer,
        )?;
        assert!(
            section.regenerated_size == workspace.literals_buffer.len() as u32,
            "Wrong number of literals: {}, Should have been: {}",
            workspace.literals_buffer.len(),
            section.regenerated_size
        );
        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);

        // Advance past the literals payload.
        let raw = &raw[upper_limit_for_literals..];
        vprintln!("Slice for sequences with headers: {}", raw.len());

        // --- Sequences section ---
        let mut seq_section = SequencesHeader::new();
        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_sequence_header as usize..];
        vprintln!(
            "Found sequencessection with sequences: {} and size: {}",
            seq_section.num_sequences,
            raw.len()
        );

        // Sanity check: all parsed parts must add up to exactly content_size.
        assert!(
            u32::from(bytes_in_literals_header)
                + bytes_used_in_literals_section
                + u32::from(bytes_in_sequence_header)
                + raw.len() as u32
                == header.content_size
        );
        vprintln!("Slice for sequences: {}", raw.len());

        if seq_section.num_sequences != 0 {
            decode_sequences(
                &seq_section,
                raw,
                &mut workspace.fse,
                &mut workspace.sequences,
            )?;
            vprintln!("Executing sequences");
            execute_sequences(workspace)?;
        } else {
            // No sequences: the block's output is just the decoded literals.
            workspace.buffer.push(&workspace.literals_buffer);
            workspace.sequences.clear();
        }

        Ok(())
    }

    /// Reads and validates the 3-byte block header from `r`.
    ///
    /// Returns the parsed `BlockHeader` and the number of bytes consumed
    /// (always 3). Rejects reserved blocks and sizes above
    /// `ABSOLUTE_MAXIMUM_BLOCK_SIZE`. On success the state flips to
    /// `ReadyToDecodeNextBody`.
    pub fn read_block_header(
        &mut self,
        mut r: impl Read,
    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
        //match self.internal_state {
        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
        //}

        r.read_exact(&mut self.header_buffer[0..3])?;

        let btype = self.block_type()?;
        if let BlockType::Reserved = btype {
            return Err(BlockHeaderReadError::FoundReservedBlock);
        }

        let block_size = self.block_content_size()?;
        // For Raw/RLE blocks the header size IS the decompressed size.
        let decompressed_size = match btype {
            BlockType::Raw => block_size,
            BlockType::RLE => block_size,
            BlockType::Reserved => 0, //should be catched above, this is an error state
            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
        };
        // content_size = bytes the body occupies in the stream (1 for RLE).
        let content_size = match btype {
            BlockType::Raw => block_size,
            BlockType::Compressed => block_size,
            BlockType::RLE => 1,
            BlockType::Reserved => 0, //should be catched above, this is an error state
        };

        let last_block = self.is_last();

        self.reset_buffer();
        self.internal_state = DecoderState::ReadyToDecodeNextBody;

        //just return 3. Blockheaders always take 3 bytes
        Ok((
            BlockHeader {
                last_block,
                block_type: btype,
                decompressed_size,
                content_size,
            },
            3,
        ))
    }

    /// Zeroes the header scratch buffer.
    fn reset_buffer(&mut self) {
        self.header_buffer[0] = 0;
        self.header_buffer[1] = 0;
        self.header_buffer[2] = 0;
    }

    /// Bit 0 of the first header byte flags the last block of a frame.
    fn is_last(&self) -> bool {
        self.header_buffer[0] & 0x1 == 1
    }

    /// Bits 1-2 of the first header byte encode the block type.
    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
        let t = (self.header_buffer[0] >> 1) & 0x3;
        match t {
            0 => Ok(BlockType::Raw),
            1 => Ok(BlockType::RLE),
            2 => Ok(BlockType::Compressed),
            3 => Ok(BlockType::Reserved),
            // Unreachable: a 2-bit value is always 0..=3, but keeps the match exhaustive.
            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
        }
    }

    /// Reads the block size from the header, rejecting values above
    /// `ABSOLUTE_MAXIMUM_BLOCK_SIZE`.
    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
        let val = self.block_content_size_unchecked();
        if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
            Err(BlockSizeError::BlockSizeTooLarge { size: val })
        } else {
            Ok(val)
        }
    }

    /// The upper 21 bits of the 3-byte header, little-endian: bits 3-7 of
    /// byte 0, then all of bytes 1 and 2.
    fn block_content_size_unchecked(&self) -> u32 {
        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
            | (u32::from(self.header_buffer[1]) << 5)
            | (u32::from(self.header_buffer[2]) << 13)
    }
}
386