1 | use super::super::blocks::block::BlockHeader; |
2 | use super::super::blocks::block::BlockType; |
3 | use super::super::blocks::literals_section::LiteralsSection; |
4 | use super::super::blocks::literals_section::LiteralsSectionType; |
5 | use super::super::blocks::sequence_section::SequencesHeader; |
6 | use super::literals_section_decoder::{decode_literals, DecompressLiteralsError}; |
7 | use super::sequence_execution::ExecuteSequencesError; |
8 | use super::sequence_section_decoder::decode_sequences; |
9 | use super::sequence_section_decoder::DecodeSequenceError; |
10 | use crate::blocks::literals_section::LiteralsSectionParseError; |
11 | use crate::blocks::sequence_section::SequencesHeaderParseError; |
12 | use crate::decoding::scratch::DecoderScratch; |
13 | use crate::decoding::sequence_execution::execute_sequences; |
14 | use crate::io::{self, Read}; |
15 | |
/// Decodes a single zstd block (header, then body) from a frame.
///
/// Stateful: `read_block_header` must be called before the matching
/// `decode_block_content`; the internal state enforces that ordering.
pub struct BlockDecoder {
    // Scratch space for the 3 raw bytes of the current block header.
    header_buffer: [u8; 3],
    // Tracks whether a header or a body is expected next (or a failed state).
    internal_state: DecoderState,
}
20 | |
/// Internal state machine of [`BlockDecoder`], alternating between
/// header decoding and body decoding.
enum DecoderState {
    // The next call must be `read_block_header`.
    ReadyToDecodeNextHeader,
    // A header has been read; the next call must be `decode_block_content`.
    ReadyToDecodeNextBody,
    #[allow(dead_code)]
    Failed, //TODO put "self.internal_state = DecoderState::Failed;" everywhere an unresolvable error occurs
}
27 | |
28 | #[derive (Debug, derive_more::Display, derive_more::From)] |
29 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
30 | #[non_exhaustive ] |
31 | pub enum BlockHeaderReadError { |
32 | #[display(fmt = "Error while reading the block header" )] |
33 | #[from] |
34 | ReadError(io::Error), |
35 | #[display(fmt = "Reserved block occured. This is considered corruption by the documentation" )] |
36 | FoundReservedBlock, |
37 | #[display(fmt = "Error getting block type: {_0}" )] |
38 | #[from] |
39 | BlockTypeError(BlockTypeError), |
40 | #[display(fmt = "Error getting block content size: {_0}" )] |
41 | #[from] |
42 | BlockSizeError(BlockSizeError), |
43 | } |
44 | |
45 | #[derive (Debug, derive_more::Display)] |
46 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
47 | #[non_exhaustive ] |
48 | pub enum BlockTypeError { |
49 | #[display( |
50 | fmt = "Invalid Blocktype number. Is: {num} Should be one of: 0, 1, 2, 3 (3 is reserved though" |
51 | )] |
52 | InvalidBlocktypeNumber { num: u8 }, |
53 | } |
54 | |
/// Error returned when a block header declares a size larger than the
/// absolute maximum allowed by the format (128 KiB).
#[derive(Debug, derive_more::Display)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum BlockSizeError {
    #[display(
        fmt = "Blocksize was bigger than the absolute maximum {ABSOLUTE_MAXIMUM_BLOCK_SIZE} (128kb). Is: {size}"
    )]
    /// `size` is the value decoded from the 21-bit block-size field.
    BlockSizeTooLarge { size: u32 },
}
64 | |
/// Errors that can occur while decompressing the body of a
/// `Compressed`-type block (literals section + sequences section).
#[derive(Debug, derive_more::Display, derive_more::From)]
#[cfg_attr(feature = "std", derive(derive_more::Error))]
#[non_exhaustive]
pub enum DecompressBlockError {
    /// Reading the raw block content from the source failed.
    #[display(fmt = "Error while reading the block content: {_0}")]
    #[from]
    BlockContentReadError(io::Error),
    /// The literals-section header promised more bytes than the block contains.
    #[display(
        fmt = "Malformed section header. Says literals would be this long: {expected_len} but there are only {remaining_bytes} bytes left"
    )]
    MalformedSectionHeader {
        expected_len: usize,
        remaining_bytes: usize,
    },
    /// Decoding the literals payload (e.g. Huffman data) failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecompressLiteralsError(DecompressLiteralsError),
    /// Parsing the literals-section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    LiteralsSectionParseError(LiteralsSectionParseError),
    /// Parsing the sequences-section header failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    SequencesHeaderParseError(SequencesHeaderParseError),
    /// Decoding the FSE-coded sequences failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    DecodeSequenceError(DecodeSequenceError),
    /// Executing the decoded sequences against the literals failed.
    #[display(fmt = "{_0:?}")]
    #[from]
    ExecuteSequencesError(ExecuteSequencesError),
}
95 | |
96 | #[derive (Debug, derive_more::Display, derive_more::From)] |
97 | #[cfg_attr (feature = "std" , derive(derive_more::Error))] |
98 | #[non_exhaustive ] |
99 | pub enum DecodeBlockContentError { |
100 | #[display(fmt = "Can't decode next block if failed along the way. Results will be nonsense" )] |
101 | DecoderStateIsFailed, |
102 | #[display( |
103 | fmt = "Cant decode next block body, while expecting to decode the header of the previous block. Results will be nonsense" |
104 | )] |
105 | ExpectedHeaderOfPreviousBlock, |
106 | #[display(fmt = "Error while reading bytes for {step}: {source}" )] |
107 | ReadError { step: BlockType, source: io::Error }, |
108 | #[display(fmt = "{_0:?}" )] |
109 | #[from] |
110 | DecompressBlockError(DecompressBlockError), |
111 | } |
112 | |
113 | pub fn new() -> BlockDecoder { |
114 | BlockDecoder { |
115 | internal_state: DecoderState::ReadyToDecodeNextHeader, |
116 | header_buffer: [0u8; 3], |
117 | } |
118 | } |
119 | |
/// Hard upper bound on the size of any single block: 128 KiB.
const ABSOLUTE_MAXIMUM_BLOCK_SIZE: u32 = 128 * 1024;
121 | |
impl BlockDecoder {
    /// Decode the body of a block whose header was just read via
    /// [`Self::read_block_header`]. Decoded output is pushed into
    /// `workspace.buffer`.
    ///
    /// Returns the number of bytes consumed from `source`
    /// (1 for RLE blocks, `decompressed_size` for Raw blocks,
    /// `content_size` for Compressed blocks).
    ///
    /// Errors if called while a header read is expected, or if reading
    /// from `source` / decompressing the block fails.
    pub fn decode_block_content(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<u64, DecodeBlockContentError> {
        // A body may only be decoded directly after its header was read.
        match self.internal_state {
            DecoderState::ReadyToDecodeNextBody => { /* Happy :) */ }
            DecoderState::Failed => return Err(DecodeBlockContentError::DecoderStateIsFailed),
            DecoderState::ReadyToDecodeNextHeader => {
                return Err(DecodeBlockContentError::ExpectedHeaderOfPreviousBlock)
            }
        }

        let block_type = header.block_type;
        match block_type {
            BlockType::RLE => {
                // One byte from the source, repeated `decompressed_size` times.
                // Replicate it into a fixed batch buffer so the output can be
                // pushed in chunks instead of byte-by-byte.
                const BATCH_SIZE: usize = 512;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                // Read the single byte that gets repeated.
                source.read_exact(&mut buf[0..1]).map_err(|err| {
                    DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    }
                })?;
                self.internal_state = DecoderState::ReadyToDecodeNextHeader;

                // Fill the rest of the batch buffer with copies of that byte.
                for i in 1..BATCH_SIZE {
                    buf[i] = buf[0];
                }

                // Push full batches, then the remainder.
                for _ in 0..full_reads {
                    workspace.buffer.push(&buf[..]);
                }
                let smaller = &mut buf[..single_read_size as usize];
                workspace.buffer.push(smaller);

                // Exactly one byte was consumed from the source.
                Ok(1)
            }
            BlockType::Raw => {
                // Copy `decompressed_size` bytes verbatim from the source,
                // in batches of at most the maximum block size.
                const BATCH_SIZE: usize = 128 * 1024;
                let mut buf = [0u8; BATCH_SIZE];
                let full_reads = header.decompressed_size / BATCH_SIZE as u32;
                let single_read_size = header.decompressed_size % BATCH_SIZE as u32;

                for _ in 0..full_reads {
                    source.read_exact(&mut buf[..]).map_err(|err| {
                        DecodeBlockContentError::ReadError {
                            step: block_type,
                            source: err,
                        }
                    })?;
                    workspace.buffer.push(&buf[..]);
                }

                // Read and push the remainder that doesn't fill a whole batch.
                let smaller = &mut buf[..single_read_size as usize];
                source
                    .read_exact(smaller)
                    .map_err(|err| DecodeBlockContentError::ReadError {
                        step: block_type,
                        source: err,
                    })?;
                workspace.buffer.push(smaller);

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                // For raw blocks, bytes consumed equals bytes produced.
                Ok(u64::from(header.decompressed_size))
            }

            BlockType::Reserved => {
                // `read_block_header` rejects reserved blocks, so reaching this
                // arm means a header was constructed outside that path — a bug.
                panic!("How did you even get this. The decoder should error out if it detects a reserved-type block");
            }

            BlockType::Compressed => {
                self.decompress_block(header, workspace, source)?;

                self.internal_state = DecoderState::ReadyToDecodeNextHeader;
                // `content_size` bytes of compressed payload were consumed.
                Ok(u64::from(header.content_size))
            }
        }
    }

    /// Decompress a `Compressed`-type block: read `content_size` bytes,
    /// decode the literals section, then decode and execute the sequences
    /// section. Output lands in `workspace.buffer`.
    fn decompress_block(
        &mut self,
        header: &BlockHeader,
        workspace: &mut DecoderScratch, //reuse this as often as possible. Not only if the trees are reused but also reuse the allocations when building new trees
        mut source: impl Read,
    ) -> Result<(), DecompressBlockError> {
        // Pull the whole compressed payload into the scratch buffer.
        workspace
            .block_content_buffer
            .resize(header.content_size as usize, 0);

        source.read_exact(workspace.block_content_buffer.as_mut_slice())?;
        let raw = workspace.block_content_buffer.as_slice();

        // --- Literals section ---
        let mut section = LiteralsSection::new();
        let bytes_in_literals_header = section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_literals_header as usize..];
        vprintln!(
            "Found {} literalssection with regenerated size: {}, and compressed size: {:?}",
            section.ls_type,
            section.regenerated_size,
            section.compressed_size
        );

        // How many payload bytes the literals section occupies in the block:
        // the compressed size if given, else 1 byte for RLE or
        // `regenerated_size` bytes for raw literals.
        let upper_limit_for_literals = match section.compressed_size {
            Some(x) => x as usize,
            None => match section.ls_type {
                LiteralsSectionType::RLE => 1,
                LiteralsSectionType::Raw => section.regenerated_size as usize,
                // Compressed/Treeless always carry a compressed_size, so
                // `None` here cannot happen unless the parser is broken.
                _ => panic!("Bug in this library"),
            },
        };

        if raw.len() < upper_limit_for_literals {
            return Err(DecompressBlockError::MalformedSectionHeader {
                expected_len: upper_limit_for_literals,
                remaining_bytes: raw.len(),
            });
        }

        let raw_literals = &raw[..upper_limit_for_literals];
        vprintln!("Slice for literals: {}", raw_literals.len());

        workspace.literals_buffer.clear(); //all literals of the previous block must have been used in the sequence execution anyways. just be defensive here
        let bytes_used_in_literals_section = decode_literals(
            &section,
            &mut workspace.huf,
            raw_literals,
            &mut workspace.literals_buffer,
        )?;
        // Sanity checks: the decoder must produce exactly the promised amount
        // of literals and consume exactly the literals payload.
        assert!(
            section.regenerated_size == workspace.literals_buffer.len() as u32,
            "Wrong number of literals: {}, Should have been: {}",
            workspace.literals_buffer.len(),
            section.regenerated_size
        );
        assert!(bytes_used_in_literals_section == upper_limit_for_literals as u32);

        // --- Sequences section (everything after the literals) ---
        let raw = &raw[upper_limit_for_literals..];
        vprintln!("Slice for sequences with headers: {}", raw.len());

        let mut seq_section = SequencesHeader::new();
        let bytes_in_sequence_header = seq_section.parse_from_header(raw)?;
        let raw = &raw[bytes_in_sequence_header as usize..];
        vprintln!(
            "Found sequencessection with sequences: {} and size: {}",
            seq_section.num_sequences,
            raw.len()
        );

        // All sections together must account for the whole block content.
        assert!(
            u32::from(bytes_in_literals_header)
                + bytes_used_in_literals_section
                + u32::from(bytes_in_sequence_header)
                + raw.len() as u32
                == header.content_size
        );
        vprintln!("Slice for sequences: {}", raw.len());

        if seq_section.num_sequences != 0 {
            decode_sequences(
                &seq_section,
                raw,
                &mut workspace.fse,
                &mut workspace.sequences,
            )?;
            vprintln!("Executing sequences");
            execute_sequences(workspace)?;
        } else {
            // No sequences: the decoded literals are the whole block output.
            workspace.buffer.push(&workspace.literals_buffer);
            workspace.sequences.clear();
        }

        Ok(())
    }

    /// Read and parse the 3-byte block header from `r`.
    ///
    /// Returns the parsed [`BlockHeader`] and the number of bytes consumed
    /// (always 3). Errors on I/O failure, a reserved block type, or an
    /// over-large block size.
    pub fn read_block_header(
        &mut self,
        mut r: impl Read,
    ) -> Result<(BlockHeader, u8), BlockHeaderReadError> {
        //match self.internal_state {
        //    DecoderState::ReadyToDecodeNextHeader => {/* Happy :) */},
        //    DecoderState::Failed => return Err(format!("Cant decode next block if failed along the way. Results will be nonsense")),
        //    DecoderState::ReadyToDecodeNextBody => return Err(format!("Cant decode next block header, while expecting to decode the body of the previous block. Results will be nonsense")),
        //}

        r.read_exact(&mut self.header_buffer[0..3])?;

        let btype = self.block_type()?;
        // Reserved blocks are corruption per the spec; refuse immediately.
        if let BlockType::Reserved = btype {
            return Err(BlockHeaderReadError::FoundReservedBlock);
        }

        let block_size = self.block_content_size()?;
        // For Raw/RLE blocks the header size field is the decompressed size;
        // for Compressed blocks the decompressed size is unknown up front.
        let decompressed_size = match btype {
            BlockType::Raw => block_size,
            BlockType::RLE => block_size,
            BlockType::Reserved => 0, //should be catched above, this is an error state
            BlockType::Compressed => 0, //unknown but will be smaller than 128kb (or window_size if that is smaller than 128kb)
        };
        // `content_size` is how many bytes the block occupies in the source:
        // an RLE block stores just the single repeated byte.
        let content_size = match btype {
            BlockType::Raw => block_size,
            BlockType::Compressed => block_size,
            BlockType::RLE => 1,
            BlockType::Reserved => 0, //should be catched above, this is an error state
        };

        let last_block = self.is_last();

        self.reset_buffer();
        self.internal_state = DecoderState::ReadyToDecodeNextBody;

        //just return 3. Blockheaders always take 3 bytes
        Ok((
            BlockHeader {
                last_block,
                block_type: btype,
                decompressed_size,
                content_size,
            },
            3,
        ))
    }

    /// Zero the 3-byte header scratch buffer.
    fn reset_buffer(&mut self) {
        self.header_buffer[0] = 0;
        self.header_buffer[1] = 0;
        self.header_buffer[2] = 0;
    }

    /// Bit 0 of the header marks the last block of the frame.
    fn is_last(&self) -> bool {
        self.header_buffer[0] & 0x1 == 1
    }

    /// Decode the 2-bit block-type field (bits 1-2 of the first header byte).
    fn block_type(&self) -> Result<BlockType, BlockTypeError> {
        let t = (self.header_buffer[0] >> 1) & 0x3;
        match t {
            0 => Ok(BlockType::Raw),
            1 => Ok(BlockType::RLE),
            2 => Ok(BlockType::Compressed),
            3 => Ok(BlockType::Reserved),
            // Unreachable: `t` is masked to two bits. Kept for exhaustiveness.
            other => Err(BlockTypeError::InvalidBlocktypeNumber { num: other }),
        }
    }

    /// Decode the block-size field and reject sizes above the 128 KiB maximum.
    fn block_content_size(&self) -> Result<u32, BlockSizeError> {
        let val = self.block_content_size_unchecked();
        if val > ABSOLUTE_MAXIMUM_BLOCK_SIZE {
            Err(BlockSizeError::BlockSizeTooLarge { size: val })
        } else {
            Ok(val)
        }
    }

    /// Assemble the 21-bit little-endian block size from the 3 header bytes.
    fn block_content_size_unchecked(&self) -> u32 {
        u32::from(self.header_buffer[0] >> 3) //push out type and last_block flags. Retain 5 bit
            | (u32::from(self.header_buffer[1]) << 5)
            | (u32::from(self.header_buffer[2]) << 13)
    }
}
386 | |