decoder.rs source code [crates/zune-inflate-0.2.54/src/decoder.rs]

1	#![allow(unused_imports)]
2
3	use alloc::vec::Vec;
4	use alloc::{format, vec};
5
6	use crate::bitstream::BitStreamReader;
7	use crate::constants::{
8	DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN, DEFLATE_BLOCKTYPE_RESERVED, DEFLATE_BLOCKTYPE_STATIC,
9	DEFLATE_BLOCKTYPE_UNCOMPRESSED, DEFLATE_MAX_CODEWORD_LENGTH,
10	DEFLATE_MAX_LITLEN_CODEWORD_LENGTH, DEFLATE_MAX_NUM_SYMS, DEFLATE_MAX_OFFSET_CODEWORD_LENGTH,
11	DEFLATE_MAX_PRE_CODEWORD_LEN, DEFLATE_NUM_LITLEN_SYMS, DEFLATE_NUM_OFFSET_SYMS,
12	DEFLATE_NUM_PRECODE_SYMS, DEFLATE_PRECODE_LENS_PERMUTATION, DELFATE_MAX_LENS_OVERRUN,
13	FASTCOPY_BYTES, FASTLOOP_MAX_BYTES_WRITTEN, HUFFDEC_END_OF_BLOCK, HUFFDEC_EXCEPTIONAL,
14	HUFFDEC_LITERAL, HUFFDEC_SUITABLE_POINTER, LITLEN_DECODE_BITS, LITLEN_DECODE_RESULTS,
15	LITLEN_ENOUGH, LITLEN_TABLE_BITS, OFFSET_DECODE_RESULTS, OFFSET_ENOUGH, OFFSET_TABLEBITS,
16	PRECODE_DECODE_RESULTS, PRECODE_ENOUGH, PRECODE_TABLE_BITS
17	};
18	use crate::errors::{DecodeErrorStatus, InflateDecodeErrors};
19	#[cfg(feature = "gzip")]
20	use crate::gzip_constants::{
21	GZIP_CM_DEFLATE, GZIP_FCOMMENT, GZIP_FEXTRA, GZIP_FHCRC, GZIP_FNAME, GZIP_FOOTER_SIZE,
22	GZIP_FRESERVED, GZIP_ID1, GZIP_ID2
23	};
24	use crate::utils::{copy_rep_matches, fixed_copy_within, make_decode_table_entry};
25
26	struct DeflateHeaderTables
27	{
28	litlen_decode_table: [u32; LITLEN_ENOUGH],
29	offset_decode_table: [u32; OFFSET_ENOUGH]
30	}
31
32	impl Default for DeflateHeaderTables
33	{
34	fn default() -> Self
35	{
36	DeflateHeaderTables {
37	litlen_decode_table: [`0`; LITLEN_ENOUGH],
38	offset_decode_table: [`0`; OFFSET_ENOUGH]
39	}
40	}
41	}
42
43	/// Options that can influence decompression
44	/// in Deflate/Zlib/Gzip
45	///
46	/// To use them, pass a customized options to
47	/// the deflate decoder.
48	#[derive(Copy, Clone)]
49	pub struct DeflateOptions
50	{
51	limit: usize,
52	confirm_checksum: bool,
53	size_hint: usize
54	}
55
56	impl Default for DeflateOptions
57	{
58	fn default() -> Self
59	{
60	DeflateOptions {
61	limit: `1` << `30`,
62	confirm_checksum: `true`,
63	size_hint: `37000`
64	}
65	}
66	}
67
68	impl DeflateOptions
69	{
70	/// Get deflate/zlib limit option
71	///
72	/// The decoder won't extend the inbuilt limit and will
73	/// return an error if the limit is exceeded
74	///
75	/// # Returns
76	/// The currently set limit of the instance
77	/// # Note
78	/// This is provided as a best effort, correctly quiting
79	/// is detrimental to speed and hence this should not be relied too much.
80	pub const fn get_limit(&self) -> usize
81	{
82	self.limit
83	}
84	/// Set a limit to the internal vector
85	/// used to store decoded zlib/deflate output.
86	///
87	/// # Arguments
88	/// limit: The new decompressor limit
89	/// # Returns
90	/// A modified version of DeflateDecoder
91	///
92	/// # Note
93	/// This is provided as a best effort, correctly quiting
94	/// is detrimental to speed and hence this should not be relied too much
95	#[must_use]
96	pub fn set_limit(mut self, limit: usize) -> Self
97	{
98	self.limit = limit;
99	self
100	}
101
102	/// Get whether the decoder will confirm a checksum
103	/// after decoding
104	pub const fn get_confirm_checksum(&self) -> bool
105	{
106	self.confirm_checksum
107	}
108	/// Set whether the decoder should confirm a checksum
109	/// after decoding
110	///
111	/// Note, you should definitely confirm your checksum, use
112	/// this with caution, otherwise data returned may be corrupt
113	///
114	/// # Arguments
115	/// - yes: When true, the decoder will confirm checksum
116	/// when false, the decoder will skip checksum verification
117	/// # Notes
118	/// This does not have an influence for deflate decoding as
119	/// it does not have a checksum
120	pub fn set_confirm_checksum(mut self, yes: bool) -> Self
121	{
122	self.confirm_checksum = yes;
123	self
124	}
125
126	/// Get the default set size hint for the decompressor
127	///
128	/// The decompressor initializes the internal storage for decompressed bytes
129	/// with this size and will reallocate the vec if the decompressed size becomes bigger
130	/// than this, but when the user currently knows how big the output will be, can be used
131	/// to prevent unnecessary re-allocations
132	pub const fn get_size_hint(&self) -> usize
133	{
134	self.size_hint
135	}
136	/// Set the size hint for the decompressor
137	///
138	/// This can be used to prevent multiple re-allocations
139	#[must_use]
140	pub const fn set_size_hint(mut self, hint: usize) -> Self
141	{
142	self.size_hint = hint;
143	self
144	}
145	}
146
147	/// A deflate decoder instance.
148	///
149	/// The decoder manages output buffer as opposed to requiring the caller to provide a pre-allocated buffer
150	/// it tracks number of bytes written and on successfully reaching the
151	/// end of the block, will return a vector with exactly
152	/// the number of decompressed bytes.
153	///
154	/// This means that it may use up huge amounts of memory if not checked, but
155	/// there are [options] that can prevent that
156	///
157	/// [options]: DeflateOptions
158	pub struct DeflateDecoder<'a>
159	{
160	data: &'a [u8],
161	position: usize,
162	stream: BitStreamReader<'a>,
163	is_last_block: bool,
164	static_codes_loaded: bool,
165	deflate_header_tables: DeflateHeaderTables,
166	options: DeflateOptions
167	}
168
169	impl<'a> DeflateDecoder<'a>
170	{
171	/// Create a new decompressor that will read compressed
172	/// data from `data` and return a new vector containing new data
173	///
174	/// # Arguments
175	/// - `data`: The compressed data. Data can be of any type
176	/// gzip,zlib or raw deflate.
177	///
178	/// # Returns
179	/// A decoder instance which will pull compressed data from `data` to inflate the output output
180	///
181	/// # Note
182	///
183	/// The default output size limit is 1 GiB.
184	/// this is to protect the end user against ddos attacks as deflate does not specify it's
185	/// output size upfront
186	///
187	/// The checksum will be verified depending on the called function.
188	/// this only works for zlib and gzip since deflate does not have a checksum
189	///
190	/// These defaults can be overridden via [new_with_options()](Self::new_with_options).
191	pub fn new(data: &'a [u8]) -> DeflateDecoder<'a>
192	{
193	let options = DeflateOptions::default();
194
195	Self::new_with_options(data, options)
196	}
197	/// Create new decoder with specified options
198	///
199	/// This can be used to fine tune the decoder to the user's
200	/// needs.
201	///
202	///
203	/// # Arguments
204	/// - `data`: The compressed data. Data can be of any format i.e
205	/// gzip, zlib or raw deflate.
206	/// - `options` : A set of user defined options which tune how the decompressor
207	///
208	/// # Returns
209	/// A decoder instance which will pull compressed data from `data` to inflate output
210	///
211	/// # Example
212	/// ```no_run
213	/// use zune_inflate::{DeflateDecoder, DeflateOptions};
214	/// let data = [`37`];
215	/// let options = DeflateOptions::default()
216	/// .set_confirm_checksum(`true`) // confirm the checksum for zlib and gzip
217	/// .set_limit(`1000`); // how big I think the input will be
218	/// let mut decoder = DeflateDecoder::new_with_options(&data,options);
219	/// // do some stuff and then call decode
220	/// let data = decoder.decode_zlib();
221	///
222	/// ```
223	pub fn new_with_options(data: &'a [u8], options: DeflateOptions) -> DeflateDecoder<'a>
224	{
225	// create stream
226	DeflateDecoder {
227	data,
228	position: `0`,
229	stream: BitStreamReader::new(data),
230	is_last_block: `false`,
231	static_codes_loaded: `false`,
232	deflate_header_tables: DeflateHeaderTables::default(),
233	options
234	}
235	}
236	/// Decode zlib-encoded data returning the uncompressed in a `Vec<u8>`
237	/// or an error if something went wrong.
238	///
239	/// Bytes consumed will be from the data passed when the
240	/// `new` method was called.
241	///
242	/// # Arguments
243	/// - None
244	/// # Returns
245	/// Result type containing the decoded data.
246	///
247	/// - `Ok(Vec<u8>)`: Decoded vector containing the uncompressed bytes
248	/// - `Err(InflateDecodeErrors)`: Error that occurred during decoding
249	///
250	/// It's possible to recover bytes even after an error occurred, bytes up
251	/// to when error was encountered are stored in [InflateDecodeErrors]
252	///
253	///
254	/// # Note
255	/// This needs the `zlib` feature enabled to be available otherwise it's a
256	/// compile time error
257	///
258	/// [InflateDecodeErrors]:crate::errors::InflateDecodeErrors
259	///
260	#[cfg(feature = "zlib")]
261	pub fn decode_zlib(&mut self) -> Result<Vec<u8>, InflateDecodeErrors>
262	{
263	use crate::utils::calc_adler_hash;
264
265	if self.data.len()
266	< `2` / zlib header /
267	+ `4`
268	/ Deflate /
269	{
270	return Err(InflateDecodeErrors::new_with_error(
271	DecodeErrorStatus::InsufficientData
272	));
273	}
274
275	// Zlib flags
276	// See https://www.ietf.org/rfc/rfc1950.txt for
277	// the RFC
278	let cmf = self.data[`0`];
279	let flg = self.data[`1`];
280
281	let cm = cmf & `0xF`;
282	let cinfo = cmf >> `4`;
283
284	// let fcheck = flg & 0xF;
285	// let fdict = (flg >> 4) & 1;
286	// let flevel = flg >> 5;
287
288	// confirm we have the right deflate methods
289	if cm != `8`
290	{
291	if cm == `15`
292	{
293	return Err(InflateDecodeErrors::new_with_error(DecodeErrorStatus::Generic(
294	"CM of 15 is preserved by the standard,currently don't know how to handle it"
295	)));
296	}
297	return Err(InflateDecodeErrors::new_with_error(
298	DecodeErrorStatus::GenericStr(format!("Unknown zlib compression method {cm}"))
299	));
300	}
301	if cinfo > `7`
302	{
303	return Err(InflateDecodeErrors::new_with_error(
304	DecodeErrorStatus::GenericStr(format!(
305	"Unknown cinfo `{cinfo}` greater than 7, not allowed"
306	))
307	));
308	}
309	let flag_checks = (u16::from(cmf) * `256`) + u16::from(flg);
310
311	if flag_checks % `31` != `0`
312	{
313	return Err(InflateDecodeErrors::new_with_error(
314	DecodeErrorStatus::Generic("FCHECK integrity not preserved")
315	));
316	}
317
318	self.position = `2`;
319
320	let data = self.decode_deflate()?;
321
322	if self.options.confirm_checksum
323	{
324	// Get number of consumed bytes from the input
325	let out_pos = self.stream.get_position() + self.position + self.stream.over_read;
326
327	// read adler
328	if let Some(adler) = self.data.get(out_pos..out_pos + `4`)
329	{
330	let adler_bits: [u8; `4`] = adler.try_into().unwrap();
331
332	let adler32_expected = u32::from_be_bytes(adler_bits);
333
334	let adler32_found = calc_adler_hash(&data);
335
336	if adler32_expected != adler32_found
337	{
338	let err_msg =
339	DecodeErrorStatus::MismatchedAdler(adler32_expected, adler32_found);
340	let err = InflateDecodeErrors::new(err_msg, data);
341
342	return Err(err);
343	}
344	}
345	else
346	{
347	let err = InflateDecodeErrors::new(DecodeErrorStatus::InsufficientData, data);
348
349	return Err(err);
350	}
351	}
352
353	Ok(data)
354	}
355
356	/// Decode a gzip encoded data and return the uncompressed data in a
357	/// `Vec<u8>` or an error if something went wrong
358	///
359	/// Bytes consumed will be from the data passed when the
360	/// `new` method was called.
361	///
362	/// # Arguments
363	/// - None
364	/// # Returns
365	/// Result type containing the decoded data.
366	///
367	/// - `Ok(Vec<u8>)`: Decoded vector containing the uncompressed bytes
368	/// - `Err(InflateDecodeErrors)`: Error that occurred during decoding
369	///
370	/// It's possible to recover bytes even after an error occurred, bytes up
371	/// to when error was encountered are stored in [InflateDecodeErrors]
372	///
373	/// # Note
374	/// This needs the `gzip` feature enabled to be available, otherwise it's a
375	/// compile time error
376	///
377	/// [InflateDecodeErrors]:crate::errors::InflateDecodeErrors
378	///
379	#[cfg(feature = "gzip")]
380	pub fn decode_gzip(&mut self) -> Result<Vec<u8>, InflateDecodeErrors>
381	{
382	if self.data.len() < `18`
383	{
384	return Err(InflateDecodeErrors::new_with_error(
385	DecodeErrorStatus::InsufficientData
386	));
387	}
388
389	if self.data[self.position] != GZIP_ID1
390	{
391	return Err(InflateDecodeErrors::new_with_error(
392	DecodeErrorStatus::CorruptData
393	));
394	}
395	self.position += `1`;
396	if self.data[self.position] != GZIP_ID2
397	{
398	return Err(InflateDecodeErrors::new_with_error(
399	DecodeErrorStatus::CorruptData
400	));
401	}
402	self.position += `1`;
403
404	if self.data[self.position] != GZIP_CM_DEFLATE
405	{
406	return Err(InflateDecodeErrors::new_with_error(
407	DecodeErrorStatus::CorruptData
408	));
409	}
410	self.position += `1`;
411
412	let flg = self.data[self.position];
413	self.position += `1`;
414
415	// skip mtime
416	self.position += `4`;
417	// skip xfl
418	self.position += `1`;
419	// skip os
420	self.position += `1`;
421
422	if (flg & GZIP_FRESERVED) != `0`
423	{
424	return Err(InflateDecodeErrors::new_with_error(
425	DecodeErrorStatus::CorruptData
426	));
427	}
428	// extra field
429	if (flg & GZIP_FEXTRA) != `0`
430	{
431	let len_bytes = self.data[self.position..self.position + `2`]
432	.try_into()
433	.unwrap();
434	let xlen = usize::from(u16::from_le_bytes(len_bytes));
435
436	self.position += `2`;
437
438	if self.data.len().saturating_sub(self.position) < xlen + GZIP_FOOTER_SIZE
439	{
440	return Err(InflateDecodeErrors::new_with_error(
441	DecodeErrorStatus::CorruptData
442	));
443	}
444	self.position += xlen;
445	}
446	// original file name zero terminated
447	if (flg & GZIP_FNAME) != `0`
448	{
449	loop
450	{
451	if let Some(byte) = self.data.get(self.position)
452	{
453	self.position += `1`;
454
455	if *byte == `0`
456	{
457	break;
458	}
459	}
460	else
461	{
462	return Err(InflateDecodeErrors::new_with_error(
463	DecodeErrorStatus::InsufficientData
464	));
465	}
466	}
467	}
468	// File comment zero terminated
469	if (flg & GZIP_FCOMMENT) != `0`
470	{
471	loop
472	{
473	if let Some(byte) = self.data.get(self.position)
474	{
475	self.position += `1`;
476
477	if *byte == `0`
478	{
479	break;
480	}
481	}
482	else
483	{
484	return Err(InflateDecodeErrors::new_with_error(
485	DecodeErrorStatus::InsufficientData
486	));
487	}
488	}
489	}
490	// crc16 for gzip header
491	if (flg & GZIP_FHCRC) != `0`
492	{
493	self.position += `2`;
494	}
495
496	if self.position + GZIP_FOOTER_SIZE > self.data.len()
497	{
498	return Err(InflateDecodeErrors::new_with_error(
499	DecodeErrorStatus::InsufficientData
500	));
501	}
502
503	let data = self.decode_deflate()?;
504
505	let mut out_pos = self.stream.get_position() + self.position + self.stream.over_read;
506
507	if self.options.confirm_checksum
508	{
509	// Get number of consumed bytes from the input
510
511	if let Some(crc) = self.data.get(out_pos..out_pos + `4`)
512	{
513	let crc_bits: [u8; `4`] = crc.try_into().unwrap();
514
515	let crc32_expected = u32::from_le_bytes(crc_bits);
516
517	let crc32_found = !crate::crc::crc32(&data, !`0`);
518
519	if crc32_expected != crc32_found
520	{
521	let err_msg = DecodeErrorStatus::MismatchedCRC(crc32_expected, crc32_found);
522	let err = InflateDecodeErrors::new(err_msg, data);
523
524	return Err(err);
525	}
526	}
527	else
528	{
529	let err = InflateDecodeErrors::new(DecodeErrorStatus::InsufficientData, data);
530
531	return Err(err);
532	}
533	}
534	//checksum
535	out_pos += `4`;
536
537	if let Some(val) = self.data.get(out_pos..out_pos + `4`)
538	{
539	let actual_bytes: [u8; `4`] = val.try_into().unwrap();
540	let ac = u32::from_le_bytes(actual_bytes) as usize;
541
542	if data.len() != ac
543	{
544	let err = DecodeErrorStatus::Generic("ISIZE does not match actual bytes");
545
546	let err = InflateDecodeErrors::new(err, data);
547
548	return Err(err);
549	}
550	}
551	else
552	{
553	let err = InflateDecodeErrors::new(DecodeErrorStatus::InsufficientData, data);
554
555	return Err(err);
556	}
557
558	Ok(data)
559	}
560	/// Decode a deflate stream returning the data as `Vec<u8>` or an error
561	/// indicating what went wrong.
562	/// # Arguments
563	/// - None
564	/// # Returns
565	/// Result type containing the decoded data.
566	///
567	/// - `Ok(Vec<u8>)`: Decoded vector containing the uncompressed bytes
568	/// - `Err(InflateDecodeErrors)`: Error that occurred during decoding
569	///
570	/// It's possible to recover bytes even after an error occurred, bytes up
571	/// to when error was encountered are stored in [InflateDecodeErrors]
572	///
573	///
574	/// # Example
575	/// ```no_run
576	/// let data = [`42`]; // answer to life, the universe and everything
577	///
578	/// let mut decoder = zune_inflate::DeflateDecoder::new(&data);
579	/// let bytes = decoder.decode_deflate().unwrap();
580	/// ```
581	///
582	/// [InflateDecodeErrors]:crate::errors::InflateDecodeErrors
583	pub fn decode_deflate(&mut self) -> Result<Vec<u8>, InflateDecodeErrors>
584	{
585	self.start_deflate_block()
586	}
587	/// Main inner loop for decompressing deflate data
588	#[allow(unused_assignments)]
589	fn start_deflate_block(&mut self) -> Result<Vec<u8>, InflateDecodeErrors>
590	{
591	// start deflate decode
592	// re-read the stream so that we can remove code read by zlib
593	self.stream = BitStreamReader::new(&self.data[self.position..]);
594
595	self.stream.refill();
596
597	// Output space for our decoded bytes.
598	let mut out_block = vec![`0`; self.options.size_hint];
599	// bits used
600
601	let mut src_offset = `0`;
602	let mut dest_offset = `0`;
603
604	loop
605	{
606	self.stream.refill();
607
608	self.is_last_block = self.stream.get_bits(`1`) == `1`;
609	let block_type = self.stream.get_bits(`2`);
610
611	if block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED
612	{
613	/*
614	* Uncompressed block: copy 'len' bytes literally from the input
615	* buffer to the output buffer.
616	*/
617	/*
618	* The RFC says that
619	* skip any remaining bits in current partially
620	* processed byte
621	* read LEN and NLEN (see next section)
622	* copy LEN bytes of data to output
623	*/
624
625	if self.stream.over_read > usize::from(self.stream.get_bits_left() >> `3`)
626	{
627	out_block.truncate(dest_offset);
628
629	let err_msg = DecodeErrorStatus::Generic("over-read stream");
630	let error = InflateDecodeErrors::new(err_msg, out_block);
631
632	return Err(error);
633	}
634	let partial_bits = self.stream.get_bits_left() & `7`;
635
636	self.stream.drop_bits(partial_bits);
637
638	let len = self.stream.get_bits(`16`) as u16;
639	let nlen = self.stream.get_bits(`16`) as u16;
640
641	// copy to deflate
642	if len != !nlen
643	{
644	out_block.truncate(dest_offset);
645
646	let err_msg = DecodeErrorStatus::Generic("Len and nlen do not match");
647	let error = InflateDecodeErrors::new(err_msg, out_block);
648
649	return Err(error);
650	}
651	let len = len as usize;
652
653	let start = self.stream.get_position() + self.position + self.stream.over_read;
654
655	// ensure there is enough space for a fast copy
656	if dest_offset + len + FASTCOPY_BYTES > out_block.len()
657	{
658	// and if there is not, resize
659	let new_len = out_block.len() + RESIZE_BY + len;
660
661	out_block.resize(new_len, `0`);
662	}
663
664	if self.data.get((start + len).saturating_sub(`1`)).is_none()
665	{
666	out_block.truncate(dest_offset);
667
668	let err_msg = DecodeErrorStatus::CorruptData;
669	let error = InflateDecodeErrors::new(err_msg, out_block);
670
671	return Err(error);
672	}
673	if dest_offset > self.options.limit
674	{
675	out_block.truncate(dest_offset);
676
677	let err_msg =
678	DecodeErrorStatus::OutputLimitExceeded(self.options.limit, out_block.len());
679	let error = InflateDecodeErrors::new(err_msg, out_block);
680
681	return Err(error);
682	}
683
684	out_block[dest_offset..dest_offset + len]
685	.copy_from_slice(&self.data[start..start + len]);
686
687	dest_offset += len;
688
689	// get the new position to write.
690	self.stream.position =
691	len + (self.stream.position - usize::from(self.stream.bits_left >> `3`));
692
693	self.stream.reset();
694
695	if self.is_last_block
696	{
697	break;
698	}
699
700	continue;
701	}
702	else if block_type == DEFLATE_BLOCKTYPE_RESERVED
703	{
704	out_block.truncate(dest_offset);
705
706	let err_msg = DecodeErrorStatus::Generic("Reserved block type 0b11 encountered");
707	let error = InflateDecodeErrors::new(err_msg, out_block);
708
709	return Err(error);
710	}
711
712	// build decode tables for static and dynamic tables
713	match self.build_decode_table(block_type)
714	{
715	Ok(_) => (),
716	Err(value) =>
717	{
718	out_block.truncate(dest_offset);
719
720	let err_msg = value;
721	let error = InflateDecodeErrors::new(err_msg, out_block);
722
723	return Err(error);
724	}
725	};
726
727	// Tables are mutated into the struct, so at this point we know the tables
728	// are loaded, take a reference to them
729	let litlen_decode_table = &self.deflate_header_tables.litlen_decode_table;
730	let offset_decode_table = &self.deflate_header_tables.offset_decode_table;
731
732	/*
733	* This is the "fast loop" for decoding literals and matches. It does
734	* bounds checks on in_next and out_next in the loop conditions so that
735	* additional bounds checks aren't needed inside the loop body.
736	*
737	* To reduce latency, the bit-buffer is refilled and the next litlen
738	* decode table entry is preloaded before each loop iteration.
739	*/
740	let (mut literal, mut length, mut offset, mut entry) = (`0`, `0`, `0`, `0`);
741
742	let mut saved_bitbuf;
743
744	'decode: loop
745	{
746	let close_src = `3` * FASTCOPY_BYTES < self.stream.remaining_bytes();
747
748	if close_src
749	{
750	self.stream.refill_inner_loop();
751
752	let lit_mask = self.stream.peek_bits::<LITLEN_DECODE_BITS>();
753
754	entry = litlen_decode_table[lit_mask];
755
756	'sequence: loop
757	{
758	// Resize the output vector here to ensure we can always have
759	// enough space for sloppy copies
760	if dest_offset + FASTLOOP_MAX_BYTES_WRITTEN > out_block.len()
761	{
762	let curr_len = out_block.len();
763	out_block.resize(curr_len + FASTLOOP_MAX_BYTES_WRITTEN + RESIZE_BY, `0`)
764	}
765	// At this point entry contains the next value of the litlen
766	// This will always be the case so meaning all our exit paths need
767	// to load in the next entry.
768
769	// recheck after every sequence
770	// when we hit continue, we need to recheck this
771	// as we are trying to emulate a do while
772	let new_check = self.stream.src.len() < self.stream.position + `8`;
773
774	if new_check
775	{
776	break 'sequence;
777	}
778
779	self.stream.refill_inner_loop();
780	/*
781	* Consume the bits for the litlen decode table entry. Save the
782	* original bit-buf for later, in case the extra match length
783	* bits need to be extracted from it.
784	*/
785	saved_bitbuf = self.stream.buffer;
786
787	self.stream.drop_bits((entry & `0xFF`) as u8);
788
789	/*
790	* Begin by checking for a "fast" literal, i.e. a literal that
791	* doesn't need a subtable.
792	*/
793	if (entry & HUFFDEC_LITERAL) != `0`
794	{
795	/*
796	* On 64-bit platforms, we decode up to 2 extra fast
797	* literals in addition to the primary item, as this
798	* increases performance and still leaves enough bits
799	* remaining for what follows. We could actually do 3,
800	* assuming LITLEN_TABLEBITS=11, but that actually
801	* decreases performance slightly (perhaps by messing
802	* with the branch prediction of the conditional refill
803	* that happens later while decoding the match offset).
804	*/
805
806	literal = entry >> `16`;
807
808	let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();
809
810	entry = litlen_decode_table[new_pos];
811	saved_bitbuf = self.stream.buffer;
812
813	self.stream.drop_bits(entry as u8);
814
815	let out: &mut [u8; `2`] = out_block
816	.get_mut(dest_offset..dest_offset + `2`)
817	.unwrap()
818	.try_into()
819	.unwrap();
820
821	out[`0`] = literal as u8;
822	dest_offset += `1`;
823
824	if (entry & HUFFDEC_LITERAL) != `0`
825	{
826	/*
827	* Another fast literal, but this one is in lieu of the
828	* primary item, so it doesn't count as one of the extras.
829	*/
830
831	// load in the next entry.
832	literal = entry >> `16`;
833
834	let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();
835
836	entry = litlen_decode_table[new_pos];
837
838	out[`1`] = literal as u8;
839	dest_offset += `1`;
840
841	continue;
842	}
843	}
844	/*
845	* It's not a literal entry, so it can be a length entry, a
846	* subtable pointer entry, or an end-of-block entry. Detect the
847	* two unlikely cases by testing the HUFFDEC_EXCEPTIONAL flag.
848	*/
849	if (entry & HUFFDEC_EXCEPTIONAL) != `0`
850	{
851	// Subtable pointer or end of block entry
852	if (entry & HUFFDEC_END_OF_BLOCK) != `0`
853	{
854	// block done
855	break 'decode;
856	}
857	/*
858	* A subtable is required. Load and consume the
859	* subtable entry. The subtable entry can be of any
860	* type: literal, length, or end-of-block.
861	*/
862	let entry_position = ((entry >> `8`) & `0x3F`) as usize;
863	let mut pos = (entry >> `16`) as usize;
864
865	saved_bitbuf = self.stream.buffer;
866
867	pos += self.stream.peek_var_bits(entry_position);
868	entry = litlen_decode_table[pos.min(LITLEN_ENOUGH - `1`)];
869
870	self.stream.drop_bits(entry as u8);
871
872	if (entry & HUFFDEC_LITERAL) != `0`
873	{
874	// decode a literal that required a sub table
875	let new_pos = self.stream.peek_bits::<LITLEN_DECODE_BITS>();
876
877	literal = entry >> `16`;
878	entry = litlen_decode_table[new_pos];
879
880	out_block.get_mut(dest_offset).unwrap_or(&mut* `0`) =
881	(literal & `0xFF`) as u8;
882
883	dest_offset += `1`;
884
885	continue;
886	}
887
888	if (entry & HUFFDEC_END_OF_BLOCK) != `0`
889	{
890	break 'decode;
891	}
892	}
893
894	// At this point,we dropped at most 22 bits(LITLEN_DECODE is 11 and we
895	// can do it twice), we now just have 34 bits min remaining.
896
897	/*
898	* Decode the match length: the length base value associated
899	* with the litlen symbol (which we extract from the decode
900	* table entry), plus the extra length bits. We don't need to
901	* consume the extra length bits here, as they were included in
902	* the bits consumed by the entry earlier. We also don't need
903	* to check for too-long matches here, as this is inside the
904	* fast loop where it's already been verified that the output
905	* buffer has enough space remaining to copy a max-length match.
906	*/
907	let entry_dup = entry;
908
909	entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
910	length = (entry_dup >> `16`) as usize;
911
912	let mask = (`1` << entry_dup as u8) - `1`;
913
914	length += (saved_bitbuf & mask) as usize >> ((entry_dup >> `8`) as u8);
915
916	// offset requires a subtable
917	if (entry & HUFFDEC_EXCEPTIONAL) != `0`
918	{
919	self.stream.drop_bits(OFFSET_TABLEBITS as u8);
920	let extra = self.stream.peek_var_bits(((entry >> `8`) & `0x3F`) as usize);
921	entry = offset_decode_table[((entry >> `16`) as usize + extra) & `511`];
922	// refill to handle some weird edge case where we have
923	// less bits than needed for reading the lit-len
924	}
925	saved_bitbuf = self.stream.buffer;
926
927	self.stream.drop_bits((entry & `0xFF`) as u8);
928
929	let mask = (`1` << entry as u8) - `1`;
930
931	offset = (entry >> `16`) as usize;
932	offset += (saved_bitbuf & mask) as usize >> (((entry >> `8`) & `0xFF`) as u8);
933
934	if offset > dest_offset
935	{
936	out_block.truncate(dest_offset);
937
938	let err_msg = DecodeErrorStatus::CorruptData;
939	let error = InflateDecodeErrors::new(err_msg, out_block);
940
941	return Err(error);
942	}
943
944	src_offset = dest_offset - offset;
945
946	if self.stream.bits_left < `11`
947	{
948	self.stream.refill_inner_loop();
949	}
950	// Copy some bytes unconditionally
951	// This makes us copy smaller match lengths quicker because we don't need
952	// a loop + don't send too much pressure to the Memory unit.
953	fixed_copy_within::<FASTCOPY_BYTES>(
954	&mut out_block,
955	src_offset,
956	dest_offset
957	);
958
959	entry = litlen_decode_table[self.stream.peek_bits::<LITLEN_DECODE_BITS>()];
960
961	let mut current_position = dest_offset;
962
963	dest_offset += length;
964
965	if offset == `1`
966	{
967	// RLE fill with a single byte
968	let byte_to_repeat = out_block[src_offset];
969	out_block[current_position..dest_offset].fill(byte_to_repeat);
970	}
971	else if offset <= FASTCOPY_BYTES
972	&& current_position + offset < dest_offset
973	{
974	// The second conditional ensures we only come
975	// here if the first copy didn't succeed to copy just enough bytes for a rep
976	// match to be valid, i.e we want this path to be taken the least amount
977	// of times possible
978
979	// the unconditional copy above copied some bytes
980	// don't let it go into waste
981	// Increment the position we are in by the number of correct bytes
982	// currently copied
983	let mut src_position = src_offset + offset;
984	let mut dest_position = current_position + offset;
985
986	// loop copying offset bytes in place
987	// notice this loop does fixed copies but increments in offset bytes :)
988	// that is intentional.
989	loop
990	{
991	fixed_copy_within::<FASTCOPY_BYTES>(
992	&mut out_block,
993	src_position,
994	dest_position
995	);
996
997	src_position += offset;
998	dest_position += offset;
999
1000	if dest_position > dest_offset
1001	{
1002	break;
1003	}
1004	}
1005	}
1006	else if length > FASTCOPY_BYTES
1007	{
1008	current_position += FASTCOPY_BYTES;
1009	// fast non-overlapping copy
1010	//
1011	// We have enough space to write the ML+FAST_COPY bytes ahead
1012	// so we know this won't come to shoot us in the foot.
1013	//
1014	// An optimization is to copy FAST_COPY_BITS per invocation
1015	// Currently FASTCOPY_BYTES is 16, this fits in nicely as we
1016	// it's a single SIMD instruction on a lot of things, i.e x86,Arm and even
1017	// wasm.
1018
1019	// current position of the match
1020	let mut dest_src_offset = src_offset + FASTCOPY_BYTES;
1021
1022	// Number of bytes we are to copy
1023	// copy in batches of FAST_BYTES
1024	'match_lengths: loop
1025	{
1026	// Safety: We resized out_block hence we know it can handle
1027	// sloppy copies without it being out of bounds
1028	//
1029	// Reason: This is a latency critical loop, even branches start
1030	// to matter
1031	fixed_copy_within::<FASTCOPY_BYTES>(
1032	&mut out_block,
1033	dest_src_offset,
1034	current_position
1035	);
1036
1037	dest_src_offset += FASTCOPY_BYTES;
1038	current_position += FASTCOPY_BYTES;
1039
1040	if current_position > dest_offset
1041	{
1042	break 'match_lengths;
1043	}
1044	}
1045	}
1046
1047	if dest_offset > self.options.limit
1048	{
1049	out_block.truncate(dest_offset);
1050
1051	let err_msg = DecodeErrorStatus::OutputLimitExceeded(
1052	self.options.limit,
1053	dest_offset
1054	);
1055	let error = InflateDecodeErrors::new(err_msg, out_block);
1056
1057	return Err(error);
1058	}
1059
1060	if self.stream.src.len() < self.stream.position + `8`
1061	{
1062	// close to input end, move to the slower one
1063	break 'sequence;
1064	}
1065	}
1066	}
1067	// generic loop that does things a bit slower but it's okay since it doesn't
1068	// deal with a lot of things
1069	// We can afford to be more careful here, checking that we do
1070	// not drop non-existent bits etc etc as we do not have the
1071	// assurances of the fast loop bits above.
1072	loop
1073	{
1074	self.stream.refill();
1075
1076	if self.stream.over_read > usize::from(self.stream.bits_left >> `3`)
1077	{
1078	out_block.truncate(dest_offset);
1079
1080	let err_msg = DecodeErrorStatus::CorruptData;
1081	let error = InflateDecodeErrors::new(err_msg, out_block);
1082
1083	return Err(error);
1084	}
1085
1086	let literal_mask = self.stream.peek_bits::<LITLEN_DECODE_BITS>();
1087
1088	entry = litlen_decode_table[literal_mask];
1089
1090	saved_bitbuf = self.stream.buffer;
1091
1092	self.stream.drop_bits((entry & `0xFF`) as u8);
1093
1094	if (entry & HUFFDEC_SUITABLE_POINTER) != `0`
1095	{
1096	let extra = self.stream.peek_var_bits(((entry >> `8`) & `0x3F`) as usize);
1097
1098	entry = litlen_decode_table[(entry >> `16`) as usize + extra];
1099	saved_bitbuf = self.stream.buffer;
1100
1101	self.stream.drop_bits((entry & `0xFF`) as u8);
1102	}
1103
1104	length = (entry >> `16`) as usize;
1105
1106	if (entry & HUFFDEC_LITERAL) != `0`
1107	{
1108	resize_and_push(&mut out_block, dest_offset, length as u8);
1109
1110	dest_offset += `1`;
1111
1112	continue;
1113	}
1114
1115	if (entry & HUFFDEC_END_OF_BLOCK) != `0`
1116	{
1117	break 'decode;
1118	}
1119
1120	let mask = (`1` << entry as u8) - `1`;
1121
1122	length += (saved_bitbuf & mask) as usize >> ((entry >> `8`) as u8);
1123
1124	self.stream.refill();
1125
1126	entry = offset_decode_table[self.stream.peek_bits::<OFFSET_TABLEBITS>()];
1127
1128	if (entry & HUFFDEC_EXCEPTIONAL) != `0`
1129	{
1130	// offset requires a subtable
1131	self.stream.drop_bits(OFFSET_TABLEBITS as u8);
1132
1133	let extra = self.stream.peek_var_bits(((entry >> `8`) & `0x3F`) as usize);
1134
1135	entry = offset_decode_table[((entry >> `16`) as usize + extra) & `511`];
1136	}
1137
1138	// ensure there is enough space for a fast copy
1139	if dest_offset + length + FASTCOPY_BYTES > out_block.len()
1140	{
1141	let new_len = out_block.len() + RESIZE_BY + length;
1142	out_block.resize(new_len, `0`);
1143	}
1144	saved_bitbuf = self.stream.buffer;
1145
1146	let mask = (`1` << (entry & `0xFF`) as u8) - `1`;
1147
1148	offset = (entry >> `16`) as usize;
1149	offset += (saved_bitbuf & mask) as usize >> ((entry >> `8`) as u8);
1150
1151	if offset > dest_offset
1152	{
1153	out_block.truncate(dest_offset);
1154
1155	let err_msg = DecodeErrorStatus::CorruptData;
1156	let error = InflateDecodeErrors::new(err_msg, out_block);
1157
1158	return Err(error);
1159	}
1160
1161	src_offset = dest_offset - offset;
1162
1163	self.stream.drop_bits(entry as u8);
1164
1165	let (dest_src, dest_ptr) = out_block.split_at_mut(dest_offset);
1166
1167	if src_offset + length + FASTCOPY_BYTES > dest_offset
1168	{
1169	// overlapping copy
1170	// do a simple rep match
1171	copy_rep_matches(&mut out_block, src_offset, dest_offset, length);
1172	}
1173	else
1174	{
1175	dest_ptr[`0`..length]
1176	.copy_from_slice(&dest_src[src_offset..src_offset + length]);
1177	}
1178
1179	dest_offset += length;
1180
1181	if dest_offset > self.options.limit
1182	{
1183	out_block.truncate(dest_offset);
1184
1185	let err_msg =
1186	DecodeErrorStatus::OutputLimitExceeded(self.options.limit, dest_offset);
1187	let error = InflateDecodeErrors::new(err_msg, out_block);
1188
1189	return Err(error);
1190	}
1191	}
1192	}
1193	/*
1194	* If any of the implicit appended zero bytes were consumed (not just
1195	* refilled) before hitting end of stream, then the data is bad.
1196	*/
1197	if self.stream.over_read > usize::from(self.stream.bits_left >> `3`)
1198	{
1199	out_block.truncate(dest_offset);
1200
1201	let err_msg = DecodeErrorStatus::CorruptData;
1202	let error = InflateDecodeErrors::new(err_msg, out_block);
1203
1204	return Err(error);
1205	}
1206
1207	if self.is_last_block
1208	{
1209	break;
1210	}
1211	}
1212
1213	// decompression. DONE
1214	// Truncate data to match the number of actual
1215	// bytes written.
1216	out_block.truncate(dest_offset);
1217
1218	Ok(out_block)
1219	}
1220
1221	/// Build decode tables for static and dynamic
1222	/// huffman blocks.
1223	fn build_decode_table(&mut self, block_type: u64) -> Result<(), DecodeErrorStatus>
1224	{
1225	const COUNT: usize =
1226	DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS + DELFATE_MAX_LENS_OVERRUN;
1227
1228	let mut lens = [`0_u8`; COUNT];
1229	let mut precode_lens = [`0`; DEFLATE_NUM_PRECODE_SYMS];
1230	let mut precode_decode_table = [`0_u32`; PRECODE_ENOUGH];
1231	let mut litlen_decode_table = [`0_u32`; LITLEN_ENOUGH];
1232	let mut offset_decode_table = [`0`; OFFSET_ENOUGH];
1233
1234	let mut num_litlen_syms = `0`;
1235	let mut num_offset_syms = `0`;
1236
1237	if block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN
1238	{
1239	const SINGLE_PRECODE: usize = `3`;
1240
1241	self.static_codes_loaded = `false`;
1242
1243	// Dynamic Huffman block
1244	// Read codeword lengths
1245	if !self.stream.has(`5` + `5` + `4`)
1246	{
1247	return Err(DecodeErrorStatus::InsufficientData);
1248	}
1249
1250	num_litlen_syms = `257` + (self.stream.get_bits(`5`)) as usize;
1251	num_offset_syms = `1` + (self.stream.get_bits(`5`)) as usize;
1252
1253	let num_explicit_precode_lens = `4` + (self.stream.get_bits(`4`)) as usize;
1254
1255	self.stream.refill();
1256
1257	if !self.stream.has(`3`)
1258	{
1259	return Err(DecodeErrorStatus::InsufficientData);
1260	}
1261
1262	let first_precode = self.stream.get_bits(`3`) as u8;
1263	let expected = (SINGLE_PRECODE * num_explicit_precode_lens.saturating_sub(`1`)) as u8;
1264
1265	precode_lens[usize::from(DEFLATE_PRECODE_LENS_PERMUTATION[`0`])] = first_precode;
1266
1267	self.stream.refill();
1268
1269	if !self.stream.has(expected)
1270	{
1271	return Err(DecodeErrorStatus::InsufficientData);
1272	}
1273
1274	for i in DEFLATE_PRECODE_LENS_PERMUTATION[`1`..]
1275	.iter()
1276	.take(num_explicit_precode_lens - `1`)
1277	{
1278	let bits = self.stream.get_bits(`3`) as u8;
1279
1280	precode_lens[usize::from(*i)] = bits;
1281	}
1282
1283	self.build_decode_table_inner(
1284	&precode_lens,
1285	&PRECODE_DECODE_RESULTS,
1286	&mut precode_decode_table,
1287	PRECODE_TABLE_BITS,
1288	DEFLATE_NUM_PRECODE_SYMS,
1289	DEFLATE_MAX_CODEWORD_LENGTH
1290	)?;
1291
1292	/ Decode the litlen and offset codeword lengths. /
1293
1294	let mut i = `0`;
1295
1296	loop
1297	{
1298	if i >= num_litlen_syms + num_offset_syms
1299	{
1300	// confirm here since with a continue loop stuff
1301	// breaks
1302	break;
1303	}
1304
1305	let rep_val: u8;
1306	let rep_count: u64;
1307
1308	if !self.stream.has(DEFLATE_MAX_PRE_CODEWORD_LEN + `7`)
1309	{
1310	self.stream.refill();
1311	}
1312	// decode next pre-code symbol
1313	let entry_pos = self
1314	.stream
1315	.peek_bits::<{ DEFLATE_MAX_PRE_CODEWORD_LEN as usize }>();
1316
1317	let entry = precode_decode_table[entry_pos];
1318	let presym = entry >> `16`;
1319
1320	if !self.stream.has(entry as u8)
1321	{
1322	return Err(DecodeErrorStatus::InsufficientData);
1323	}
1324
1325	self.stream.drop_bits(entry as u8);
1326
1327	if presym < `16`
1328	{
1329	// explicit codeword length
1330	lens[i] = presym as u8;
1331	i += `1`;
1332	continue;
1333	}
1334
1335	/ Run-length encoded codeword lengths /
1336
1337	/*
1338	* Note: we don't need verify that the repeat count
1339	* doesn't overflow the number of elements, since we've
1340	* sized the lens array to have enough extra space to
1341	* allow for the worst-case overrun (138 zeroes when
1342	* only 1 length was remaining).
1343	*
1344	* In the case of the small repeat counts (presyms 16
1345	* and 17), it is fastest to always write the maximum
1346	* number of entries. That gets rid of branches that
1347	* would otherwise be required.
1348	*
1349	* It is not just because of the numerical order that
1350	* our checks go in the order 'presym < 16', 'presym ==
1351	* 16', and 'presym == 17'. For typical data this is
1352	* ordered from most frequent to least frequent case.
1353	*/
1354	if presym == `16`
1355	{
1356	if i == `0`
1357	{
1358	return Err(DecodeErrorStatus::CorruptData);
1359	}
1360
1361	if !self.stream.has(`2`)
1362	{
1363	return Err(DecodeErrorStatus::InsufficientData);
1364	}
1365
1366	// repeat previous length three to 6 times
1367	rep_val = lens[i - `1`];
1368	rep_count = `3` + self.stream.get_bits(`2`);
1369	lens[i..i + `6`].fill(rep_val);
1370	i += rep_count as usize;
1371	}
1372	else if presym == `17`
1373	{
1374	if !self.stream.has(`3`)
1375	{
1376	return Err(DecodeErrorStatus::InsufficientData);
1377	}
1378	/ Repeat zero 3 - 10 times. /
1379	rep_count = `3` + self.stream.get_bits(`3`);
1380	lens[i..i + `10`].fill(`0`);
1381	i += rep_count as usize;
1382	}
1383	else
1384	{
1385	if !self.stream.has(`7`)
1386	{
1387	return Err(DecodeErrorStatus::InsufficientData);
1388	}
1389	// repeat zero 11-138 times.
1390	rep_count = `11` + self.stream.get_bits(`7`);
1391	lens[i..i + rep_count as usize].fill(`0`);
1392	i += rep_count as usize;
1393	}
1394
1395	if i >= num_litlen_syms + num_offset_syms
1396	{
1397	break;
1398	}
1399	}
1400	}
1401	else if block_type == DEFLATE_BLOCKTYPE_STATIC
1402	{
1403	if self.static_codes_loaded
1404	{
1405	return Ok(());
1406	}
1407
1408	self.static_codes_loaded = `true`;
1409
1410	lens[`000`..`144`].fill(`8`);
1411	lens[`144`..`256`].fill(`9`);
1412	lens[`256`..`280`].fill(`7`);
1413	lens[`280`..`288`].fill(`8`);
1414	lens[`288`..].fill(`5`);
1415
1416	num_litlen_syms = `288`;
1417	num_offset_syms = `32`;
1418	}
1419	// build offset decode table
1420	self.build_decode_table_inner(
1421	&lens[num_litlen_syms..],
1422	&OFFSET_DECODE_RESULTS,
1423	&mut offset_decode_table,
1424	OFFSET_TABLEBITS,
1425	num_offset_syms,
1426	DEFLATE_MAX_OFFSET_CODEWORD_LENGTH
1427	)?;
1428
1429	self.build_decode_table_inner(
1430	&lens,
1431	&LITLEN_DECODE_RESULTS,
1432	&mut litlen_decode_table,
1433	LITLEN_TABLE_BITS,
1434	num_litlen_syms,
1435	DEFLATE_MAX_LITLEN_CODEWORD_LENGTH
1436	)?;
1437
1438	self.deflate_header_tables.offset_decode_table = offset_decode_table;
1439	self.deflate_header_tables.litlen_decode_table = litlen_decode_table;
1440
1441	Ok(())
1442	}
1443	/// Build the decode table for the precode
1444	#[allow(clippy::needless_range_loop)]
1445	fn build_decode_table_inner(
1446	&mut self, lens: &[u8], decode_results: &[u32], decode_table: &mut [u32],
1447	table_bits: usize, num_syms: usize, mut max_codeword_len: usize
1448	) -> Result<(), DecodeErrorStatus>
1449	{
1450	const BITS: u32 = usize::BITS - `1`;
1451
1452	let mut len_counts: [u32; DEFLATE_MAX_CODEWORD_LENGTH + `1`] =
1453	[`0`; DEFLATE_MAX_CODEWORD_LENGTH + `1`];
1454	let mut offsets: [u32; DEFLATE_MAX_CODEWORD_LENGTH + `1`] =
1455	[`0`; DEFLATE_MAX_CODEWORD_LENGTH + `1`];
1456	let mut sorted_syms: [u16; DEFLATE_MAX_NUM_SYMS] = [`0`; DEFLATE_MAX_NUM_SYMS];
1457
1458	let mut i;
1459
1460	// count how many codewords have each length, including 0.
1461	for sym in `0`..num_syms
1462	{
1463	len_counts[usize::from(lens[sym])] += `1`;
1464	}
1465
1466	/*
1467	* Determine the actual maximum codeword length that was used, and
1468	* decrease table_bits to it if allowed.
1469	*/
1470	while max_codeword_len > `1` && len_counts[max_codeword_len] == `0`
1471	{
1472	max_codeword_len -= `1`;
1473	}
1474	/*
1475	* Sort the symbols primarily by increasing codeword length and
1476	* A temporary array of length @num_syms.
1477	* secondarily by increasing symbol value; or equivalently by their
1478	* codewords in lexicographic order, since a canonical code is assumed.
1479	*
1480	* For efficiency, also compute 'codespace_used' in the same pass over
1481	* 'len_counts[]' used to build 'offsets[]' for sorting.
1482	*/
1483	offsets[`0`] = `0`;
1484	offsets[`1`] = len_counts[`0`];
1485
1486	let mut codespace_used = `0_u32`;
1487
1488	for len in `1`..max_codeword_len
1489	{
1490	offsets[len + `1`] = offsets[len] + len_counts[len];
1491	codespace_used = (codespace_used << `1`) + len_counts[len];
1492	}
1493	codespace_used = (codespace_used << `1`) + len_counts[max_codeword_len];
1494
1495	for sym in `0`..num_syms
1496	{
1497	let pos = usize::from(lens[sym]);
1498	sorted_syms[offsets[pos] as usize] = sym as u16;
1499	offsets[pos] += `1`;
1500	}
1501	i = (offsets[`0`]) as usize;
1502
1503	/*
1504	* Check whether the lengths form a complete code (exactly fills the
1505	* codespace), an incomplete code (doesn't fill the codespace), or an
1506	* overfull code (overflows the codespace). A codeword of length 'n'
1507	* uses proportion '1/(2^n)' of the codespace. An overfull code is
1508	* nonsensical, so is considered invalid. An incomplete code is
1509	* considered valid only in two specific cases; see below.
1510	*/
1511
1512	// Overfull code
1513	if codespace_used > `1` << max_codeword_len
1514	{
1515	return Err(DecodeErrorStatus::Generic("Overflown code"));
1516	}
1517	// incomplete code
1518	if codespace_used < `1` << max_codeword_len
1519	{
1520	let entry = if codespace_used == `0`
1521	{
1522	/*
1523	* An empty code is allowed. This can happen for the
1524	* offset code in DEFLATE, since a dynamic Huffman block
1525	* need not contain any matches.
1526	*/
1527
1528	/ sym=0, len=1 (arbitrary) /
1529	make_decode_table_entry(decode_results, `0`, `1`)
1530	}
1531	else
1532	{
1533	/*
1534	* Allow codes with a single used symbol, with codeword
1535	* length 1. The DEFLATE RFC is unclear regarding this
1536	* case. What zlib's decompressor does is permit this
1537	* for the litlen and offset codes and assume the
1538	* codeword is '0' rather than '1'. We do the same
1539	* except we allow this for precodes too, since there's
1540	* no convincing reason to treat the codes differently.
1541	* We also assign both codewords '0' and '1' to the
1542	* symbol to avoid having to handle '1' specially.
1543	*/
1544	if codespace_used != `1` << (max_codeword_len - `1`) \|\| len_counts[`1`] != `1`
1545	{
1546	return Err(DecodeErrorStatus::Generic(
1547	"Cannot work with empty pre-code table"
1548	));
1549	}
1550	make_decode_table_entry(decode_results, usize::from(sorted_syms[i]), `1`)
1551	};
1552	/*
1553	* Note: the decode table still must be fully initialized, in
1554	* case the stream is malformed and contains bits from the part
1555	* of the codespace the incomplete code doesn't use.
1556	*/
1557	decode_table.fill(entry);
1558	return Ok(());
1559	}
1560
1561	/*
1562	* The lengths form a complete code. Now, enumerate the codewords in
1563	* lexicographic order and fill the decode table entries for each one.
1564	*
1565	* First, process all codewords with len <= table_bits. Each one gets
1566	* '2^(table_bits-len)' direct entries in the table.
1567	*
1568	* Since DEFLATE uses bit-reversed codewords, these entries aren't
1569	* consecutive but rather are spaced '2^len' entries apart. This makes
1570	* filling them naively somewhat awkward and inefficient, since strided
1571	* stores are less cache-friendly and preclude the use of word or
1572	* vector-at-a-time stores to fill multiple entries per instruction.
1573	*
1574	* To optimize this, we incrementally double the table size. When
1575	* processing codewords with length 'len', the table is treated as
1576	* having only '2^len' entries, so each codeword uses just one entry.
1577	* Then, each time 'len' is incremented, the table size is doubled and
1578	* the first half is copied to the second half. This significantly
1579	* improves performance over naively doing strided stores.
1580	*
1581	* Note that some entries copied for each table doubling may not have
1582	* been initialized yet, but it doesn't matter since they're guaranteed
1583	* to be initialized later (because the Huffman code is complete).
1584	*/
1585	let mut codeword = `0`;
1586	let mut len = `1`;
1587	let mut count = len_counts[`1`];
1588
1589	while count == `0`
1590	{
1591	len += `1`;
1592
1593	if len >= len_counts.len()
1594	{
1595	break;
1596	}
1597	count = len_counts[len];
1598	}
1599
1600	let mut curr_table_end = `1` << len;
1601
1602	while len <= table_bits
1603	{
1604	// Process all count codewords with length len
1605	loop
1606	{
1607	let entry = make_decode_table_entry(
1608	decode_results,
1609	usize::from(sorted_syms[i]),
1610	len as u32
1611	);
1612	i += `1`;
1613	// fill first entry for current codeword
1614	decode_table[codeword] = entry;
1615
1616	if codeword == curr_table_end - `1`
1617	{
1618	// last codeword (all 1's)
1619	for _ in len..table_bits
1620	{
1621	decode_table.copy_within(`0`..curr_table_end, curr_table_end);
1622
1623	curr_table_end <<= `1`;
1624	}
1625	return Ok(());
1626	}
1627	/*
1628	* To advance to the lexicographically next codeword in
1629	* the canonical code, the codeword must be incremented,
1630	* then 0's must be appended to the codeword as needed
1631	* to match the next codeword's length.
1632	*
1633	* Since the codeword is bit-reversed, appending 0's is
1634	* a no-op. However, incrementing it is nontrivial. To
1635	* do so efficiently, use the 'bsr' instruction to find
1636	* the last (highest order) 0 bit in the codeword, set
1637	* it, and clear any later (higher order) 1 bits. But
1638	* 'bsr' actually finds the highest order 1 bit, so to
1639	* use it first flip all bits in the codeword by XOR' ing
1640	* it with (1U << len) - 1 == cur_table_end - 1.
1641	*/
1642
1643	let adv = BITS - (codeword ^ (curr_table_end - `1`)).leading_zeros();
1644	let bit = `1` << adv;
1645
1646	codeword &= bit - `1`;
1647	codeword \|= bit;
1648	count -= `1`;
1649
1650	if count == `0`
1651	{
1652	break;
1653	}
1654	}
1655	// advance to the next codeword length
1656	loop
1657	{
1658	len += `1`;
1659
1660	if len <= table_bits
1661	{
1662	// dest is decode_table[curr_table_end]
1663	// source is decode_table(start of table);
1664	// size is curr_table;
1665
1666	decode_table.copy_within(`0`..curr_table_end, curr_table_end);
1667
1668	//decode_table.copy_within(range, curr_table_end);
1669	curr_table_end <<= `1`;
1670	}
1671	count = len_counts[len];
1672
1673	if count != `0`
1674	{
1675	break;
1676	}
1677	}
1678	}
1679	// process codewords with len > table_bits.
1680	// Require sub-tables
1681	curr_table_end = `1` << table_bits;
1682
1683	let mut subtable_prefix = usize::MAX;
1684	let mut subtable_start = `0`;
1685	let mut subtable_bits;
1686
1687	loop
1688	{
1689	/*
1690	* Start a new sub-table if the first 'table_bits' bits of the
1691	* codeword don't match the prefix of the current subtable.
1692	*/
1693	if codeword & ((`1_usize` << table_bits) - `1`) != subtable_prefix
1694	{
1695	subtable_prefix = codeword & ((`1` << table_bits) - `1`);
1696	subtable_start = curr_table_end;
1697
1698	/*
1699	* Calculate the subtable length. If the codeword has
1700	* length 'table_bits + n', then the subtable needs
1701	* '2^n' entries. But it may need more; if fewer than
1702	* '2^n' codewords of length 'table_bits + n' remain,
1703	* then the length will need to be incremented to bring
1704	* in longer codewords until the subtable can be
1705	* completely filled. Note that because the Huffman
1706	* code is complete, it will always be possible to fill
1707	* the sub-table eventually.
1708	*/
1709	subtable_bits = len - table_bits;
1710	codespace_used = count;
1711
1712	while codespace_used < (`1` << subtable_bits)
1713	{
1714	subtable_bits += `1`;
1715
1716	if subtable_bits + table_bits > `15`
1717	{
1718	return Err(DecodeErrorStatus::CorruptData);
1719	}
1720
1721	codespace_used = (codespace_used << `1`) + len_counts[table_bits + subtable_bits];
1722	}
1723
1724	/*
1725	* Create the entry that points from the main table to
1726	* the subtable.
1727	*/
1728	decode_table[subtable_prefix] = (subtable_start as u32) << `16`
1729	\| HUFFDEC_EXCEPTIONAL
1730	\| HUFFDEC_SUITABLE_POINTER
1731	\| (subtable_bits as u32) << `8`
1732	\| table_bits as u32;
1733
1734	curr_table_end = subtable_start + (`1` << subtable_bits);
1735	}
1736
1737	/ Fill the sub-table entries for the current codeword. /
1738
1739	let stride = `1` << (len - table_bits);
1740
1741	let mut j = subtable_start + (codeword >> table_bits);
1742
1743	let entry = make_decode_table_entry(
1744	decode_results,
1745	sorted_syms[i] as usize,
1746	(len - table_bits) as u32
1747	);
1748	i += `1`;
1749
1750	while j < curr_table_end
1751	{
1752	decode_table[j] = entry;
1753	j += stride;
1754	}
1755	//advance to the next codeword
1756	if codeword == (`1` << len) - `1`
1757	{
1758	// last codeword
1759	return Ok(());
1760	}
1761
1762	let adv = BITS - (codeword ^ ((`1` << len) - `1`)).leading_zeros();
1763	let bit = `1` << adv;
1764
1765	codeword &= bit - `1`;
1766	codeword \|= bit;
1767	count -= `1`;
1768
1769	while count == `0`
1770	{
1771	len += `1`;
1772	count = len_counts[len];
1773	}
1774	}
1775	}
1776	}
1777
1778	const RESIZE_BY: usize = `1024` * `4`; // 4 kb
1779
1780	/// Resize vector if its current space wont
1781	/// be able to store a new byte and then push an element to that new space
1782	#[inline(always)]
1783	fn resize_and_push(buf: &mut Vec<u8>, position: usize, elm: u8)
1784	{
1785	if buf.len() <= position
1786	{
1787	let new_len: usize = buf.len() + RESIZE_BY;
1788	buf.resize(new_len, value:`0`);
1789	}
1790	buf[position] = elm;
1791	}
1792