use super::{stream::FormatErrorInner, DecodingError, CHUNK_BUFFER_SIZE};

use fdeflate::Decompressor;

/// Ergonomics wrapper around `fdeflate::Decompressor` for zlib compressed data.
pub(super) struct ZlibStream {
    /// Current decoding state.
    state: Box<fdeflate::Decompressor>,
    /// If there has been a call to decompress already.
    started: bool,
    /// Remaining buffered decoded bytes.
    /// The decoder sometimes wants to inspect some already finished bytes for further decoding.
    /// So we keep a total of 32KB of decoded data available as long as more data may be appended.
    out_buffer: Vec<u8>,
    /// The first index of `out_buffer` where new data can be written.
    out_pos: usize,
    /// The first index of `out_buffer` that hasn't yet been passed to our client
    /// (i.e. not yet appended to the `image_data` parameter of `fn decompress` or `fn
    /// finish_compressed_chunks`).
    read_pos: usize,
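    // Note on the two cursors above: `read_pos <= out_pos <= out_buffer.len()` always holds,
    // partitioning `out_buffer` as follows:
    //
    //   [ already returned to client | decoded, not yet returned | writable space ]
    //   0                         read_pos                    out_pos            len()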
    /// Limit on how many bytes can be decompressed in total. This field is mostly used for
    /// performance optimizations (e.g. to avoid allocating and zeroing out large buffers when only
    /// a small image is being decoded).
    max_total_output: usize,
    /// Ignore and do not calculate the Adler-32 checksum. Defaults to `true`.
    ///
    /// When set, `Decompressor::ignore_adler32` is called before decompression starts.
    ///
    /// This flag should not be modified after decompression has started.
    ignore_adler32: bool,
}

impl ZlibStream {
    pub(crate) fn new() -> Self {
        ZlibStream {
            state: Box::new(Decompressor::new()),
            started: false,
            out_buffer: Vec::new(),
            out_pos: 0,
            read_pos: 0,
            max_total_output: usize::MAX,
            ignore_adler32: true,
        }
    }

    pub(crate) fn reset(&mut self) {
        self.started = false;
        self.out_buffer.clear();
        self.out_pos = 0;
        self.read_pos = 0;
        self.max_total_output = usize::MAX;
        *self.state = Decompressor::new();
    }

    pub(crate) fn set_max_total_output(&mut self, n: usize) {
        self.max_total_output = n;
    }

    /// Set the `ignore_adler32` flag and return `true` if the flag was
    /// successfully set.
    ///
    /// The default is `true`.
    ///
    /// This flag cannot be modified after decompression has started until the
    /// [ZlibStream] is reset.
    pub(crate) fn set_ignore_adler32(&mut self, flag: bool) -> bool {
        if !self.started {
            self.ignore_adler32 = flag;
            true
        } else {
            false
        }
    }

    /// Return the `ignore_adler32` flag.
    pub(crate) fn ignore_adler32(&self) -> bool {
        self.ignore_adler32
    }

    /// Fill the decoded buffer as far as possible from `data`.
    /// On success returns the number of consumed input bytes.
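    ///
    /// Callers are expected to invoke this in a loop, advancing the input by the returned byte
    /// count each time, and then to call `finish_compressed_chunks` once all `IDAT` data has
    /// been fed in (see the round-trip sketch in the test module below).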
    pub(crate) fn decompress(
        &mut self,
        data: &[u8],
        image_data: &mut Vec<u8>,
    ) -> Result<usize, DecodingError> {
        // There may be more data past the adler32 checksum at the end of the deflate stream. We
        // match libpng's default behavior and ignore any trailing data. In the future we may want
        // to add a flag to control this behavior.
        if self.state.is_done() {
            return Ok(data.len());
        }

        self.prepare_vec_for_appending();

        if !self.started && self.ignore_adler32 {
            self.state.ignore_adler32();
        }

        let (in_consumed, out_consumed) = self
            .state
            .read(data, self.out_buffer.as_mut_slice(), self.out_pos, false)
            .map_err(|err| {
                DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
            })?;

        self.started = true;
        self.out_pos += out_consumed;
        self.transfer_finished_data(image_data);
        self.compact_out_buffer_if_needed();

        Ok(in_consumed)
    }

    /// Called after all consecutive `IDAT` chunks were handled.
    ///
    /// The compressed stream can be split on arbitrary byte boundaries. This enables some cleanup
    /// within the decompressor and the flushing of additional data that may have been held back
    /// in case more input was still expected.
    pub(crate) fn finish_compressed_chunks(
        &mut self,
        image_data: &mut Vec<u8>,
    ) -> Result<(), DecodingError> {
        if !self.started {
            return Ok(());
        }

        while !self.state.is_done() {
            self.prepare_vec_for_appending();
            let (_in_consumed, out_consumed) = self
                .state
                .read(&[], self.out_buffer.as_mut_slice(), self.out_pos, true)
                .map_err(|err| {
                    DecodingError::Format(FormatErrorInner::CorruptFlateStream { err }.into())
                })?;

            self.out_pos += out_consumed;

            if !self.state.is_done() {
                let transferred = self.transfer_finished_data(image_data);
                assert!(
                    transferred > 0 || out_consumed > 0,
                    "No more forward progress made in stream decoding."
                );
                self.compact_out_buffer_if_needed();
            }
        }

        self.transfer_finished_data(image_data);
        self.out_buffer.clear();
        Ok(())
    }

    /// Resize the vector to allow allocation of more data.
    fn prepare_vec_for_appending(&mut self) {
        // The `debug_assert` below explains why we can use `>=` instead of `>` when comparing
        // `self.out_pos` against `self.max_total_output` in the next `if` statement.
        debug_assert!(!self.state.is_done());
        if self.out_pos >= self.max_total_output {
            // This can happen when the `max_total_output` was miscalculated (e.g.
            // because the `IHDR` chunk was malformed and didn't match the `IDAT` chunk). In
            // this case, let's reset `self.max_total_output` before further calculations.
            self.max_total_output = usize::MAX;
        }

        let current_len = self.out_buffer.len();
        let desired_len = self
            .out_pos
            .saturating_add(CHUNK_BUFFER_SIZE)
            .min(self.max_total_output);
        if current_len >= desired_len {
            return;
        }

        let buffered_len = self.decoding_size(self.out_buffer.len());
        debug_assert!(self.out_buffer.len() <= buffered_len);
        self.out_buffer.resize(buffered_len, 0u8);
    }

    fn decoding_size(&self, len: usize) -> usize {
        // Allocate one more chunk size than currently, or double the length, while ensuring that
        // the allocation is valid and that any cursor within it will be valid.
        len
            // Add at least one chunk, doubling the length once it exceeds `CHUNK_BUFFER_SIZE`,
            // so that reallocation cost stays amortized.
            .saturating_add(CHUNK_BUFFER_SIZE.max(len))
            // Ensure all buffer indices are valid cursor positions.
            // Note: both cut off and zero extension give correct results.
            .min(u64::MAX as usize)
            // Ensure the allocation request is valid.
            // TODO: maximum allocation limits?
            .min(isize::MAX as usize)
            // Don't unnecessarily allocate more than `max_total_output`.
            .min(self.max_total_output)
    }
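
    // Worked example for `decoding_size` (an illustration, assuming `CHUNK_BUFFER_SIZE` is
    // 32 KiB and `max_total_output` is unlimited): starting from an empty buffer, successive
    // calls yield 32 KiB, 64 KiB, 128 KiB, 256 KiB, ... so the buffer doubles once it exceeds
    // one chunk, until one of the clamps above applies.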

    fn transfer_finished_data(&mut self, image_data: &mut Vec<u8>) -> usize {
        let transferred = &self.out_buffer[self.read_pos..self.out_pos];
        image_data.extend_from_slice(transferred);
        self.read_pos = self.out_pos;
        transferred.len()
    }

    fn compact_out_buffer_if_needed(&mut self) {
        // The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#10Compression) mandates
        // "deflate/inflate compression with a sliding window (which is an upper bound on the
        // distances appearing in the deflate stream) of at most 32768 bytes".
        //
        // `fdeflate` requires that we keep this many most recently decompressed bytes in the
        // `out_buffer` - this allows referring back to them when handling "length and distance
        // codes" in the deflate stream.
        const LOOKBACK_SIZE: usize = 32768;

        // Compact `self.out_buffer` when "needed". Doing this conditionally helps to put an upper
        // bound on the amortized cost of copying the data within `self.out_buffer`.
        //
        // TODO: The factor of 4 is an ad-hoc heuristic. Consider measuring and using a different
        // factor. (Early experiments seem to indicate that a factor of 4 is faster than a factor
        // of 2, and that 4 * `LOOKBACK_SIZE` is an acceptable memory trade-off. Higher factors
        // result in higher memory usage, but the compaction cost is lower - a factor of 4 means
        // that 1 byte gets copied during compaction for every 3 decompressed bytes.)
        if self.out_pos > LOOKBACK_SIZE * 4 {
            // Only preserve the `lookback_buffer` and "throw away" the earlier prefix.
            let lookback_buffer = self.out_pos.saturating_sub(LOOKBACK_SIZE)..self.out_pos;
            let preserved_len = lookback_buffer.len();
            self.out_buffer.copy_within(lookback_buffer, 0);
            self.read_pos = preserved_len;
            self.out_pos = preserved_len;
        }
    }
}
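
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal round-trip sketch rather than an exhaustive test. It assumes that
    // `fdeflate::compress_to_vec` produces a zlib stream accepted by `Decompressor` (true at
    // the time of writing) and feeds the input in small slices to exercise the streaming path.
    #[test]
    fn round_trip_in_small_pieces() {
        let original = vec![42u8; 100_000];
        let compressed = fdeflate::compress_to_vec(&original);

        let mut stream = ZlibStream::new();
        let mut image_data = Vec::new();
        let mut pos = 0;
        while pos < compressed.len() {
            // Offer at most 100 bytes per call; `decompress` reports how many were consumed.
            let end = (pos + 100).min(compressed.len());
            pos += stream
                .decompress(&compressed[pos..end], &mut image_data)
                .expect("fdeflate output should be a valid zlib stream");
        }
        stream
            .finish_compressed_chunks(&mut image_data)
            .expect("flushing a complete stream should succeed");

        assert_eq!(image_data, original);
    }

    // Checks the growth pattern documented above `decoding_size`: the returned size doubles
    // once the buffer has grown past a single chunk.
    #[test]
    fn decoding_size_doubles() {
        let stream = ZlibStream::new();
        let first = stream.decoding_size(0);
        assert_eq!(first, CHUNK_BUFFER_SIZE);
        let second = stream.decoding_size(first);
        assert_eq!(second, 2 * CHUNK_BUFFER_SIZE);
        assert_eq!(stream.decoding_size(second), 4 * CHUNK_BUFFER_SIZE);
    }
}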