| 1 | /* |
| 2 | * Copyright (c) 2023. |
| 3 | * |
| 4 | * This software is free software; |
| 5 | * |
| 6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
| 7 | */ |
| 8 | |
| 9 | use alloc::{format, vec}; |
| 10 | use core::cmp::min; |
| 11 | |
| 12 | use zune_core::bytestream::ZReaderTrait; |
| 13 | use zune_core::colorspace::ColorSpace; |
| 14 | use zune_core::colorspace::ColorSpace::Luma; |
| 15 | use zune_core::log::{error, trace, warn}; |
| 16 | |
| 17 | use crate::bitstream::BitStream; |
| 18 | use crate::components::SampleRatios; |
| 19 | use crate::decoder::MAX_COMPONENTS; |
| 20 | use crate::errors::DecodeErrors; |
| 21 | use crate::marker::Marker; |
| 22 | use crate::misc::{calculate_padded_width, setup_component_params}; |
| 23 | use crate::worker::{color_convert, upsample}; |
| 24 | use crate::JpegDecoder; |
| 25 | |
/// The number of coefficients in one 8x8 DCT block (data unit) of an MCU,
/// i.e. 8 * 8 = 64 entries.
pub const DCT_BLOCK: usize = 64;
| 29 | |
| 30 | impl<T: ZReaderTrait> JpegDecoder<T> { |
| 31 | /// Check for existence of DC and AC Huffman Tables |
| 32 | pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> { |
| 33 | // check that dc and AC tables exist outside the hot path |
| 34 | for component in &self.components { |
| 35 | let _ = &self |
| 36 | .dc_huffman_tables |
| 37 | .get(component.dc_huff_table) |
| 38 | .as_ref() |
| 39 | .ok_or_else(|| { |
| 40 | DecodeErrors::HuffmanDecode(format!( |
| 41 | "No Huffman DC table for component {:?} " , |
| 42 | component.component_id |
| 43 | )) |
| 44 | })? |
| 45 | .as_ref() |
| 46 | .ok_or_else(|| { |
| 47 | DecodeErrors::HuffmanDecode(format!( |
| 48 | "No DC table for component {:?}" , |
| 49 | component.component_id |
| 50 | )) |
| 51 | })?; |
| 52 | |
| 53 | let _ = &self |
| 54 | .ac_huffman_tables |
| 55 | .get(component.ac_huff_table) |
| 56 | .as_ref() |
| 57 | .ok_or_else(|| { |
| 58 | DecodeErrors::HuffmanDecode(format!( |
| 59 | "No Huffman AC table for component {:?} " , |
| 60 | component.component_id |
| 61 | )) |
| 62 | })? |
| 63 | .as_ref() |
| 64 | .ok_or_else(|| { |
| 65 | DecodeErrors::HuffmanDecode(format!( |
| 66 | "No AC table for component {:?}" , |
| 67 | component.component_id |
| 68 | )) |
| 69 | })?; |
| 70 | } |
| 71 | Ok(()) |
| 72 | } |
| 73 | |
    /// Decode MCUs and carry out post processing.
    ///
    /// This is the main decoder loop for the library, the hot path.
    ///
    /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
    /// here.
    ///
    /// `pixels` is the destination buffer for the final, color-converted
    /// image; it must be large enough for the whole image in the configured
    /// output colorspace.
    #[allow(
        clippy::similar_names,
        clippy::too_many_lines,
        clippy::cast_possible_truncation
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_baseline(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;

        // check dc and AC tables exist up-front, outside the hot loop
        self.check_tables()?;

        let (mut mcu_width, mut mcu_height);

        if self.is_interleaved {
            // set upsampling functions
            self.set_upsampling()?;

            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            // For non-interleaved images ((1*1) subsampling)
            // the number of MCUs is the dimension (+7 to account for padding) divided by 8.
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, e.g interleaved,
            // when converting from YcbCr to grayscale, we need to
            // take into account mcu height since the MCU decoding needs to take
            // it into account for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to be 2 to ensure that we increment two rows
            // for every mcu processed also
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }

        if self.input_colorspace.num_components() > self.components.len() {
            let msg = format!(
                " Expected {} number of components but found {}",
                self.input_colorspace.num_components(),
                self.components.len()
            );
            return Err(DecodeErrors::Format(msg));
        }

        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");

            self.reset_params();

            // recompute MCU dimensions for the now effectively (1x1)-sampled image
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        let width = usize::from(self.info.width);

        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);

        let mut stream = BitStream::new();
        // scratch space for a single de-quantized 8x8 coefficient block
        let mut tmp = [0_i32; DCT_BLOCK];

        let comp_len = self.components.len();

        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only needed components.
            //
            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
            // components.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || comp_len == 4
            // Special colorspace
            {
                // allocate enough space to hold a whole MCU width
                // this means we should take into account sampling ratios
                // `*8` is because each MCU spans 8 widths.
                let len = comp.width_stride * comp.vertical_sample * 8;

                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }

        // running offset into `pixels`, advanced by post_process
        let mut pixels_written = 0;

        // scratch row is only needed when up-sampling interleaved images;
        // zero-sized otherwise
        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];

        for i in 0..mcu_height {
            // Report if we have no more bytes
            // This may generate false negatives since we over-read bytes
            // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem)
            if stream.overread_by > 37
            // favourite number :)
            {
                if self.options.get_strict_mode() {
                    return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
                };

                error!("Premature end of buffer");
                break;
            }
            // decode a whole MCU width,
            // this takes into account interleaved components.
            self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?;
            // process that width up until it's impossible
            self.post_process(
                pixels,
                i,
                mcu_height,
                width,
                padded_width,
                &mut pixels_written,
                &mut upsampler_scratch_space
            )?;
        }
        // it may happen that some images don't have the whole buffer
        // so we can't panic in case of that
        // assert_eq!(pixels_written, pixels.len());

        trace!("Finished decoding image");

        Ok(())
    }
    /// Decode one full MCU row of blocks for every component, running the
    /// IDCT on each block as it is produced.
    ///
    /// `tmp` is a caller-provided scratch block (zeroed here before every
    /// use) and `stream` is the bit reader shared across the whole scan.
    fn decode_mcu_width(
        &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream
    ) -> Result<(), DecodeErrors> {
        for j in 0..mcu_width {
            // iterate over components
            for component in &mut self.components {
                // `check_tables` already validated the table slots before the
                // scan started; the `% MAX_COMPONENTS` is a defensive bound on
                // the index — TODO(review): confirm it can never select a slot
                // that `check_tables` did not validate.
                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let qt_table = &component.quantization_table;
                let channel = &mut component.raw_coeff;

                // If image is interleaved iterate over scan components,
                // otherwise if it-s non-interleaved, these routines iterate in
                // trivial scanline order(Y,Cb,Cr)
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zero based array.
                        tmp.fill(0);

                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred
                        )?;

                        if component.needed {
                            // offset of this 8x8 block inside the component's
                            // MCU-row buffer
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                let c2 = v_samp * 8;
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;

                                component.width_stride * c2 + c3
                            };

                            let idct_pos = channel.get_mut(idct_position..).unwrap();
                            // call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }
            // one MCU fully decoded; count down towards the next restart marker
            self.todo = self.todo.saturating_sub(1);
            // After all interleaved components, that's an MCU
            // handle stream markers
            //
            // In some corrupt images, it may occur that header markers occur in the stream.
            // The spec EXPLICITLY FORBIDS this, specifically, in
            // routine F.2.2.5 it says
            // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
            //
            // But libjpeg-turbo allows it because of some weird reason. so I'll also
            // allow it because of some weird reason.
            if let Some(m) = stream.marker {
                if m == Marker::EOI {
                    // acknowledge and ignore EOI marker.
                    stream.marker.take();
                    trace!("Found EOI marker");
                } else if let Marker::RST(_) = m {
                    // restart marker: only act on it when the restart
                    // interval says one is due
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                } else {
                    // header-type marker inside entropy-coded data: fatal in
                    // strict mode, otherwise parse it like libjpeg-turbo does
                    if self.options.get_strict_mode() {
                        return Err(DecodeErrors::Format(format!(
                            "Marker {m:?} found where not expected"
                        )));
                    }
                    error!(
                        "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
                        m
                    );

                    self.parse_marker_inner(m)?;
                }
            }
        }
        Ok(())
    }
| 307 | // handle RST markers. |
| 308 | // No-op if not using restarts |
| 309 | // this routine is shared with mcu_prog |
| 310 | #[cold ] |
| 311 | pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> { |
| 312 | self.todo = self.restart_interval; |
| 313 | |
| 314 | if let Some(marker) = stream.marker { |
| 315 | // Found a marker |
| 316 | // Read stream and see what marker is stored there |
| 317 | match marker { |
| 318 | Marker::RST(_) => { |
| 319 | // reset stream |
| 320 | stream.reset(); |
| 321 | // Initialize dc predictions to zero for all components |
| 322 | self.components.iter_mut().for_each(|x| x.dc_pred = 0); |
| 323 | // Start iterating again. from position. |
| 324 | } |
| 325 | Marker::EOI => { |
| 326 | // silent pass |
| 327 | } |
| 328 | _ => { |
| 329 | return Err(DecodeErrors::MCUError(format!( |
| 330 | "Marker {marker:?} found in bitstream, possibly corrupt jpeg" |
| 331 | ))); |
| 332 | } |
| 333 | } |
| 334 | } |
| 335 | Ok(()) |
| 336 | } |
    /// Post-process one decoded MCU row: up-sample sub-sampled components
    /// (interleaved images) and color-convert the samples into `pixels`.
    ///
    /// Arguments:
    /// - `pixels`: destination pixel buffer for the whole image.
    /// - `i`: index of the MCU row currently being processed.
    /// - `mcu_height`: total number of MCU rows in the image.
    /// - `width`: image width in pixels.
    /// - `padded_width`: width padded according to the sampling ratio.
    /// - `pixels_written`: running byte offset into `pixels`; updated on return.
    /// - `upsampler_scratch_space`: scratch row used by the up-sampler.
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();

        // local cursor into `pixels`; written back to `pixels_written` at the end
        let mut px = *pixels_written;
        // indicates whether image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);

        let mut comp_len = self.components.len();

        // If we are moving from YCbCr-> Luma, we do not allocate storage for other components, so we
        // will panic when we are trying to read samples, so for that case,
        // hardcode it so that we don't panic when doing
        // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
        // Color-convert `num_iters` output scanlines starting at `px`,
        // reading one `padded_width`-long line per component from `samples`.
        // Advances the captured `px` by one full output row per iteration.
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };

        let comps = &mut self.components[..];

        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                //
                // NOTE(review): this scoped block builds `samples` and drops
                // it without reading it, so it has no observable effect here —
                // confirm whether an assertion was intended.
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            // up-sample every component for this MCU row
            for comp in comps.iter_mut() {
                upsample(
                    comp,
                    mcu_height,
                    i,
                    upsampler_scratch_space,
                    is_vertically_sampled
                );
            }

            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }

                    // ensure length matches for all samples
                    let first_len = samples[0].len();
                    for samp in samples.iter().take(comp_len) {
                        assert_eq!(first_len, samp.len());
                    }
                    let num_iters = self.coeff * self.v_max;

                    color_conv_function(num_iters, samples)?;
                }

                // After up-sampling the last row, save any row that can be used for
                // a later up-sampling,
                //
                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
                // the previous mcu, since we don't have the down row, so save it
                for component in comps.iter_mut() {
                    if component.sample_ratio != SampleRatios::H {
                        // We don't care about H sampling factors, since it's copied in the workers function

                        // copy last row to be used for the next color conversion
                        let size = component.vertical_sample
                            * component.width_stride
                            * component.sample_ratio.sample();

                        let last_bytes =
                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();

                        component
                            .first_row_upsample_dest
                            .copy_from_slice(last_bytes);
                    }
                }
            }

            // pick up-sampled data where a component was sub-sampled,
            // raw coefficients otherwise
            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }

            // we either do 7 or 8 MCU's depending on the state, this only applies to
            // vertically sampled images
            //
            // for rows up until the last MCU, we do not upsample the last stride of the MCU
            // which means that the number of iterations should take that into account is one less the
            // up-sampled size
            //
            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
            // should sample full raw coeffs
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));

            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;

            color_conv_function(num_iters, samples)?;
        } else {
            // Non-interleaved image (or Luma output): read straight from each
            // component's raw coefficients, no up-sampling step needed.
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];

            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);

            color_conv_function(8 * self.coeff, channels_ref)?;
        }

        // publish how far we got into the pixel buffer for the next MCU row
        *pixels_written = px;
        Ok(())
    }
| 496 | } |