1 | /* |
2 | * Copyright (c) 2023. |
3 | * |
4 | * This software is free software; |
5 | * |
6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
7 | */ |
8 | |
9 | use alloc::format; |
10 | use core::convert::TryInto; |
11 | |
12 | use zune_core::colorspace::ColorSpace; |
13 | |
14 | use crate::color_convert::ycbcr_to_grayscale; |
15 | use crate::components::{Components, SampleRatios}; |
16 | use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS}; |
17 | use crate::errors::DecodeErrors; |
18 | |
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Computes `round(in_val * y / 255)` without a division by treating both
/// operands as fixed-point fractions of 255.
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    let t: i32 = i32::from(in_val) * i32::from(y) + 128;
    // `t + (t >> 8)` folds in a correction term so that the final shift
    // approximates division by 255 with correct rounding for 8-bit inputs.
    ((t + (t >> 8)) >> 8) as u8
}
28 | |
29 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
30 | pub(crate) fn color_convert( |
31 | unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr, |
32 | input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize, |
33 | padded_width: usize |
34 | ) -> Result<(), DecodeErrors> // so many parameters.. |
35 | { |
36 | // maximum sampling factors are in Y-channel, no need to pass them. |
37 | |
38 | if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace { |
39 | // sort things like RGB to RGB conversion |
40 | copy_removing_padding(unprocessed, width, padded_width, output); |
41 | return Ok(()); |
42 | } |
43 | if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace { |
44 | copy_removing_padding_4x(unprocessed, width, padded_width, output); |
45 | return Ok(()); |
46 | } |
47 | // color convert |
48 | match (input_colorspace, output_colorspace) { |
49 | (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => { |
50 | ycbcr_to_grayscale(unprocessed[0], width, padded_width, output); |
51 | } |
52 | ( |
53 | ColorSpace::YCbCr, |
54 | ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA |
55 | ) => { |
56 | color_convert_ycbcr( |
57 | unprocessed, |
58 | width, |
59 | padded_width, |
60 | output_colorspace, |
61 | color_convert_16, |
62 | output |
63 | ); |
64 | } |
65 | (ColorSpace::YCCK, ColorSpace::RGB) => { |
66 | color_convert_ycck_to_rgb::<3>( |
67 | unprocessed, |
68 | width, |
69 | padded_width, |
70 | output_colorspace, |
71 | color_convert_16, |
72 | output |
73 | ); |
74 | } |
75 | |
76 | (ColorSpace::YCCK, ColorSpace::RGBA) => { |
77 | color_convert_ycck_to_rgb::<4>( |
78 | unprocessed, |
79 | width, |
80 | padded_width, |
81 | output_colorspace, |
82 | color_convert_16, |
83 | output |
84 | ); |
85 | } |
86 | (ColorSpace::CMYK, ColorSpace::RGB) => { |
87 | color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output); |
88 | } |
89 | (ColorSpace::CMYK, ColorSpace::RGBA) => { |
90 | color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output); |
91 | } |
92 | // For the other components we do nothing(currently) |
93 | _ => { |
94 | let msg = format!( |
95 | "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}" ); |
96 | |
97 | return Err(DecodeErrors::Format(msg)); |
98 | } |
99 | } |
100 | Ok(()) |
101 | } |
102 | |
103 | /// Copy a block to output removing padding bytes from input |
104 | /// if necessary |
105 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
106 | fn copy_removing_padding( |
107 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
108 | ) { |
109 | for (((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]) in outputimpl Iterator |
110 | .chunks_exact_mut(chunk_size:width * 3) |
111 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
112 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
113 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
114 | { |
115 | for (((pix: &mut [u8], c: &i16), y: &i16), m: &i16) in pix_w.chunks_exact_mut(chunk_size:3).zip(c_w).zip(m_w).zip(y_w) { |
116 | pix[0] = *c as u8; |
117 | pix[1] = *y as u8; |
118 | pix[2] = *m as u8; |
119 | } |
120 | } |
121 | } |
122 | fn copy_removing_padding_4x( |
123 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
124 | ) { |
125 | for ((((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]), k_w: &[i16]) in outputimpl Iterator |
126 | .chunks_exact_mut(chunk_size:width * 4) |
127 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
128 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
129 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
130 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
131 | { |
132 | for ((((pix: &mut [u8], c: &i16), y: &i16), m: &i16), k: &i16) in pix_wimpl Iterator |
133 | .chunks_exact_mut(chunk_size:4) |
134 | .zip(c_w) |
135 | .zip(m_w) |
136 | .zip(y_w) |
137 | .zip(k_w) |
138 | { |
139 | pix[0] = *c as u8; |
140 | pix[1] = *y as u8; |
141 | pix[2] = *m as u8; |
142 | pix[3] = *k as u8; |
143 | } |
144 | } |
145 | } |
146 | /// Convert YCCK image to rgb |
147 | #[allow (clippy::cast_possible_truncation, clippy::cast_sign_loss)] |
148 | fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>( |
149 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, |
150 | output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] |
151 | ) { |
152 | color_convert_ycbcr( |
153 | mcu_block, |
154 | width, |
155 | padded_width, |
156 | output_colorspace, |
157 | color_convert_16, |
158 | output |
159 | ); |
160 | for (pix_w: &mut [u8], m_w: &[i16]) in outputChunksExactMut<'_, u8> |
161 | .chunks_exact_mut(chunk_size:width * 3) |
162 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
163 | { |
164 | for (pix: &mut [u8], m: &i16) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) { |
165 | let m: u8 = (*m) as u8; |
166 | pix[0] = blinn_8x8(in_val:255 - pix[0], y:m); |
167 | pix[1] = blinn_8x8(in_val:255 - pix[1], y:m); |
168 | pix[2] = blinn_8x8(in_val:255 - pix[2], y:m); |
169 | } |
170 | } |
171 | } |
172 | |
173 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
174 | fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>( |
175 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
176 | ) { |
177 | for ((((pix_w, c_w), m_w), y_w), k_w) in output |
178 | .chunks_exact_mut(width * NUM_COMPONENTS) |
179 | .zip(mcu_block[0].chunks_exact(padded_width)) |
180 | .zip(mcu_block[1].chunks_exact(padded_width)) |
181 | .zip(mcu_block[2].chunks_exact(padded_width)) |
182 | .zip(mcu_block[3].chunks_exact(padded_width)) |
183 | { |
184 | for ((((pix, c), m), y), k) in pix_w |
185 | .chunks_exact_mut(3) |
186 | .zip(c_w) |
187 | .zip(m_w) |
188 | .zip(y_w) |
189 | .zip(k_w) |
190 | { |
191 | let c = *c as u8; |
192 | let m = *m as u8; |
193 | let y = *y as u8; |
194 | let k = *k as u8; |
195 | |
196 | pix[0] = blinn_8x8(c, k); |
197 | pix[1] = blinn_8x8(m, k); |
198 | pix[2] = blinn_8x8(y, k); |
199 | } |
200 | } |
201 | } |
202 | |
/// Do color-conversion for interleaved MCU
///
/// Converts the three `mcu_block` planes (each row `padded_width` samples
/// wide) into interleaved `output_colorspace` pixels, writing `width`
/// pixels per output row and discarding the encoder's row padding.
///
/// `color_convert_16` converts exactly 16 pixels per call, so each row is
/// processed in 16-pixel chunks, with special handling for rows narrower
/// than 16 and for the non-multiple-of-16 tail.
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than 16.
    // 64 = 16 pixels * up to 4 output components.
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            y_out[0..y_width.len()].copy_from_slice(y_width);
            cb_out[0..cb_width.len()].copy_from_slice(cb_width);
            cr_out[0..cr_width.len()].copy_from_slice(cr_width);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        // (`.take(1)` ensures we only convert the single 16-pixel window
        // ending exactly at the row boundary.)
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // The tail region of the output row, overwritten with the
        // re-converted window above.
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Run the component's up-sampler over its freshly decoded MCU rows.
///
/// * `mcu_height` - total number of MCU rows in the image
/// * `i` - index of the MCU row currently being processed
/// * `upsampler_scratch_space` - scratch buffer passed through to the
///   component's up-sampler function
/// * `has_vertical_sample` - set when another component of the image is
///   vertically sampled, forcing H-sampled components to also save a
///   boundary row (see the comment in the `SampleRatios::H` arm)
///
/// Up-sampled rows are written to `component.upsample_dest`; the
/// carried-over boundary row for the previous MCU row is written to
/// `component.first_row_upsample_dest`.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last for the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            let mut dest_start = 0;
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            // number of decoded rows in this MCU
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            // horizontal upsampling doubles the row width
            assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y HS:2 VS:2 QT:0
                Component ID:Cb HS:1 VS:1 QT:1
                Component ID:Cr HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                let length = component.first_row_upsample_dest.len();
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        // no upsampling required for this component
        SampleRatios::None => {}
    };
}
456 | |