1/*
2 * Copyright (c) 2023.
3 *
4 * This software is free software;
5 *
6 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
7 */
8
9use alloc::format;
10use core::convert::TryInto;
11
12use zune_core::colorspace::ColorSpace;
13
14use crate::color_convert::ycbcr_to_grayscale;
15use crate::components::{Components, SampleRatios};
16use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS};
17use crate::errors::DecodeErrors;
18
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // t = a*b + 128; folding `t >> 8` back in before the final shift
    // implements round-to-nearest division by 255, exact for all 8-bit inputs.
    let t: i32 = i32::from(in_val) * i32::from(y) + 128;
    ((t + (t >> 8)) >> 8) as u8
}
28
29#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
30pub(crate) fn color_convert(
31 unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr,
32 input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize,
33 padded_width: usize
34) -> Result<(), DecodeErrors> // so many parameters..
35{
36 // maximum sampling factors are in Y-channel, no need to pass them.
37
38 if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace {
39 // sort things like RGB to RGB conversion
40 copy_removing_padding(unprocessed, width, padded_width, output);
41 return Ok(());
42 }
43 if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace {
44 copy_removing_padding_4x(unprocessed, width, padded_width, output);
45 return Ok(());
46 }
47 // color convert
48 match (input_colorspace, output_colorspace) {
49 (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => {
50 ycbcr_to_grayscale(unprocessed[0], width, padded_width, output);
51 }
52 (
53 ColorSpace::YCbCr,
54 ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA
55 ) => {
56 color_convert_ycbcr(
57 unprocessed,
58 width,
59 padded_width,
60 output_colorspace,
61 color_convert_16,
62 output
63 );
64 }
65 (ColorSpace::YCCK, ColorSpace::RGB) => {
66 color_convert_ycck_to_rgb::<3>(
67 unprocessed,
68 width,
69 padded_width,
70 output_colorspace,
71 color_convert_16,
72 output
73 );
74 }
75
76 (ColorSpace::YCCK, ColorSpace::RGBA) => {
77 color_convert_ycck_to_rgb::<4>(
78 unprocessed,
79 width,
80 padded_width,
81 output_colorspace,
82 color_convert_16,
83 output
84 );
85 }
86 (ColorSpace::CMYK, ColorSpace::RGB) => {
87 color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output);
88 }
89 (ColorSpace::CMYK, ColorSpace::RGBA) => {
90 color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output);
91 }
92 // For the other components we do nothing(currently)
93 _ => {
94 let msg = format!(
95 "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}");
96
97 return Err(DecodeErrors::Format(msg));
98 }
99 }
100 Ok(())
101}
102
103/// Copy a block to output removing padding bytes from input
104/// if necessary
105#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
106fn copy_removing_padding(
107 mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
108) {
109 for (((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]) in outputimpl Iterator
110 .chunks_exact_mut(chunk_size:width * 3)
111 .zip(mcu_block[0].chunks_exact(chunk_size:padded_width))
112 .zip(mcu_block[1].chunks_exact(chunk_size:padded_width))
113 .zip(mcu_block[2].chunks_exact(chunk_size:padded_width))
114 {
115 for (((pix: &mut [u8], c: &i16), y: &i16), m: &i16) in pix_w.chunks_exact_mut(chunk_size:3).zip(c_w).zip(m_w).zip(y_w) {
116 pix[0] = *c as u8;
117 pix[1] = *y as u8;
118 pix[2] = *m as u8;
119 }
120 }
121}
122fn copy_removing_padding_4x(
123 mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
124) {
125 for ((((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]), k_w: &[i16]) in outputimpl Iterator
126 .chunks_exact_mut(chunk_size:width * 4)
127 .zip(mcu_block[0].chunks_exact(chunk_size:padded_width))
128 .zip(mcu_block[1].chunks_exact(chunk_size:padded_width))
129 .zip(mcu_block[2].chunks_exact(chunk_size:padded_width))
130 .zip(mcu_block[3].chunks_exact(chunk_size:padded_width))
131 {
132 for ((((pix: &mut [u8], c: &i16), y: &i16), m: &i16), k: &i16) in pix_wimpl Iterator
133 .chunks_exact_mut(chunk_size:4)
134 .zip(c_w)
135 .zip(m_w)
136 .zip(y_w)
137 .zip(k_w)
138 {
139 pix[0] = *c as u8;
140 pix[1] = *y as u8;
141 pix[2] = *m as u8;
142 pix[3] = *k as u8;
143 }
144 }
145}
146/// Convert YCCK image to rgb
147#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
148fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>(
149 mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
150 output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
151) {
152 color_convert_ycbcr(
153 mcu_block,
154 width,
155 padded_width,
156 output_colorspace,
157 color_convert_16,
158 output
159 );
160 for (pix_w: &mut [u8], m_w: &[i16]) in outputChunksExactMut<'_, u8>
161 .chunks_exact_mut(chunk_size:width * 3)
162 .zip(mcu_block[3].chunks_exact(chunk_size:padded_width))
163 {
164 for (pix: &mut [u8], m: &i16) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) {
165 let m: u8 = (*m) as u8;
166 pix[0] = blinn_8x8(in_val:255 - pix[0], y:m);
167 pix[1] = blinn_8x8(in_val:255 - pix[1], y:m);
168 pix[2] = blinn_8x8(in_val:255 - pix[2], y:m);
169 }
170 }
171}
172
173#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
174fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>(
175 mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8]
176) {
177 for ((((pix_w, c_w), m_w), y_w), k_w) in output
178 .chunks_exact_mut(width * NUM_COMPONENTS)
179 .zip(mcu_block[0].chunks_exact(padded_width))
180 .zip(mcu_block[1].chunks_exact(padded_width))
181 .zip(mcu_block[2].chunks_exact(padded_width))
182 .zip(mcu_block[3].chunks_exact(padded_width))
183 {
184 for ((((pix, c), m), y), k) in pix_w
185 .chunks_exact_mut(3)
186 .zip(c_w)
187 .zip(m_w)
188 .zip(y_w)
189 .zip(k_w)
190 {
191 let c = *c as u8;
192 let m = *m as u8;
193 let y = *y as u8;
194 let k = *k as u8;
195
196 pix[0] = blinn_8x8(c, k);
197 pix[1] = blinn_8x8(m, k);
198 pix[2] = blinn_8x8(y, k);
199 }
200 }
201}
202
/// Do color-conversion for interleaved MCU
///
/// Reads the Y, Cb and Cr planes from `mcu_block[0..3]` (each row is
/// `padded_width` samples long) and writes interleaved pixels of
/// `output_colorspace` into `output` via the `color_convert_16` function
/// pointer, which converts 16 pixels per call.
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes written per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than 16.
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            y_out[0..y_width.len()].copy_from_slice(y_width);
            cb_out[0..cb_width.len()].copy_from_slice(cb_width);
            cr_out[0..cr_width.len()].copy_from_slice(cr_width);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        // (the `width - 16` rewind cannot underflow: width >= 16 past the guard above)
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // overwrite the last 16 output pixels of the row with the
        // freshly converted tail held in `temp`
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Up-sample one component's freshly decoded MCU row, handling the
/// carry-over needed at MCU-row boundaries.
///
/// * `component` - reads `raw_coeff` and writes `upsample_dest` (and
///   `first_row_upsample_dest` for the carried-over previous row).
/// * `mcu_height` - total number of MCU rows in the image.
/// * `i` - index of the MCU row currently being processed.
/// * `upsampler_scratch_space` - scratch buffer handed to the up-sampler fn.
/// * `has_vertical_sample` - true when some other component in the image is
///   vertically sampled, forcing H-sampled components to also save a row
///   (see the long comment in the `SampleRatios::H` arm).
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last for the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            let mut dest_start = 0;
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                // row_down is the first row of the newly decoded MCU
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            // number of coefficient rows in this MCU's raw buffer
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    // all cases above are exhaustive for valid pos/i combinations
                    unreachable!("Uh oh!");
                }
                if upsample {
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            // horizontal upsampling doubles each row in place, hence 2x
            assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y HS:2 VS:2 QT:0
                Component ID:Cb HS:1 VS:1 QT:1
                Component ID:Cr HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                let length = component.first_row_upsample_dest.len();
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        SampleRatios::None => {}
    };
}
456