1 | /* |
2 | * Copyright (c) 2023. |
3 | * |
4 | * This software is free software; |
5 | * |
6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
7 | */ |
8 | |
9 | use alloc::format; |
10 | use core::convert::TryInto; |
11 | |
12 | use zune_core::colorspace::ColorSpace; |
13 | |
14 | use crate::color_convert::ycbcr_to_grayscale; |
15 | use crate::components::{Components, SampleRatios}; |
16 | use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS}; |
17 | use crate::errors::DecodeErrors; |
18 | |
/// fast 0..255 * 0..255 => 0..255 rounded multiplication
///
/// Computes `round(in_val * y / 255)` without a division by treating both
/// operands as fixed-point fractions of 255.
///
/// Borrowed from stb
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    let t: i32 = i32::from(in_val) * i32::from(y) + 128;
    // `t + (t >> 8)` folds in a correction term so that the final shift
    // approximates division by 255 with correct rounding for 8-bit inputs.
    ((t + (t >> 8)) >> 8) as u8
}
28 | |
29 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
30 | pub(crate) fn color_convert( |
31 | unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr, |
32 | input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize, |
33 | padded_width: usize |
34 | ) -> Result<(), DecodeErrors> // so many parameters.. |
35 | { |
36 | // maximum sampling factors are in Y-channel, no need to pass them. |
37 | |
38 | if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace { |
39 | // sort things like RGB to RGB conversion |
40 | copy_removing_padding(unprocessed, width, padded_width, output); |
41 | return Ok(()); |
42 | } |
43 | if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace { |
44 | copy_removing_padding_4x(unprocessed, width, padded_width, output); |
45 | return Ok(()); |
46 | } |
47 | // color convert |
48 | match (input_colorspace, output_colorspace) { |
49 | (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => { |
50 | ycbcr_to_grayscale(unprocessed[0], width, padded_width, output); |
51 | } |
52 | ( |
53 | ColorSpace::YCbCr, |
54 | ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA |
55 | ) => { |
56 | color_convert_ycbcr( |
57 | unprocessed, |
58 | width, |
59 | padded_width, |
60 | output_colorspace, |
61 | color_convert_16, |
62 | output |
63 | ); |
64 | } |
65 | (ColorSpace::YCCK, ColorSpace::RGB) => { |
66 | color_convert_ycck_to_rgb::<3>( |
67 | unprocessed, |
68 | width, |
69 | padded_width, |
70 | output_colorspace, |
71 | color_convert_16, |
72 | output |
73 | ); |
74 | } |
75 | |
76 | (ColorSpace::YCCK, ColorSpace::RGBA) => { |
77 | color_convert_ycck_to_rgb::<4>( |
78 | unprocessed, |
79 | width, |
80 | padded_width, |
81 | output_colorspace, |
82 | color_convert_16, |
83 | output |
84 | ); |
85 | } |
86 | (ColorSpace::CMYK, ColorSpace::RGB) => { |
87 | color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output); |
88 | } |
89 | (ColorSpace::CMYK, ColorSpace::RGBA) => { |
90 | color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output); |
91 | } |
92 | // For the other components we do nothing(currently) |
93 | _ => { |
94 | let msg = format!( |
95 | "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}" ); |
96 | |
97 | return Err(DecodeErrors::Format(msg)); |
98 | } |
99 | } |
100 | Ok(()) |
101 | } |
102 | |
103 | /// Copy a block to output removing padding bytes from input |
104 | /// if necessary |
105 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
106 | fn copy_removing_padding( |
107 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
108 | ) { |
109 | for (((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]) in outputimpl Iterator |
110 | .chunks_exact_mut(chunk_size:width * 3) |
111 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
112 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
113 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
114 | { |
115 | for (((pix: &mut [u8], c: &i16), y: &i16), m: &i16) in pix_w.chunks_exact_mut(chunk_size:3).zip(c_w).zip(m_w).zip(y_w) { |
116 | pix[0] = *c as u8; |
117 | pix[1] = *y as u8; |
118 | pix[2] = *m as u8; |
119 | } |
120 | } |
121 | } |
122 | fn copy_removing_padding_4x( |
123 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
124 | ) { |
125 | for ((((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]), k_w: &[i16]) in outputimpl Iterator |
126 | .chunks_exact_mut(chunk_size:width * 4) |
127 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
128 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
129 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
130 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
131 | { |
132 | for ((((pix: &mut [u8], c: &i16), y: &i16), m: &i16), k: &i16) in pix_wimpl Iterator |
133 | .chunks_exact_mut(chunk_size:4) |
134 | .zip(c_w) |
135 | .zip(m_w) |
136 | .zip(y_w) |
137 | .zip(k_w) |
138 | { |
139 | pix[0] = *c as u8; |
140 | pix[1] = *y as u8; |
141 | pix[2] = *m as u8; |
142 | pix[3] = *k as u8; |
143 | } |
144 | } |
145 | } |
146 | /// Convert YCCK image to rgb |
147 | #[allow (clippy::cast_possible_truncation, clippy::cast_sign_loss)] |
148 | fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>( |
149 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, |
150 | output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] |
151 | ) { |
152 | color_convert_ycbcr( |
153 | mcu_block, |
154 | width, |
155 | padded_width, |
156 | output_colorspace, |
157 | color_convert_16, |
158 | output |
159 | ); |
160 | for (pix_w: &mut [u8], m_w: &[i16]) in outputChunksExactMut<'_, u8> |
161 | .chunks_exact_mut(chunk_size:width * 3) |
162 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
163 | { |
164 | for (pix: &mut [u8], m: &i16) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) { |
165 | let m: u8 = (*m) as u8; |
166 | pix[0] = blinn_8x8(in_val:255 - pix[0], y:m); |
167 | pix[1] = blinn_8x8(in_val:255 - pix[1], y:m); |
168 | pix[2] = blinn_8x8(in_val:255 - pix[2], y:m); |
169 | } |
170 | } |
171 | } |
172 | |
173 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
174 | fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>( |
175 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
176 | ) { |
177 | for ((((pix_w, c_w), m_w), y_w), k_w) in output |
178 | .chunks_exact_mut(width * NUM_COMPONENTS) |
179 | .zip(mcu_block[0].chunks_exact(padded_width)) |
180 | .zip(mcu_block[1].chunks_exact(padded_width)) |
181 | .zip(mcu_block[2].chunks_exact(padded_width)) |
182 | .zip(mcu_block[3].chunks_exact(padded_width)) |
183 | { |
184 | for ((((pix, c), m), y), k) in pix_w |
185 | .chunks_exact_mut(3) |
186 | .zip(c_w) |
187 | .zip(m_w) |
188 | .zip(y_w) |
189 | .zip(k_w) |
190 | { |
191 | let c = *c as u8; |
192 | let m = *m as u8; |
193 | let y = *y as u8; |
194 | let k = *k as u8; |
195 | |
196 | pix[0] = blinn_8x8(c, k); |
197 | pix[1] = blinn_8x8(m, k); |
198 | pix[2] = blinn_8x8(y, k); |
199 | } |
200 | } |
201 | } |
202 | |
/// Do color-conversion for interleaved MCU
///
/// Converts the three `mcu_block` planes (each row `padded_width` samples
/// wide) into interleaved `output_colorspace` pixels, writing `width`
/// pixels per output row and discarding the encoder's row padding.
///
/// `color_convert_16` converts exactly 16 pixels per call, so each row is
/// processed in 16-pixel chunks, with special handling for rows narrower
/// than 16 and for the non-multiple-of-16 tail.
#[allow(
    clippy::similar_names,
    clippy::too_many_arguments,
    clippy::needless_pass_by_value,
    clippy::unwrap_used
)]
fn color_convert_ycbcr(
    mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize,
    output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8]
) {
    let num_components = output_colorspace.num_components();

    // bytes per output row
    let stride = width * num_components;
    // Allocate temporary buffer for small widths less than 16.
    // 64 = 16 pixels * up to 4 output components.
    let mut temp = [0; 64];
    // We need to chunk per width to ensure we can discard extra values at the end of the width.
    // Since the encoder may pad bits to ensure the width is a multiple of 8.
    for (((y_width, cb_width), cr_width), out) in mcu_block[0]
        .chunks_exact(padded_width)
        .zip(mcu_block[1].chunks_exact(padded_width))
        .zip(mcu_block[2].chunks_exact(padded_width))
        .zip(output.chunks_exact_mut(stride))
    {
        if width < 16 {
            // allocate temporary buffers for the values received from idct
            let mut y_out = [0; 16];
            let mut cb_out = [0; 16];
            let mut cr_out = [0; 16];
            // copy those small widths to that buffer
            y_out[0..y_width.len()].copy_from_slice(y_width);
            cb_out[0..cb_width.len()].copy_from_slice(cb_width);
            cr_out[0..cr_width.len()].copy_from_slice(cr_width);
            // we handle widths less than 16 a bit differently, allocating a temporary
            // buffer and writing to that and then flushing to the out buffer
            // because of the optimizations applied below,
            (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0);
            // copy to stride
            out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]);
            // next
            continue;
        }

        // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's.
        for (((y, cb), cr), out_c) in y_width
            .chunks_exact(16)
            .zip(cb_width.chunks_exact(16))
            .zip(cr_width.chunks_exact(16))
            .zip(out.chunks_exact_mut(16 * num_components))
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                out_c,
                &mut 0
            );
        }
        //we have more pixels in the end that can't be handled by the main loop.
        //move pointer back a little bit to get last 16 bytes,
        //color convert, and overwrite
        //This means some values will be color converted twice.
        // (`.take(1)` ensures we only convert the single 16-pixel window
        // ending exactly at the row boundary.)
        for ((y, cb), cr) in y_width[width - 16..]
            .chunks_exact(16)
            .zip(cb_width[width - 16..].chunks_exact(16))
            .zip(cr_width[width - 16..].chunks_exact(16))
            .take(1)
        {
            (color_convert_16)(
                y.try_into().unwrap(),
                cb.try_into().unwrap(),
                cr.try_into().unwrap(),
                &mut temp,
                &mut 0
            );
        }

        // The tail region of the output row, overwritten with the
        // re-converted window above.
        let rem = out[(width - 16) * num_components..]
            .chunks_exact_mut(16 * num_components)
            .next()
            .unwrap();

        rem.copy_from_slice(&temp[0..rem.len()]);
    }
}
/// Run the component's up-sampler over its freshly decoded MCU rows.
///
/// * `mcu_height` - total number of MCU rows in the image
/// * `i` - index of the MCU row currently being processed
/// * `upsampler_scratch_space` - scratch buffer passed through to the
///   component's up-sampler function
/// * `has_vertical_sample` - set when another component of the image is
///   vertically sampled, forcing H-sampled components to also save a
///   boundary row (see the comment in the `SampleRatios::H` arm)
///
/// Up-sampled rows are written to `component.upsample_dest`; the
/// carried-over boundary row for the previous MCU row is written to
/// `component.first_row_upsample_dest`.
pub(crate) fn upsample(
    component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16],
    has_vertical_sample: bool
) {
    match component.sample_ratio {
        SampleRatios::V | SampleRatios::HV => {
            /*
            When upsampling vertically sampled images, we have a certain problem
            which is that we do not have all MCU's decoded, this usually sucks at boundaries
            e.g we can't upsample the last mcu row, since the row_down currently doesn't exist

            To solve this we need to do two things

            1. Carry over coefficients when we lack enough data to upsample
            2. Upsample when we have enough data

            To achieve (1), we store a previous row, and the current row in components themselves
            which will later be used to make (2)

            To achieve (2), we take the stored previous row(second last MCU row),
            current row(last mcu row) and row down(first row of newly decoded MCU)

            and upsample that and store it in first_row_upsample_dest, this contains
            up-sampled coefficients for the last for the previous decoded mcu row.

            The caller is then expected to process first_row_upsample_dest before processing data
            in component.upsample_dest which stores the up-sampled components excluding the last row
            */

            let mut dest_start = 0;
            let stride_bytes_written = component.width_stride * component.sample_ratio.sample();

            if i > 0 {
                // Handle the last MCU of the previous row
                // This wasn't up-sampled as we didn't have the row_down
                // so we do it now

                let stride = component.width_stride;

                let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written];

                // get current row
                let row = &component.row[..];
                let row_up = &component.row_up[..];
                let row_down = &component.raw_coeff[0..stride];
                (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest);
            }

            // we have the Y component width stride.
            // this may be higher than the actual width,(2x because vertical sampling)
            //
            // This will not upsample the last row

            // if false, do not upsample.
            // set to false on the last row of an mcu
            let mut upsample = true;

            let stride = component.width_stride * component.vertical_sample;
            // number of decoded rows in this MCU
            let stop_offset = component.raw_coeff.len() / component.width_stride;
            for (pos, curr_row) in component
                .raw_coeff
                .chunks_exact(component.width_stride)
                .enumerate()
            {
                let mut dest: &mut [i16] = &mut [];
                let mut row_up: &[i16] = &[];
                // row below current sample
                let mut row_down: &[i16] = &[];

                // Order of ifs matters

                if i == 0 && pos == 0 {
                    // first IMAGE row, row_up is the same as current row
                    // row_down is the row below.
                    row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i > 0 && pos == 0 {
                    // first row of a new mcu, previous row was copied so use that
                    row_up = &component.row[..];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 {
                    // last IMAGE row, adjust pointer to use previous row and current row
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride];
                } else if pos > 0 && pos < stop_offset - 1 {
                    // other rows, get row up and row down relative to our current row
                    // ignore last row of each mcu
                    row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride];
                    row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride];
                } else if pos == stop_offset - 1 {
                    // last MCU in a row
                    //
                    // we need a row at the next MCU but we haven't decoded that MCU yet
                    // so we should save this and when we have the next MCU,
                    // do the upsampling

                    // store the current row and previous row in a buffer
                    let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride];

                    component.row_up.copy_from_slice(prev_row);
                    component.row.copy_from_slice(curr_row);
                    upsample = false;
                } else {
                    unreachable!("Uh oh!");
                }
                if upsample {
                    dest =
                        &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written];
                    dest_start += stride_bytes_written;
                }

                if upsample {
                    // upsample
                    (component.up_sampler)(
                        curr_row,
                        row_up,
                        row_down,
                        upsampler_scratch_space,
                        dest
                    );
                }
            }
        }
        SampleRatios::H => {
            // horizontal upsampling doubles the row width
            assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len());

            let raw_coeff = &component.raw_coeff;
            let dest_coeff = &mut component.upsample_dest;

            if has_vertical_sample {
                /*
                There have been images that have the following configurations.

                Component ID:Y HS:2 VS:2 QT:0
                Component ID:Cb HS:1 VS:1 QT:1
                Component ID:Cr HS:1 VS:2 QT:1

                This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because
                of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is
                HV sampled with respect to the image sampling factors.

                So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to
                save a single line, since it doesn't suffer from boundary issues.

                Now this takes care of that, saving the last MCU row in case it will be needed.
                We save the previous row before up-sampling this row because the boundary issue is in
                the last MCU row of the previous MCU.

                PS(cae): I can't add the image to the repo as it is nsfw, but can send if required
                */
                let length = component.first_row_upsample_dest.len();
                component
                    .first_row_upsample_dest
                    .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap());
            }
            // up-sample each row
            for (single_row, output_stride) in raw_coeff
                .chunks_exact(component.width_stride)
                .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2))
            {
                // upsample using the fn pointer, should only be H, so no need for
                // row up and row down
                (component.up_sampler)(single_row, &[], &[], &mut [], output_stride);
            }
        }
        // no upsampling required for this component
        SampleRatios::None => {}
    };
}
456 | |