| 1 | /* |
| 2 | * Copyright (c) 2023. |
| 3 | * |
| 4 | * This software is free software; |
| 5 | * |
| 6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
| 7 | */ |
| 8 | |
| 9 | use alloc::format; |
| 10 | use core::convert::TryInto; |
| 11 | |
| 12 | use zune_core::colorspace::ColorSpace; |
| 13 | |
| 14 | use crate::color_convert::ycbcr_to_grayscale; |
| 15 | use crate::components::{Components, SampleRatios}; |
| 16 | use crate::decoder::{ColorConvert16Ptr, MAX_COMPONENTS}; |
| 17 | use crate::errors::DecodeErrors; |
| 18 | |
/// Fast rounded multiplication of two 8-bit values: `0..=255 * 0..=255 => 0..=255`.
///
/// Computes the rounded product scaled back into the 8-bit range using only
/// shifts and adds, avoiding a division by 255.
///
/// Borrowed from stb.
#[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)]
#[inline]
fn blinn_8x8(in_val: u8, y: u8) -> u8 {
    // +128 biases the product so that the shifts below round to nearest.
    let biased = u32::from(in_val) * u32::from(y) + 128;
    let scaled = (biased + (biased >> 8)) >> 8;
    // For 8-bit operands `scaled` is at most 255, so the cast loses nothing.
    scaled as u8
}
| 28 | |
| 29 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
| 30 | pub(crate) fn color_convert( |
| 31 | unprocessed: &[&[i16]; MAX_COMPONENTS], color_convert_16: ColorConvert16Ptr, |
| 32 | input_colorspace: ColorSpace, output_colorspace: ColorSpace, output: &mut [u8], width: usize, |
| 33 | padded_width: usize |
| 34 | ) -> Result<(), DecodeErrors> // so many parameters.. |
| 35 | { |
| 36 | // maximum sampling factors are in Y-channel, no need to pass them. |
| 37 | |
| 38 | if input_colorspace.num_components() == 3 && input_colorspace == output_colorspace { |
| 39 | // sort things like RGB to RGB conversion |
| 40 | copy_removing_padding(unprocessed, width, padded_width, output); |
| 41 | return Ok(()); |
| 42 | } |
| 43 | if input_colorspace.num_components() == 4 && input_colorspace == output_colorspace { |
| 44 | copy_removing_padding_4x(unprocessed, width, padded_width, output); |
| 45 | return Ok(()); |
| 46 | } |
| 47 | // color convert |
| 48 | match (input_colorspace, output_colorspace) { |
| 49 | (ColorSpace::YCbCr | ColorSpace::Luma, ColorSpace::Luma) => { |
| 50 | ycbcr_to_grayscale(unprocessed[0], width, padded_width, output); |
| 51 | } |
| 52 | ( |
| 53 | ColorSpace::YCbCr, |
| 54 | ColorSpace::RGB | ColorSpace::RGBA | ColorSpace::BGR | ColorSpace::BGRA |
| 55 | ) => { |
| 56 | color_convert_ycbcr( |
| 57 | unprocessed, |
| 58 | width, |
| 59 | padded_width, |
| 60 | output_colorspace, |
| 61 | color_convert_16, |
| 62 | output |
| 63 | ); |
| 64 | } |
| 65 | (ColorSpace::YCCK, ColorSpace::RGB) => { |
| 66 | color_convert_ycck_to_rgb::<3>( |
| 67 | unprocessed, |
| 68 | width, |
| 69 | padded_width, |
| 70 | output_colorspace, |
| 71 | color_convert_16, |
| 72 | output |
| 73 | ); |
| 74 | } |
| 75 | |
| 76 | (ColorSpace::YCCK, ColorSpace::RGBA) => { |
| 77 | color_convert_ycck_to_rgb::<4>( |
| 78 | unprocessed, |
| 79 | width, |
| 80 | padded_width, |
| 81 | output_colorspace, |
| 82 | color_convert_16, |
| 83 | output |
| 84 | ); |
| 85 | } |
| 86 | (ColorSpace::CMYK, ColorSpace::RGB) => { |
| 87 | color_convert_cymk_to_rgb::<3>(unprocessed, width, padded_width, output); |
| 88 | } |
| 89 | (ColorSpace::CMYK, ColorSpace::RGBA) => { |
| 90 | color_convert_cymk_to_rgb::<4>(unprocessed, width, padded_width, output); |
| 91 | } |
| 92 | // For the other components we do nothing(currently) |
| 93 | _ => { |
| 94 | let msg = format!( |
| 95 | "Unimplemented colorspace mapping from {input_colorspace:?} to {output_colorspace:?}" ); |
| 96 | |
| 97 | return Err(DecodeErrors::Format(msg)); |
| 98 | } |
| 99 | } |
| 100 | Ok(()) |
| 101 | } |
| 102 | |
| 103 | /// Copy a block to output removing padding bytes from input |
| 104 | /// if necessary |
| 105 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
| 106 | fn copy_removing_padding( |
| 107 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
| 108 | ) { |
| 109 | for (((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]) in outputimpl Iterator |
| 110 | .chunks_exact_mut(chunk_size:width * 3) |
| 111 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
| 112 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
| 113 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
| 114 | { |
| 115 | for (((pix: &mut [u8], c: &i16), y: &i16), m: &i16) in pix_w.chunks_exact_mut(chunk_size:3).zip(c_w).zip(m_w).zip(y_w) { |
| 116 | pix[0] = *c as u8; |
| 117 | pix[1] = *y as u8; |
| 118 | pix[2] = *m as u8; |
| 119 | } |
| 120 | } |
| 121 | } |
| 122 | fn copy_removing_padding_4x( |
| 123 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
| 124 | ) { |
| 125 | for ((((pix_w: &mut [u8], c_w: &[i16]), m_w: &[i16]), y_w: &[i16]), k_w: &[i16]) in outputimpl Iterator |
| 126 | .chunks_exact_mut(chunk_size:width * 4) |
| 127 | .zip(mcu_block[0].chunks_exact(chunk_size:padded_width)) |
| 128 | .zip(mcu_block[1].chunks_exact(chunk_size:padded_width)) |
| 129 | .zip(mcu_block[2].chunks_exact(chunk_size:padded_width)) |
| 130 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
| 131 | { |
| 132 | for ((((pix: &mut [u8], c: &i16), y: &i16), m: &i16), k: &i16) in pix_wimpl Iterator |
| 133 | .chunks_exact_mut(chunk_size:4) |
| 134 | .zip(c_w) |
| 135 | .zip(m_w) |
| 136 | .zip(y_w) |
| 137 | .zip(k_w) |
| 138 | { |
| 139 | pix[0] = *c as u8; |
| 140 | pix[1] = *y as u8; |
| 141 | pix[2] = *m as u8; |
| 142 | pix[3] = *k as u8; |
| 143 | } |
| 144 | } |
| 145 | } |
| 146 | /// Convert YCCK image to rgb |
| 147 | #[allow (clippy::cast_possible_truncation, clippy::cast_sign_loss)] |
| 148 | fn color_convert_ycck_to_rgb<const NUM_COMPONENTS: usize>( |
| 149 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, |
| 150 | output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] |
| 151 | ) { |
| 152 | color_convert_ycbcr( |
| 153 | mcu_block, |
| 154 | width, |
| 155 | padded_width, |
| 156 | output_colorspace, |
| 157 | color_convert_16, |
| 158 | output |
| 159 | ); |
| 160 | for (pix_w: &mut [u8], m_w: &[i16]) in outputChunksExactMut<'_, u8> |
| 161 | .chunks_exact_mut(chunk_size:width * 3) |
| 162 | .zip(mcu_block[3].chunks_exact(chunk_size:padded_width)) |
| 163 | { |
| 164 | for (pix: &mut [u8], m: &i16) in pix_w.chunks_exact_mut(NUM_COMPONENTS).zip(m_w) { |
| 165 | let m: u8 = (*m) as u8; |
| 166 | pix[0] = blinn_8x8(in_val:255 - pix[0], y:m); |
| 167 | pix[1] = blinn_8x8(in_val:255 - pix[1], y:m); |
| 168 | pix[2] = blinn_8x8(in_val:255 - pix[2], y:m); |
| 169 | } |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | #[allow (clippy::cast_sign_loss, clippy::cast_possible_truncation)] |
| 174 | fn color_convert_cymk_to_rgb<const NUM_COMPONENTS: usize>( |
| 175 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, output: &mut [u8] |
| 176 | ) { |
| 177 | for ((((pix_w, c_w), m_w), y_w), k_w) in output |
| 178 | .chunks_exact_mut(width * NUM_COMPONENTS) |
| 179 | .zip(mcu_block[0].chunks_exact(padded_width)) |
| 180 | .zip(mcu_block[1].chunks_exact(padded_width)) |
| 181 | .zip(mcu_block[2].chunks_exact(padded_width)) |
| 182 | .zip(mcu_block[3].chunks_exact(padded_width)) |
| 183 | { |
| 184 | for ((((pix, c), m), y), k) in pix_w |
| 185 | .chunks_exact_mut(3) |
| 186 | .zip(c_w) |
| 187 | .zip(m_w) |
| 188 | .zip(y_w) |
| 189 | .zip(k_w) |
| 190 | { |
| 191 | let c = *c as u8; |
| 192 | let m = *m as u8; |
| 193 | let y = *y as u8; |
| 194 | let k = *k as u8; |
| 195 | |
| 196 | pix[0] = blinn_8x8(c, k); |
| 197 | pix[1] = blinn_8x8(m, k); |
| 198 | pix[2] = blinn_8x8(y, k); |
| 199 | } |
| 200 | } |
| 201 | } |
| 202 | |
| 203 | /// Do color-conversion for interleaved MCU |
| 204 | #[allow ( |
| 205 | clippy::similar_names, |
| 206 | clippy::too_many_arguments, |
| 207 | clippy::needless_pass_by_value, |
| 208 | clippy::unwrap_used |
| 209 | )] |
| 210 | fn color_convert_ycbcr( |
| 211 | mcu_block: &[&[i16]; MAX_COMPONENTS], width: usize, padded_width: usize, |
| 212 | output_colorspace: ColorSpace, color_convert_16: ColorConvert16Ptr, output: &mut [u8] |
| 213 | ) { |
| 214 | let num_components = output_colorspace.num_components(); |
| 215 | |
| 216 | let stride = width * num_components; |
| 217 | // Allocate temporary buffer for small widths less than 16. |
| 218 | let mut temp = [0; 64]; |
| 219 | // We need to chunk per width to ensure we can discard extra values at the end of the width. |
| 220 | // Since the encoder may pad bits to ensure the width is a multiple of 8. |
| 221 | for (((y_width, cb_width), cr_width), out) in mcu_block[0] |
| 222 | .chunks_exact(padded_width) |
| 223 | .zip(mcu_block[1].chunks_exact(padded_width)) |
| 224 | .zip(mcu_block[2].chunks_exact(padded_width)) |
| 225 | .zip(output.chunks_exact_mut(stride)) |
| 226 | { |
| 227 | if width < 16 { |
| 228 | // allocate temporary buffers for the values received from idct |
| 229 | let mut y_out = [0; 16]; |
| 230 | let mut cb_out = [0; 16]; |
| 231 | let mut cr_out = [0; 16]; |
| 232 | // copy those small widths to that buffer |
| 233 | y_out[0..y_width.len()].copy_from_slice(y_width); |
| 234 | cb_out[0..cb_width.len()].copy_from_slice(cb_width); |
| 235 | cr_out[0..cr_width.len()].copy_from_slice(cr_width); |
| 236 | // we handle widths less than 16 a bit differently, allocating a temporary |
| 237 | // buffer and writing to that and then flushing to the out buffer |
| 238 | // because of the optimizations applied below, |
| 239 | (color_convert_16)(&y_out, &cb_out, &cr_out, &mut temp, &mut 0); |
| 240 | // copy to stride |
| 241 | out[0..width * num_components].copy_from_slice(&temp[0..width * num_components]); |
| 242 | // next |
| 243 | continue; |
| 244 | } |
| 245 | |
| 246 | // Chunk in outputs of 16 to pass to color_convert as an array of 16 i16's. |
| 247 | for (((y, cb), cr), out_c) in y_width |
| 248 | .chunks_exact(16) |
| 249 | .zip(cb_width.chunks_exact(16)) |
| 250 | .zip(cr_width.chunks_exact(16)) |
| 251 | .zip(out.chunks_exact_mut(16 * num_components)) |
| 252 | { |
| 253 | (color_convert_16)( |
| 254 | y.try_into().unwrap(), |
| 255 | cb.try_into().unwrap(), |
| 256 | cr.try_into().unwrap(), |
| 257 | out_c, |
| 258 | &mut 0 |
| 259 | ); |
| 260 | } |
| 261 | //we have more pixels in the end that can't be handled by the main loop. |
| 262 | //move pointer back a little bit to get last 16 bytes, |
| 263 | //color convert, and overwrite |
| 264 | //This means some values will be color converted twice. |
| 265 | for ((y, cb), cr) in y_width[width - 16..] |
| 266 | .chunks_exact(16) |
| 267 | .zip(cb_width[width - 16..].chunks_exact(16)) |
| 268 | .zip(cr_width[width - 16..].chunks_exact(16)) |
| 269 | .take(1) |
| 270 | { |
| 271 | (color_convert_16)( |
| 272 | y.try_into().unwrap(), |
| 273 | cb.try_into().unwrap(), |
| 274 | cr.try_into().unwrap(), |
| 275 | &mut temp, |
| 276 | &mut 0 |
| 277 | ); |
| 278 | } |
| 279 | |
| 280 | let rem = out[(width - 16) * num_components..] |
| 281 | .chunks_exact_mut(16 * num_components) |
| 282 | .next() |
| 283 | .unwrap(); |
| 284 | |
| 285 | rem.copy_from_slice(&temp[0..rem.len()]); |
| 286 | } |
| 287 | } |
| 288 | pub(crate) fn upsample( |
| 289 | component: &mut Components, mcu_height: usize, i: usize, upsampler_scratch_space: &mut [i16], |
| 290 | has_vertical_sample: bool |
| 291 | ) { |
| 292 | match component.sample_ratio { |
| 293 | SampleRatios::V | SampleRatios::HV => { |
| 294 | /* |
| 295 | When upsampling vertically sampled images, we have a certain problem |
| 296 | which is that we do not have all MCU's decoded, this usually sucks at boundaries |
| 297 | e.g we can't upsample the last mcu row, since the row_down currently doesn't exist |
| 298 | |
| 299 | To solve this we need to do two things |
| 300 | |
| 301 | 1. Carry over coefficients when we lack enough data to upsample |
| 302 | 2. Upsample when we have enough data |
| 303 | |
| 304 | To achieve (1), we store a previous row, and the current row in components themselves |
| 305 | which will later be used to make (2) |
| 306 | |
| 307 | To achieve (2), we take the stored previous row(second last MCU row), |
| 308 | current row(last mcu row) and row down(first row of newly decoded MCU) |
| 309 | |
| 310 | and upsample that and store it in first_row_upsample_dest, this contains |
| 311 | up-sampled coefficients for the last for the previous decoded mcu row. |
| 312 | |
| 313 | The caller is then expected to process first_row_upsample_dest before processing data |
| 314 | in component.upsample_dest which stores the up-sampled components excluding the last row |
| 315 | */ |
| 316 | |
| 317 | let mut dest_start = 0; |
| 318 | let stride_bytes_written = component.width_stride * component.sample_ratio.sample(); |
| 319 | |
| 320 | if i > 0 { |
| 321 | // Handle the last MCU of the previous row |
| 322 | // This wasn't up-sampled as we didn't have the row_down |
| 323 | // so we do it now |
| 324 | |
| 325 | let stride = component.width_stride; |
| 326 | |
| 327 | let dest = &mut component.first_row_upsample_dest[0..stride_bytes_written]; |
| 328 | |
| 329 | // get current row |
| 330 | let row = &component.row[..]; |
| 331 | let row_up = &component.row_up[..]; |
| 332 | let row_down = &component.raw_coeff[0..stride]; |
| 333 | (component.up_sampler)(row, row_up, row_down, upsampler_scratch_space, dest); |
| 334 | } |
| 335 | |
| 336 | // we have the Y component width stride. |
| 337 | // this may be higher than the actual width,(2x because vertical sampling) |
| 338 | // |
| 339 | // This will not upsample the last row |
| 340 | |
| 341 | // if false, do not upsample. |
| 342 | // set to false on the last row of an mcu |
| 343 | let mut upsample = true; |
| 344 | |
| 345 | let stride = component.width_stride * component.vertical_sample; |
| 346 | let stop_offset = component.raw_coeff.len() / component.width_stride; |
| 347 | for (pos, curr_row) in component |
| 348 | .raw_coeff |
| 349 | .chunks_exact(component.width_stride) |
| 350 | .enumerate() |
| 351 | { |
| 352 | let mut dest: &mut [i16] = &mut []; |
| 353 | let mut row_up: &[i16] = &[]; |
| 354 | // row below current sample |
| 355 | let mut row_down: &[i16] = &[]; |
| 356 | |
| 357 | // Order of ifs matters |
| 358 | |
| 359 | if i == 0 && pos == 0 { |
| 360 | // first IMAGE row, row_up is the same as current row |
| 361 | // row_down is the row below. |
| 362 | row_up = &component.raw_coeff[pos * stride..(pos + 1) * stride]; |
| 363 | row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; |
| 364 | } else if i > 0 && pos == 0 { |
| 365 | // first row of a new mcu, previous row was copied so use that |
| 366 | row_up = &component.row[..]; |
| 367 | row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; |
| 368 | } else if i == mcu_height.saturating_sub(1) && pos == stop_offset - 1 { |
| 369 | // last IMAGE row, adjust pointer to use previous row and current row |
| 370 | row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride]; |
| 371 | row_down = &component.raw_coeff[pos * stride..(pos + 1) * stride]; |
| 372 | } else if pos > 0 && pos < stop_offset - 1 { |
| 373 | // other rows, get row up and row down relative to our current row |
| 374 | // ignore last row of each mcu |
| 375 | row_up = &component.raw_coeff[(pos - 1) * stride..pos * stride]; |
| 376 | row_down = &component.raw_coeff[(pos + 1) * stride..(pos + 2) * stride]; |
| 377 | } else if pos == stop_offset - 1 { |
| 378 | // last MCU in a row |
| 379 | // |
| 380 | // we need a row at the next MCU but we haven't decoded that MCU yet |
| 381 | // so we should save this and when we have the next MCU, |
| 382 | // do the upsampling |
| 383 | |
| 384 | // store the current row and previous row in a buffer |
| 385 | let prev_row = &component.raw_coeff[(pos - 1) * stride..pos * stride]; |
| 386 | |
| 387 | component.row_up.copy_from_slice(prev_row); |
| 388 | component.row.copy_from_slice(curr_row); |
| 389 | upsample = false; |
| 390 | } else { |
| 391 | unreachable!("Uh oh!" ); |
| 392 | } |
| 393 | if upsample { |
| 394 | dest = |
| 395 | &mut component.upsample_dest[dest_start..dest_start + stride_bytes_written]; |
| 396 | dest_start += stride_bytes_written; |
| 397 | } |
| 398 | |
| 399 | if upsample { |
| 400 | // upsample |
| 401 | (component.up_sampler)( |
| 402 | curr_row, |
| 403 | row_up, |
| 404 | row_down, |
| 405 | upsampler_scratch_space, |
| 406 | dest |
| 407 | ); |
| 408 | } |
| 409 | } |
| 410 | } |
| 411 | SampleRatios::H => { |
| 412 | assert_eq!(component.raw_coeff.len() * 2, component.upsample_dest.len()); |
| 413 | |
| 414 | let raw_coeff = &component.raw_coeff; |
| 415 | let dest_coeff = &mut component.upsample_dest; |
| 416 | |
| 417 | if has_vertical_sample { |
| 418 | /* |
| 419 | There have been images that have the following configurations. |
| 420 | |
| 421 | Component ID:Y HS:2 VS:2 QT:0 |
| 422 | Component ID:Cb HS:1 VS:1 QT:1 |
| 423 | Component ID:Cr HS:1 VS:2 QT:1 |
| 424 | |
| 425 | This brings out a nasty case of misaligned sampling factors. Cr will need to save a row because |
| 426 | of the way we process boundaries but Cb won't since Cr is horizontally sampled while Cb is |
| 427 | HV sampled with respect to the image sampling factors. |
| 428 | |
| 429 | So during decoding of one MCU, we could only do 7 and not 8 rows, but the SampleRatio::H never had to |
| 430 | save a single line, since it doesn't suffer from boundary issues. |
| 431 | |
| 432 | Now this takes care of that, saving the last MCU row in case it will be needed. |
| 433 | We save the previous row before up-sampling this row because the boundary issue is in |
| 434 | the last MCU row of the previous MCU. |
| 435 | |
| 436 | PS(cae): I can't add the image to the repo as it is nsfw, but can send if required |
| 437 | */ |
| 438 | let length = component.first_row_upsample_dest.len(); |
| 439 | component |
| 440 | .first_row_upsample_dest |
| 441 | .copy_from_slice(&dest_coeff.rchunks_exact(length).next().unwrap()); |
| 442 | } |
| 443 | // up-sample each row |
| 444 | for (single_row, output_stride) in raw_coeff |
| 445 | .chunks_exact(component.width_stride) |
| 446 | .zip(dest_coeff.chunks_exact_mut(component.width_stride * 2)) |
| 447 | { |
| 448 | // upsample using the fn pointer, should only be H, so no need for |
| 449 | // row up and row down |
| 450 | (component.up_sampler)(single_row, &[], &[], &mut [], output_stride); |
| 451 | } |
| 452 | } |
| 453 | SampleRatios::None => {} |
| 454 | }; |
| 455 | } |
| 456 | |