| 1 | /* |
| 2 | * Copyright (c) 2023. |
| 3 | * |
| 4 | * This software is free software; |
| 5 | * |
| 6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
| 7 | */ |
| 8 | |
| 9 | use alloc::{format, vec}; |
| 10 | use core::cmp::min; |
| 11 | |
| 12 | use zune_core::bytestream::ZReaderTrait; |
| 13 | use zune_core::colorspace::ColorSpace; |
| 14 | use zune_core::colorspace::ColorSpace::Luma; |
| 15 | use zune_core::log::{error, trace, warn}; |
| 16 | |
| 17 | use crate::bitstream::BitStream; |
| 18 | use crate::components::SampleRatios; |
| 19 | use crate::decoder::MAX_COMPONENTS; |
| 20 | use crate::errors::DecodeErrors; |
| 21 | use crate::marker::Marker; |
| 22 | use crate::misc::{calculate_padded_width, setup_component_params}; |
| 23 | use crate::worker::{color_convert, upsample}; |
| 24 | use crate::JpegDecoder; |
| 25 | |
/// The number of coefficients in one 8x8 DCT block (data unit) of an MCU,
/// i.e. 8 * 8 = 64 entries.
pub const DCT_BLOCK: usize = 64;
| 29 | |
| 30 | impl<T: ZReaderTrait> JpegDecoder<T> { |
| 31 | /// Check for existence of DC and AC Huffman Tables |
| 32 | pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> { |
| 33 | // check that dc and AC tables exist outside the hot path |
| 34 | for component in &self.components { |
| 35 | let _ = &self |
| 36 | .dc_huffman_tables |
| 37 | .get(component.dc_huff_table) |
| 38 | .as_ref() |
| 39 | .ok_or_else(|| { |
| 40 | DecodeErrors::HuffmanDecode(format!( |
| 41 | "No Huffman DC table for component {:?} " , |
| 42 | component.component_id |
| 43 | )) |
| 44 | })? |
| 45 | .as_ref() |
| 46 | .ok_or_else(|| { |
| 47 | DecodeErrors::HuffmanDecode(format!( |
| 48 | "No DC table for component {:?}" , |
| 49 | component.component_id |
| 50 | )) |
| 51 | })?; |
| 52 | |
| 53 | let _ = &self |
| 54 | .ac_huffman_tables |
| 55 | .get(component.ac_huff_table) |
| 56 | .as_ref() |
| 57 | .ok_or_else(|| { |
| 58 | DecodeErrors::HuffmanDecode(format!( |
| 59 | "No Huffman AC table for component {:?} " , |
| 60 | component.component_id |
| 61 | )) |
| 62 | })? |
| 63 | .as_ref() |
| 64 | .ok_or_else(|| { |
| 65 | DecodeErrors::HuffmanDecode(format!( |
| 66 | "No AC table for component {:?}" , |
| 67 | component.component_id |
| 68 | )) |
| 69 | })?; |
| 70 | } |
| 71 | Ok(()) |
| 72 | } |
| 73 | |
    /// Decode MCUs and carry out post processing.
    ///
    /// This is the main decoder loop for the library, the hot path.
    ///
    /// Because of this, we pull in some very crazy optimization tricks hence readability is a pinch
    /// here.
    ///
    /// `pixels` is the destination buffer for the final, color-converted
    /// image; it must be large enough for the whole image in the configured
    /// output colorspace.
    #[allow(
        clippy::similar_names,
        clippy::too_many_lines,
        clippy::cast_possible_truncation
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_baseline(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;

        // check dc and AC tables exist up-front, outside the hot loop
        self.check_tables()?;

        let (mut mcu_width, mut mcu_height);

        if self.is_interleaved {
            // set upsampling functions
            self.set_upsampling()?;

            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            // For non-interleaved images ((1*1) subsampling)
            // the number of MCUs is the dimension (+7 to account for padding) divided by 8.
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, e.g interleaved,
            // when converting from YcbCr to grayscale, we need to
            // take into account mcu height since the MCU decoding needs to take
            // it into account for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to be 2 to ensure that we increment two rows
            // for every mcu processed also
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }

        if self.input_colorspace.num_components() > self.components.len() {
            let msg = format!(
                " Expected {} number of components but found {}",
                self.input_colorspace.num_components(),
                self.components.len()
            );
            return Err(DecodeErrors::Format(msg));
        }

        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");

            self.reset_params();

            // recompute MCU dimensions for the now effectively (1x1)-sampled image
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        let width = usize::from(self.info.width);

        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);

        let mut stream = BitStream::new();
        // scratch space for a single de-quantized 8x8 coefficient block
        let mut tmp = [0_i32; DCT_BLOCK];

        let comp_len = self.components.len();

        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only needed components.
            //
            // For special colorspaces i.e YCCK and CMYK, just allocate all of the needed
            // components.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || comp_len == 4
            // Special colorspace
            {
                // allocate enough space to hold a whole MCU width
                // this means we should take into account sampling ratios
                // `*8` is because each MCU spans 8 widths.
                let len = comp.width_stride * comp.vertical_sample * 8;

                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }

        // running offset into `pixels`, advanced by post_process
        let mut pixels_written = 0;

        // scratch row is only needed when up-sampling interleaved images;
        // zero-sized otherwise
        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];

        for i in 0..mcu_height {
            // Report if we have no more bytes
            // This may generate false negatives since we over-read bytes
            // hence that why 37 is chosen(we assume if we over-read more than 37 bytes, we have a problem)
            if stream.overread_by > 37
            // favourite number :)
            {
                if self.options.get_strict_mode() {
                    return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
                };

                error!("Premature end of buffer");
                break;
            }
            // decode a whole MCU width,
            // this takes into account interleaved components.
            self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?;
            // process that width up until it's impossible
            self.post_process(
                pixels,
                i,
                mcu_height,
                width,
                padded_width,
                &mut pixels_written,
                &mut upsampler_scratch_space
            )?;
        }
        // it may happen that some images don't have the whole buffer
        // so we can't panic in case of that
        // assert_eq!(pixels_written, pixels.len());

        trace!("Finished decoding image");

        Ok(())
    }
    /// Decode one full MCU row of blocks for every component, running the
    /// IDCT on each block as it is produced.
    ///
    /// `tmp` is a caller-provided scratch block (zeroed here before every
    /// use) and `stream` is the bit reader shared across the whole scan.
    fn decode_mcu_width(
        &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream
    ) -> Result<(), DecodeErrors> {
        for j in 0..mcu_width {
            // iterate over components
            for component in &mut self.components {
                // `check_tables` already validated the table slots before the
                // scan started; the `% MAX_COMPONENTS` is a defensive bound on
                // the index — TODO(review): confirm it can never select a slot
                // that `check_tables` did not validate.
                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let qt_table = &component.quantization_table;
                let channel = &mut component.raw_coeff;

                // If image is interleaved iterate over scan components,
                // otherwise if it-s non-interleaved, these routines iterate in
                // trivial scanline order(Y,Cb,Cr)
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zero based array.
                        tmp.fill(0);

                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred
                        )?;

                        if component.needed {
                            // offset of this 8x8 block inside the component's
                            // MCU-row buffer
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                let c2 = v_samp * 8;
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;

                                component.width_stride * c2 + c3
                            };

                            let idct_pos = channel.get_mut(idct_position..).unwrap();
                            // call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }
            // one MCU fully decoded; count down towards the next restart marker
            self.todo = self.todo.saturating_sub(1);
            // After all interleaved components, that's an MCU
            // handle stream markers
            //
            // In some corrupt images, it may occur that header markers occur in the stream.
            // The spec EXPLICITLY FORBIDS this, specifically, in
            // routine F.2.2.5 it says
            // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
            //
            // But libjpeg-turbo allows it because of some weird reason. so I'll also
            // allow it because of some weird reason.
            if let Some(m) = stream.marker {
                if m == Marker::EOI {
                    // acknowledge and ignore EOI marker.
                    stream.marker.take();
                    trace!("Found EOI marker");
                } else if let Marker::RST(_) = m {
                    // restart marker: only act on it when the restart
                    // interval says one is due
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                } else {
                    // header-type marker inside entropy-coded data: fatal in
                    // strict mode, otherwise parse it like libjpeg-turbo does
                    if self.options.get_strict_mode() {
                        return Err(DecodeErrors::Format(format!(
                            "Marker {m:?} found where not expected"
                        )));
                    }
                    error!(
                        "Marker `{:?}` Found within Huffman Stream, possibly corrupt jpeg",
                        m
                    );

                    self.parse_marker_inner(m)?;
                }
            }
        }
        Ok(())
    }
| 307 | // handle RST markers. |
| 308 | // No-op if not using restarts |
| 309 | // this routine is shared with mcu_prog |
| 310 | #[cold ] |
| 311 | pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> { |
| 312 | self.todo = self.restart_interval; |
| 313 | |
| 314 | if let Some(marker) = stream.marker { |
| 315 | // Found a marker |
| 316 | // Read stream and see what marker is stored there |
| 317 | match marker { |
| 318 | Marker::RST(_) => { |
| 319 | // reset stream |
| 320 | stream.reset(); |
| 321 | // Initialize dc predictions to zero for all components |
| 322 | self.components.iter_mut().for_each(|x| x.dc_pred = 0); |
| 323 | // Start iterating again. from position. |
| 324 | } |
| 325 | Marker::EOI => { |
| 326 | // silent pass |
| 327 | } |
| 328 | _ => { |
| 329 | return Err(DecodeErrors::MCUError(format!( |
| 330 | "Marker {marker:?} found in bitstream, possibly corrupt jpeg" |
| 331 | ))); |
| 332 | } |
| 333 | } |
| 334 | } |
| 335 | Ok(()) |
| 336 | } |
    /// Post-process one decoded MCU row: up-sample sub-sampled components
    /// (interleaved images) and color-convert the samples into `pixels`.
    ///
    /// Arguments:
    /// - `pixels`: destination pixel buffer for the whole image.
    /// - `i`: index of the MCU row currently being processed.
    /// - `mcu_height`: total number of MCU rows in the image.
    /// - `width`: image width in pixels.
    /// - `padded_width`: width padded according to the sampling ratio.
    /// - `pixels_written`: running byte offset into `pixels`; updated on return.
    /// - `upsampler_scratch_space`: scratch row used by the up-sampler.
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();

        // local cursor into `pixels`; written back to `pixels_written` at the end
        let mut px = *pixels_written;
        // indicates whether image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);

        let mut comp_len = self.components.len();

        // If we are moving from YCbCr-> Luma, we do not allocate storage for other components, so we
        // will panic when we are trying to read samples, so for that case,
        // hardcode it so that we don't panic when doing
        // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
        // Color-convert `num_iters` output scanlines starting at `px`,
        // reading one `padded_width`-long line per component from `samples`.
        // Advances the captured `px` by one full output row per iteration.
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };

        let comps = &mut self.components[..];

        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                //
                // NOTE(review): this scoped block builds `samples` and drops
                // it without reading it, so it has no observable effect here —
                // confirm whether an assertion was intended.
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            // up-sample every component for this MCU row
            for comp in comps.iter_mut() {
                upsample(
                    comp,
                    mcu_height,
                    i,
                    upsampler_scratch_space,
                    is_vertically_sampled
                );
            }

            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }

                    // ensure length matches for all samples
                    let first_len = samples[0].len();
                    for samp in samples.iter().take(comp_len) {
                        assert_eq!(first_len, samp.len());
                    }
                    let num_iters = self.coeff * self.v_max;

                    color_conv_function(num_iters, samples)?;
                }

                // After up-sampling the last row, save any row that can be used for
                // a later up-sampling,
                //
                // E.g the Y sample is not sampled but we haven't finished upsampling the last row of
                // the previous mcu, since we don't have the down row, so save it
                for component in comps.iter_mut() {
                    if component.sample_ratio != SampleRatios::H {
                        // We don't care about H sampling factors, since it's copied in the workers function

                        // copy last row to be used for the next color conversion
                        let size = component.vertical_sample
                            * component.width_stride
                            * component.sample_ratio.sample();

                        let last_bytes =
                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();

                        component
                            .first_row_upsample_dest
                            .copy_from_slice(last_bytes);
                    }
                }
            }

            // pick up-sampled data where a component was sub-sampled,
            // raw coefficients otherwise
            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }

            // we either do 7 or 8 MCU's depending on the state, this only applies to
            // vertically sampled images
            //
            // for rows up until the last MCU, we do not upsample the last stride of the MCU
            // which means that the number of iterations should take that into account is one less the
            // up-sampled size
            //
            // For the last MCU, we upsample the last stride, meaning that if we hit the last MCU, we
            // should sample full raw coeffs
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));

            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;

            color_conv_function(num_iters, samples)?;
        } else {
            // Non-interleaved image (or Luma output): read straight from each
            // component's raw coefficients, no up-sampling step needed.
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];

            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);

            color_conv_function(8 * self.coeff, channels_ref)?;
        }

        // publish how far we got into the pixel buffer for the next MCU row
        *pixels_written = px;
        Ok(())
    }
| 496 | } |