/*
 * Copyright (c) 2023.
 *
 * This software is free software;
 *
 * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license
 */

use alloc::{format, vec};
use core::cmp::min;

use zune_core::bytestream::ZReaderTrait;
use zune_core::colorspace::ColorSpace;
use zune_core::colorspace::ColorSpace::Luma;
use zune_core::log::{error, trace, warn};

use crate::bitstream::BitStream;
use crate::components::SampleRatios;
use crate::decoder::MAX_COMPONENTS;
use crate::errors::DecodeErrors;
use crate::marker::Marker;
use crate::misc::{calculate_padded_width, setup_component_params};
use crate::worker::{color_convert, upsample};
use crate::JpegDecoder;

/// The size of a DCT block (8 x 8 = 64 coefficients) for an MCU.
pub const DCT_BLOCK: usize = 64;

impl<T: ZReaderTrait> JpegDecoder<T> {
    /// Check for existence of DC and AC Huffman tables
    pub(crate) fn check_tables(&self) -> Result<(), DecodeErrors> {
        // check that the DC and AC tables exist outside the hot path
        for component in &self.components {
            let _ = &self
                .dc_huffman_tables
                .get(component.dc_huff_table)
                .as_ref()
                .ok_or_else(|| {
                    DecodeErrors::HuffmanDecode(format!(
                        "No Huffman DC table for component {:?}",
                        component.component_id
                    ))
                })?
                .as_ref()
                .ok_or_else(|| {
                    DecodeErrors::HuffmanDecode(format!(
                        "No DC table for component {:?}",
                        component.component_id
                    ))
                })?;

            let _ = &self
                .ac_huffman_tables
                .get(component.ac_huff_table)
                .as_ref()
                .ok_or_else(|| {
                    DecodeErrors::HuffmanDecode(format!(
                        "No Huffman AC table for component {:?}",
                        component.component_id
                    ))
                })?
                .as_ref()
                .ok_or_else(|| {
                    DecodeErrors::HuffmanDecode(format!(
                        "No AC table for component {:?}",
                        component.component_id
                    ))
                })?;
        }
        Ok(())
    }

    /// Decode MCUs and carry out post processing.
    ///
    /// This is the main decoder loop for the library, the hot path.
    ///
    /// Because of this, it pulls in some aggressive optimization tricks, hence readability
    /// takes a hit here.
    #[allow(
        clippy::similar_names,
        clippy::too_many_lines,
        clippy::cast_possible_truncation
    )]
    #[inline(never)]
    pub(crate) fn decode_mcu_ycbcr_baseline(
        &mut self, pixels: &mut [u8]
    ) -> Result<(), DecodeErrors> {
        setup_component_params(self)?;

        // check DC and AC tables
        self.check_tables()?;

        let (mut mcu_width, mut mcu_height);

        if self.is_interleaved {
            // set upsampling functions
            self.set_upsampling()?;

            mcu_width = self.mcu_x;
            mcu_height = self.mcu_y;
        } else {
            // For non-interleaved images (1x1 subsampling), the number of MCUs is
            // the width/height (+7 to account for padding) divided by 8.
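            // e.g., for a hypothetical 17x9 image: mcu_width = (17 + 7) / 8 = 3 and
            // mcu_height = (9 + 7) / 8 = 2.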
            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        if self.is_interleaved
            && self.input_colorspace.num_components() > 1
            && self.options.jpeg_get_out_colorspace().num_components() == 1
            && (self.sub_sample_ratio == SampleRatios::V
                || self.sub_sample_ratio == SampleRatios::HV)
        {
            // For a specific set of images, i.e. interleaved ones converted from
            // YCbCr to grayscale, we need to take mcu height into account since
            // MCU decoding needs it for padding purposes and the post processor
            // parses two rows per mcu width.
            //
            // set coeff to 2 to ensure that we also increment two rows
            // for every mcu processed
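            //
            // e.g., for V sub-sampling (v_max = 2, h_max = 1) mcu_height doubles below,
            // while for HV (v_max = 2, h_max = 2) it is unchanged; in both cases
            // coeff = 2 makes the post-processor emit two rows per pass.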
            mcu_height *= self.v_max;
            mcu_height /= self.h_max;
            self.coeff = 2;
        }

        if self.input_colorspace.num_components() > self.components.len() {
            let msg = format!(
                "Expected {} components but found {}",
                self.input_colorspace.num_components(),
                self.components.len()
            );
            return Err(DecodeErrors::Format(msg));
        }

        if self.input_colorspace == ColorSpace::Luma && self.is_interleaved {
            warn!("Grayscale image with down-sampled component, resetting component details");

            self.reset_params();

            mcu_width = ((self.info.width + 7) / 8) as usize;
            mcu_height = ((self.info.height + 7) / 8) as usize;
        }
        let width = usize::from(self.info.width);

        let padded_width = calculate_padded_width(width, self.sub_sample_ratio);

        let mut stream = BitStream::new();
        let mut tmp = [0_i32; DCT_BLOCK];

        let comp_len = self.components.len();

        for (pos, comp) in self.components.iter_mut().enumerate() {
            // Allocate only the components that are needed.
            //
            // For special colorspaces, i.e. YCCK and CMYK, just allocate all of the
            // needed components.
            if min(
                self.options.jpeg_get_out_colorspace().num_components() - 1,
                pos
            ) == pos
                || comp_len == 4
            // Special colorspace
            {
                // allocate enough space to hold a whole MCU width;
                // this means we should take into account sampling ratios.
                // `* 8` is because each block in the MCU is 8 pixels tall.
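                // e.g., for a hypothetical component with width_stride = 640 and
                // vertical_sample = 2, len = 640 * 2 * 8 = 10240 samples.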
                let len = comp.width_stride * comp.vertical_sample * 8;

                comp.needed = true;
                comp.raw_coeff = vec![0; len];
            } else {
                comp.needed = false;
            }
        }

        let mut pixels_written = 0;

        let is_hv = usize::from(self.is_interleaved);
        let upsampler_scratch_size = is_hv * self.components[0].width_stride;
        let mut upsampler_scratch_space = vec![0; upsampler_scratch_size];

        for i in 0..mcu_height {
            // Report if we have no more bytes.
            // This may generate false negatives since we over-read bytes,
            // hence why 37 is chosen (we assume that if we over-read by more than
            // 37 bytes, we have a problem).
            if stream.overread_by > 37
            // favourite number :)
            {
                if self.options.get_strict_mode() {
                    return Err(DecodeErrors::FormatStatic("Premature end of buffer"));
                };

                error!("Premature end of buffer");
                break;
            }
            // decode a whole MCU width,
            // this takes into account interleaved components.
            self.decode_mcu_width(mcu_width, &mut tmp, &mut stream)?;
            // post-process that width (up-sampling and color conversion) as far as possible.
            self.post_process(
                pixels,
                i,
                mcu_height,
                width,
                padded_width,
                &mut pixels_written,
                &mut upsampler_scratch_space
            )?;
        }
        // Some images don't fill the whole output buffer,
        // so we can't assert on that here.
        // assert_eq!(pixels_written, pixels.len());

        trace!("Finished decoding image");

        Ok(())
    }
    fn decode_mcu_width(
        &mut self, mcu_width: usize, tmp: &mut [i32; 64], stream: &mut BitStream
    ) -> Result<(), DecodeErrors> {
        for j in 0..mcu_width {
            // iterate over components
            for component in &mut self.components {
                let dc_table = self.dc_huffman_tables[component.dc_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let ac_table = self.ac_huffman_tables[component.ac_huff_table % MAX_COMPONENTS]
                    .as_ref()
                    .unwrap();

                let qt_table = &component.quantization_table;
                let channel = &mut component.raw_coeff;

                // If the image is interleaved, iterate over the scan components;
                // otherwise, if it's non-interleaved, these routines iterate in
                // trivial scanline order (Y, Cb, Cr).
                for v_samp in 0..component.vertical_sample {
                    for h_samp in 0..component.horizontal_sample {
                        // Fill the array with zeroes, decode_mcu_block expects
                        // a zeroed array.
                        tmp.fill(0);

                        stream.decode_mcu_block(
                            &mut self.stream,
                            dc_table,
                            ac_table,
                            qt_table,
                            tmp,
                            &mut component.dc_pred
                        )?;

                        if component.needed {
                            let idct_position = {
                                // derived from stb and rewritten for my tastes
                                let c2 = v_samp * 8;
                                let c3 = ((j * component.horizontal_sample) + h_samp) * 8;

                                component.width_stride * c2 + c3
                            };
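                            // e.g., with a hypothetical horizontal_sample = 2, for j = 3,
                            // v_samp = 1, h_samp = 0: c2 = 8 and c3 = (3 * 2 + 0) * 8 = 48,
                            // so the IDCT output lands 8 rows down and 48 samples across
                            // in this component's coefficient strip.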

                            let idct_pos = channel.get_mut(idct_position..).unwrap();
                            // call idct.
                            (self.idct_func)(tmp, idct_pos, component.width_stride);
                        }
                    }
                }
            }
            self.todo = self.todo.saturating_sub(1);
            // After processing all interleaved components, that's one MCU;
            // handle stream markers.
            //
            // In some corrupt images, it may occur that header markers appear in the stream.
            // The spec EXPLICITLY FORBIDS this; specifically, in
            // routine F.2.2.5 it says
            // `The only valid marker which may occur within the Huffman coded data is the RSTm marker.`
            //
            // But libjpeg-turbo allows it for some weird reason, so I'll also
            // allow it for some weird reason.
            if let Some(m) = stream.marker {
                if m == Marker::EOI {
                    // acknowledge and ignore the EOI marker.
                    stream.marker.take();
                    trace!("Found EOI marker");
                } else if let Marker::RST(_) = m {
                    if self.todo == 0 {
                        self.handle_rst(stream)?;
                    }
                } else {
                    if self.options.get_strict_mode() {
                        return Err(DecodeErrors::Format(format!(
                            "Marker {m:?} found where not expected"
                        )));
                    }
                    error!(
                        "Marker `{:?}` found within Huffman stream, possibly corrupt jpeg",
                        m
                    );

                    self.parse_marker_inner(m)?;
                }
            }
        }
        Ok(())
    }
    // handle RST markers.
    // No-op if not using restarts
    // this routine is shared with mcu_prog
    #[cold]
    pub(crate) fn handle_rst(&mut self, stream: &mut BitStream) -> Result<(), DecodeErrors> {
        self.todo = self.restart_interval;
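        // e.g., if a DRI marker set restart_interval to 4, an RST marker is expected
        // after every 4 MCUs, so the countdown is re-armed here.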

        if let Some(marker) = stream.marker {
            // Found a marker
            // Read the stream and see what marker is stored there
            match marker {
                Marker::RST(_) => {
                    // reset stream
                    stream.reset();
                    // Initialize dc predictions to zero for all components
                    self.components.iter_mut().for_each(|x| x.dc_pred = 0);
                    // Start iterating again from this position.
                }
                Marker::EOI => {
                    // silent pass
                }
                _ => {
                    return Err(DecodeErrors::MCUError(format!(
                        "Marker {marker:?} found in bitstream, possibly corrupt jpeg"
                    )));
                }
            }
        }
        Ok(())
    }
    #[allow(clippy::too_many_lines, clippy::too_many_arguments)]
    pub(crate) fn post_process(
        &mut self, pixels: &mut [u8], i: usize, mcu_height: usize, width: usize,
        padded_width: usize, pixels_written: &mut usize, upsampler_scratch_space: &mut [i16]
    ) -> Result<(), DecodeErrors> {
        let out_colorspace_components = self.options.jpeg_get_out_colorspace().num_components();

        let mut px = *pixels_written;
        // indicates whether the image is vertically up-sampled
        let is_vertically_sampled = self
            .components
            .iter()
            .any(|c| c.sample_ratio == SampleRatios::HV || c.sample_ratio == SampleRatios::V);

        let mut comp_len = self.components.len();

        // If we are converting from YCbCr to Luma, we do not allocate storage for the other
        // components, so we would panic when trying to read their samples; for that case,
        // clamp comp_len so that we don't panic when doing
        // *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width]
        if out_colorspace_components < comp_len && self.options.jpeg_get_out_colorspace() == Luma {
            comp_len = out_colorspace_components;
        }
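        // The closure below converts `num_iters` scanlines: each iteration reads one
        // `padded_width`-long row per component and writes `width * out_colorspace_components`
        // bytes of output, advancing `px` by the same amount (e.g., for a hypothetical
        // 640-wide RGB output, 640 * 3 = 1920 bytes per line).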
        let mut color_conv_function =
            |num_iters: usize, samples: [&[i16]; 4]| -> Result<(), DecodeErrors> {
                for (pos, output) in pixels[px..]
                    .chunks_exact_mut(width * out_colorspace_components)
                    .take(num_iters)
                    .enumerate()
                {
                    let mut raw_samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    // iterate over each line, since color-convert needs only
                    // one line
                    for (j, samp) in raw_samples.iter_mut().enumerate().take(comp_len) {
                        *samp = &samples[j][pos * padded_width..(pos + 1) * padded_width];
                    }
                    color_convert(
                        &raw_samples,
                        self.color_convert_16,
                        self.input_colorspace,
                        self.options.jpeg_get_out_colorspace(),
                        output,
                        width,
                        padded_width
                    )?;
                    px += width * out_colorspace_components;
                }
                Ok(())
            };

        let comps = &mut self.components[..];

        if self.is_interleaved && self.options.jpeg_get_out_colorspace() != ColorSpace::Luma {
            {
                // duplicated so that we can check that samples match
                // Fixes bug https://github.com/etemesi254/zune-image/issues/151
                let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                    *samp = if component.sample_ratio == SampleRatios::None {
                        &component.raw_coeff
                    } else {
                        &component.upsample_dest
                    };
                }
            }
            for comp in comps.iter_mut() {
                upsample(
                    comp,
                    mcu_height,
                    i,
                    upsampler_scratch_space,
                    is_vertically_sampled
                );
            }

            if is_vertically_sampled {
                if i > 0 {
                    // write the last line, it wasn't up-sampled as we didn't have row_down
                    // yet
                    let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

                    for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                        *samp = &component.first_row_upsample_dest;
                    }

                    // ensure length matches for all samples
                    let first_len = samples[0].len();
                    for samp in samples.iter().take(comp_len) {
                        assert_eq!(first_len, samp.len());
                    }
                    let num_iters = self.coeff * self.v_max;
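                    // e.g., with coeff = 1 and v_max = 2, two scanlines are converted
                    // from the rows saved during the previous pass.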

                    color_conv_function(num_iters, samples)?;
                }

                // After up-sampling the last row, save any row that can be used for
                // a later up-sampling pass.
                //
                // E.g. the Y component is not sub-sampled, but we haven't finished up-sampling
                // the last row of the previous MCU since we don't have the row below it yet,
                // so save it.
                for component in comps.iter_mut() {
                    if component.sample_ratio != SampleRatios::H {
                        // We don't care about H sampling factors, since they are copied in the
                        // worker functions.

                        // copy the last row to be used for the next color conversion
                        let size = component.vertical_sample
                            * component.width_stride
                            * component.sample_ratio.sample();

                        let last_bytes =
                            component.raw_coeff.rchunks_exact_mut(size).next().unwrap();

                        component
                            .first_row_upsample_dest
                            .copy_from_slice(last_bytes);
                    }
                }
            }

            let mut samples: [&[i16]; 4] = [&[], &[], &[], &[]];

            for (samp, component) in samples.iter_mut().zip(comps.iter()) {
                *samp = if component.sample_ratio == SampleRatios::None {
                    &component.raw_coeff
                } else {
                    &component.upsample_dest
                };
            }

            // We convert either 7 or 8 rows per MCU depending on the state; this only applies
            // to vertically sampled images.
            //
            // For rows up until the last MCU, we do not up-sample the last stride of the MCU,
            // which means the number of iterations should be one less than the up-sampled size.
            //
            // For the last MCU, we up-sample the last stride, meaning that if we hit the last
            // MCU, we should consume the full raw coefficients.
            let is_last_considered = is_vertically_sampled && (i != mcu_height.saturating_sub(1));

            let num_iters = (8 - usize::from(is_last_considered)) * self.coeff * self.v_max;
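            // e.g., for a vertically sampled image with coeff = 1 and v_max = 2:
            // intermediate MCU rows convert (8 - 1) * 1 * 2 = 14 scanlines, while the
            // last MCU row converts 8 * 1 * 2 = 16.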

            color_conv_function(num_iters, samples)?;
        } else {
            let mut channels_ref: [&[i16]; MAX_COMPONENTS] = [&[]; MAX_COMPONENTS];

            self.components
                .iter()
                .enumerate()
                .for_each(|(pos, x)| channels_ref[pos] = &x.raw_coeff);

            color_conv_function(8 * self.coeff, channels_ref)?;
        }

        *pixels_written = px;
        Ok(())
    }
}