1 | /* |
2 | * Copyright (c) 2023. |
3 | * |
4 | * This software is free software; |
5 | * |
6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
7 | */ |
8 | |
//! Decode JPEG markers/segments
10 | //! |
11 | //! This file deals with decoding header information in a jpeg file |
12 | //! |
13 | use alloc::format; |
14 | use alloc::string::ToString; |
15 | use alloc::vec::Vec; |
16 | |
17 | use zune_core::bytestream::ZReaderTrait; |
18 | use zune_core::colorspace::ColorSpace; |
19 | use zune_core::log::{debug, error, trace, warn}; |
20 | |
21 | use crate::components::Components; |
22 | use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS}; |
23 | use crate::errors::DecodeErrors; |
24 | use crate::huffman::HuffmanTable; |
25 | use crate::misc::{SOFMarkers, UN_ZIGZAG}; |
26 | |
27 | ///**B.2.4.2 Huffman table-specification syntax** |
28 | #[allow (clippy::similar_names, clippy::cast_sign_loss)] |
29 | pub(crate) fn parse_huffman<T: ZReaderTrait>( |
30 | decoder: &mut JpegDecoder<T> |
31 | ) -> Result<(), DecodeErrors> |
32 | where |
33 | { |
34 | // Read the length of the Huffman table |
35 | let mut dht_length = i32::from(decoder.stream.get_u16_be_err()?.checked_sub(2).ok_or( |
36 | DecodeErrors::FormatStatic("Invalid Huffman length in image" ) |
37 | )?); |
38 | |
39 | while dht_length > 16 { |
40 | // HT information |
41 | let ht_info = decoder.stream.get_u8_err()?; |
42 | // third bit indicates whether the huffman encoding is DC or AC type |
43 | let dc_or_ac = (ht_info >> 4) & 0xF; |
44 | // Indicate the position of this table, should be less than 4; |
45 | let index = (ht_info & 0xF) as usize; |
46 | // read the number of symbols |
47 | let mut num_symbols: [u8; 17] = [0; 17]; |
48 | |
49 | if index >= MAX_COMPONENTS { |
50 | return Err(DecodeErrors::HuffmanDecode(format!( |
51 | "Invalid DHT index {index}, expected between 0 and 3" |
52 | ))); |
53 | } |
54 | |
55 | if dc_or_ac > 1 { |
56 | return Err(DecodeErrors::HuffmanDecode(format!( |
57 | "Invalid DHT position {dc_or_ac}, should be 0 or 1" |
58 | ))); |
59 | } |
60 | |
61 | decoder |
62 | .stream |
63 | .read_exact(&mut num_symbols[1..17]) |
64 | .map_err(|_| DecodeErrors::ExhaustedData)?; |
65 | |
66 | dht_length -= 1 + 16; |
67 | |
68 | let symbols_sum: i32 = num_symbols.iter().map(|f| i32::from(*f)).sum(); |
69 | |
70 | // The sum of the number of symbols cannot be greater than 256; |
71 | if symbols_sum > 256 { |
72 | return Err(DecodeErrors::FormatStatic( |
73 | "Encountered Huffman table with excessive length in DHT" |
74 | )); |
75 | } |
76 | if symbols_sum > dht_length { |
77 | return Err(DecodeErrors::HuffmanDecode(format!( |
78 | "Excessive Huffman table of length {symbols_sum} found when header length is {dht_length}" |
79 | ))); |
80 | } |
81 | dht_length -= symbols_sum; |
82 | // A table containing symbols in increasing code length |
83 | let mut symbols = [0; 256]; |
84 | |
85 | decoder |
86 | .stream |
87 | .read_exact(&mut symbols[0..(symbols_sum as usize)]) |
88 | .map_err(|x| { |
89 | DecodeErrors::Format(format!("Could not read symbols into the buffer \n{x}" )) |
90 | })?; |
91 | // store |
92 | match dc_or_ac { |
93 | 0 => { |
94 | decoder.dc_huffman_tables[index] = Some(HuffmanTable::new( |
95 | &num_symbols, |
96 | symbols, |
97 | true, |
98 | decoder.is_progressive |
99 | )?); |
100 | } |
101 | _ => { |
102 | decoder.ac_huffman_tables[index] = Some(HuffmanTable::new( |
103 | &num_symbols, |
104 | symbols, |
105 | false, |
106 | decoder.is_progressive |
107 | )?); |
108 | } |
109 | } |
110 | } |
111 | |
112 | if dht_length > 0 { |
113 | return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition" )); |
114 | } |
115 | |
116 | Ok(()) |
117 | } |
118 | |
119 | ///**B.2.4.1 Quantization table-specification syntax** |
120 | #[allow (clippy::cast_possible_truncation, clippy::needless_range_loop)] |
121 | pub(crate) fn parse_dqt<T: ZReaderTrait>(img: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> { |
122 | // read length |
123 | let mut qt_length = |
124 | img.stream |
125 | .get_u16_be_err()? |
126 | .checked_sub(2) |
127 | .ok_or(DecodeErrors::FormatStatic( |
128 | "Invalid DQT length. Length should be greater than 2" |
129 | ))?; |
130 | // A single DQT header may have multiple QT's |
131 | while qt_length > 0 { |
132 | let qt_info = img.stream.get_u8_err()?; |
133 | // 0 = 8 bit otherwise 16 bit dqt |
134 | let precision = (qt_info >> 4) as usize; |
135 | // last 4 bits give us position |
136 | let table_position = (qt_info & 0x0f) as usize; |
137 | let precision_value = 64 * (precision + 1); |
138 | |
139 | if (precision_value + 1) as u16 > qt_length { |
140 | return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left : {}. Too small to construct a valid qt table which should be {} long" , qt_length, precision_value + 1))); |
141 | } |
142 | |
143 | let dct_table = match precision { |
144 | 0 => { |
145 | let mut qt_values = [0; 64]; |
146 | |
147 | img.stream.read_exact(&mut qt_values).map_err(|x| { |
148 | DecodeErrors::Format(format!("Could not read symbols into the buffer \n{x}" )) |
149 | })?; |
150 | qt_length -= (precision_value as u16) + 1 /*QT BIT*/; |
151 | // carry out un zig-zag here |
152 | un_zig_zag(&qt_values) |
153 | } |
154 | 1 => { |
155 | // 16 bit quantization tables |
156 | let mut qt_values = [0_u16; 64]; |
157 | |
158 | for i in 0..64 { |
159 | qt_values[i] = img.stream.get_u16_be_err()?; |
160 | } |
161 | qt_length -= (precision_value as u16) + 1; |
162 | |
163 | un_zig_zag(&qt_values) |
164 | } |
165 | _ => { |
166 | return Err(DecodeErrors::DqtError(format!( |
167 | "Expected QT precision value of either 0 or 1, found {precision:?}" |
168 | ))); |
169 | } |
170 | }; |
171 | |
172 | if table_position >= MAX_COMPONENTS { |
173 | return Err(DecodeErrors::DqtError(format!( |
174 | "Too large table position for QT : {table_position}, expected between 0 and 3" |
175 | ))); |
176 | } |
177 | |
178 | img.qt_tables[table_position] = Some(dct_table); |
179 | } |
180 | |
181 | return Ok(()); |
182 | } |
183 | |
184 | /// Section:`B.2.2 Frame header syntax` |
185 | |
186 | pub(crate) fn parse_start_of_frame<T: ZReaderTrait>( |
187 | sof: SOFMarkers, img: &mut JpegDecoder<T> |
188 | ) -> Result<(), DecodeErrors> { |
189 | if img.seen_sof { |
190 | return Err(DecodeErrors::SofError( |
191 | "Two Start of Frame Markers" .to_string() |
192 | )); |
193 | } |
194 | // Get length of the frame header |
195 | let length = img.stream.get_u16_be_err()?; |
196 | // usually 8, but can be 12 and 16, we currently support only 8 |
197 | // so sorry about that 12 bit images |
198 | let dt_precision = img.stream.get_u8_err()?; |
199 | |
200 | if dt_precision != 8 { |
201 | return Err(DecodeErrors::SofError(format!( |
202 | "The library can only parse 8-bit images, the image has {dt_precision} bits of precision" |
203 | ))); |
204 | } |
205 | |
206 | img.info.set_density(dt_precision); |
207 | |
208 | // read and set the image height. |
209 | let img_height = img.stream.get_u16_be_err()?; |
210 | img.info.set_height(img_height); |
211 | |
212 | // read and set the image width |
213 | let img_width = img.stream.get_u16_be_err()?; |
214 | img.info.set_width(img_width); |
215 | |
216 | trace!("Image width :{}" , img_width); |
217 | trace!("Image height :{}" , img_height); |
218 | |
219 | if usize::from(img_width) > img.options.get_max_width() { |
220 | return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images" , img_width, img.options.get_max_width()))); |
221 | } |
222 | |
223 | if usize::from(img_height) > img.options.get_max_height() { |
224 | return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images" , img_height, img.options.get_max_height()))); |
225 | } |
226 | |
227 | // Check image width or height is zero |
228 | if img_width == 0 || img_height == 0 { |
229 | return Err(DecodeErrors::ZeroError); |
230 | } |
231 | |
232 | // Number of components for the image. |
233 | let num_components = img.stream.get_u8_err()?; |
234 | |
235 | if num_components == 0 { |
236 | return Err(DecodeErrors::SofError( |
237 | "Number of components cannot be zero." .to_string() |
238 | )); |
239 | } |
240 | |
241 | let expected = 8 + 3 * u16::from(num_components); |
242 | // length should be equal to num components |
243 | if length != expected { |
244 | return Err(DecodeErrors::SofError(format!( |
245 | "Length of start of frame differs from expected {expected},value is {length}" |
246 | ))); |
247 | } |
248 | |
249 | trace!("Image components : {}" , num_components); |
250 | |
251 | if num_components == 1 { |
252 | // SOF sets the number of image components |
253 | // and that to us translates to setting input and output |
254 | // colorspaces to zero |
255 | img.input_colorspace = ColorSpace::Luma; |
256 | img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma); |
257 | debug!("Overriding default colorspace set to Luma" ); |
258 | } |
259 | if num_components == 4 && img.input_colorspace == ColorSpace::YCbCr { |
260 | trace!("Input image has 4 components, defaulting to CMYK colorspace" ); |
261 | // https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/ |
262 | img.input_colorspace = ColorSpace::CMYK; |
263 | } |
264 | |
265 | // set number of components |
266 | img.info.components = num_components; |
267 | |
268 | let mut components = Vec::with_capacity(num_components as usize); |
269 | let mut temp = [0; 3]; |
270 | |
271 | for pos in 0..num_components { |
272 | // read 3 bytes for each component |
273 | img.stream |
274 | .read_exact(&mut temp) |
275 | .map_err(|x| DecodeErrors::Format(format!("Could not read component data \n{x}" )))?; |
276 | // create a component. |
277 | let component = Components::from(temp, pos)?; |
278 | |
279 | components.push(component); |
280 | } |
281 | img.seen_sof = true; |
282 | |
283 | img.info.set_sof_marker(sof); |
284 | |
285 | img.components = components; |
286 | |
287 | Ok(()) |
288 | } |
289 | |
290 | /// Parse a start of scan data |
291 | pub(crate) fn parse_sos<T: ZReaderTrait>(image: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> { |
292 | // Scan header length |
293 | let ls = image.stream.get_u16_be_err()?; |
294 | // Number of image components in scan |
295 | let ns = image.stream.get_u8_err()?; |
296 | |
297 | let mut seen = [-1; { MAX_COMPONENTS + 1 }]; |
298 | |
299 | image.num_scans = ns; |
300 | |
301 | if ls != 6 + 2 * u16::from(ns) { |
302 | return Err(DecodeErrors::SosError(format!( |
303 | "Bad SOS length {ls},corrupt jpeg" |
304 | ))); |
305 | } |
306 | |
307 | // Check number of components. |
308 | if !(1..5).contains(&ns) { |
309 | return Err(DecodeErrors::SosError(format!( |
310 | "Number of components in start of scan should be less than 3 but more than 0. Found {ns}" |
311 | ))); |
312 | } |
313 | |
314 | if image.info.components == 0 { |
315 | return Err(DecodeErrors::FormatStatic( |
316 | "Error decoding SOF Marker, Number of components cannot be zero." |
317 | )); |
318 | } |
319 | |
320 | // consume spec parameters |
321 | for i in 0..ns { |
322 | // CS_i parameter, I don't need it so I might as well delete it |
323 | let id = image.stream.get_u8_err()?; |
324 | |
325 | if seen.contains(&i32::from(id)) { |
326 | return Err(DecodeErrors::SofError(format!( |
327 | "Duplicate ID {id} seen twice in the same component" |
328 | ))); |
329 | } |
330 | |
331 | seen[usize::from(i)] = i32::from(id); |
332 | // DC and AC huffman table position |
333 | // top 4 bits contain dc huffman destination table |
334 | // lower four bits contain ac huffman destination table |
335 | let y = image.stream.get_u8_err()?; |
336 | |
337 | let mut j = 0; |
338 | |
339 | while j < image.info.components { |
340 | if image.components[j as usize].id == id { |
341 | break; |
342 | } |
343 | |
344 | j += 1; |
345 | } |
346 | |
347 | if j == image.info.components { |
348 | return Err(DecodeErrors::SofError(format!( |
349 | "Invalid component id {}, expected a value between 0 and {}" , |
350 | id, |
351 | image.components.len() |
352 | ))); |
353 | } |
354 | |
355 | image.components[usize::from(j)].dc_huff_table = usize::from((y >> 4) & 0xF); |
356 | image.components[usize::from(j)].ac_huff_table = usize::from(y & 0xF); |
357 | image.z_order[i as usize] = j as usize; |
358 | } |
359 | |
360 | // Collect the component spec parameters |
361 | // This is only needed for progressive images but I'll read |
362 | // them in order to ensure they are correct according to the spec |
363 | |
364 | // Extract progressive information |
365 | |
366 | // https://www.w3.org/Graphics/JPEG/itu-t81.pdf |
367 | // Page 42 |
368 | |
369 | // Start of spectral / predictor selection. (between 0 and 63) |
370 | image.spec_start = image.stream.get_u8_err()?; |
371 | // End of spectral selection |
372 | image.spec_end = image.stream.get_u8_err()?; |
373 | |
374 | let bit_approx = image.stream.get_u8_err()?; |
375 | // successive approximation bit position high |
376 | image.succ_high = bit_approx >> 4; |
377 | |
378 | if image.spec_end > 63 { |
379 | return Err(DecodeErrors::SosError(format!( |
380 | "Invalid Se parameter {}, range should be 0-63" , |
381 | image.spec_end |
382 | ))); |
383 | } |
384 | if image.spec_start > 63 { |
385 | return Err(DecodeErrors::SosError(format!( |
386 | "Invalid Ss parameter {}, range should be 0-63" , |
387 | image.spec_start |
388 | ))); |
389 | } |
390 | if image.succ_high > 13 { |
391 | return Err(DecodeErrors::SosError(format!( |
392 | "Invalid Ah parameter {}, range should be 0-13" , |
393 | image.succ_low |
394 | ))); |
395 | } |
396 | // successive approximation bit position low |
397 | image.succ_low = bit_approx & 0xF; |
398 | |
399 | if image.succ_low > 13 { |
400 | return Err(DecodeErrors::SosError(format!( |
401 | "Invalid Al parameter {}, range should be 0-13" , |
402 | image.succ_low |
403 | ))); |
404 | } |
405 | |
406 | trace!( |
407 | "Ss={}, Se={} Ah={} Al={}" , |
408 | image.spec_start, |
409 | image.spec_end, |
410 | image.succ_high, |
411 | image.succ_low |
412 | ); |
413 | |
414 | Ok(()) |
415 | } |
416 | |
417 | /// Parse Adobe App14 segment |
418 | pub(crate) fn parse_app14<T: ZReaderTrait>( |
419 | decoder: &mut JpegDecoder<T> |
420 | ) -> Result<(), DecodeErrors> { |
421 | // skip length |
422 | let mut length = usize::from(decoder.stream.get_u16_be()); |
423 | |
424 | if length < 2 || !decoder.stream.has(length - 2) { |
425 | return Err(DecodeErrors::ExhaustedData); |
426 | } |
427 | if length < 14 { |
428 | return Err(DecodeErrors::FormatStatic( |
429 | "Too short of a length for App14 segment" |
430 | )); |
431 | } |
432 | if decoder.stream.peek_at(0, 5) == Ok(b"Adobe" ) { |
433 | // move stream 6 bytes to remove adobe id |
434 | decoder.stream.skip(6); |
435 | // skip version, flags0 and flags1 |
436 | decoder.stream.skip(5); |
437 | // get color transform |
438 | let transform = decoder.stream.get_u8(); |
439 | // https://exiftool.org/TagNames/JPEG.html#Adobe |
440 | match transform { |
441 | 0 => decoder.input_colorspace = ColorSpace::CMYK, |
442 | 1 => decoder.input_colorspace = ColorSpace::YCbCr, |
443 | 2 => decoder.input_colorspace = ColorSpace::YCCK, |
444 | _ => { |
445 | return Err(DecodeErrors::Format(format!( |
446 | "Unknown Adobe colorspace {transform}" |
447 | ))) |
448 | } |
449 | } |
450 | // length = 2 |
451 | // adobe id = 6 |
452 | // version = 5 |
453 | // transform = 1 |
454 | length = length.saturating_sub(14); |
455 | } else if decoder.options.get_strict_mode() { |
456 | return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment" )); |
457 | } else { |
458 | length = length.saturating_sub(2); |
459 | error!("Not a valid Adobe APP14 Segment" ); |
460 | } |
461 | // skip any proceeding lengths. |
462 | // we do not need them |
463 | decoder.stream.skip(length); |
464 | |
465 | Ok(()) |
466 | } |
467 | |
468 | /// Parse the APP1 segment |
469 | /// |
470 | /// This contains the exif tag |
471 | pub(crate) fn parse_app1<T: ZReaderTrait>( |
472 | decoder: &mut JpegDecoder<T> |
473 | ) -> Result<(), DecodeErrors> { |
474 | // contains exif data |
475 | let mut length = usize::from(decoder.stream.get_u16_be()); |
476 | |
477 | if length < 2 || !decoder.stream.has(length - 2) { |
478 | return Err(DecodeErrors::ExhaustedData); |
479 | } |
480 | // length bytes |
481 | length -= 2; |
482 | |
483 | if length > 6 && decoder.stream.peek_at(0, 6).unwrap() == b"Exif \x00\x00" { |
484 | trace!("Exif segment present" ); |
485 | // skip bytes we read above |
486 | decoder.stream.skip(6); |
487 | length -= 6; |
488 | |
489 | let exif_bytes = decoder.stream.peek_at(0, length).unwrap().to_vec(); |
490 | |
491 | decoder.exif_data = Some(exif_bytes); |
492 | } else { |
493 | warn!("Wrongly formatted exif tag" ); |
494 | } |
495 | |
496 | decoder.stream.skip(length); |
497 | Ok(()) |
498 | } |
499 | |
500 | pub(crate) fn parse_app2<T: ZReaderTrait>( |
501 | decoder: &mut JpegDecoder<T> |
502 | ) -> Result<(), DecodeErrors> { |
503 | let mut length = usize::from(decoder.stream.get_u16_be()); |
504 | |
505 | if length < 2 || !decoder.stream.has(length - 2) { |
506 | return Err(DecodeErrors::ExhaustedData); |
507 | } |
508 | // length bytes |
509 | length -= 2; |
510 | |
511 | if length > 14 && decoder.stream.peek_at(0, 12).unwrap() == *b"ICC_PROFILE \0" { |
512 | trace!("ICC Profile present" ); |
513 | // skip 12 bytes which indicate ICC profile |
514 | length -= 12; |
515 | decoder.stream.skip(12); |
516 | let seq_no = decoder.stream.get_u8(); |
517 | let num_markers = decoder.stream.get_u8(); |
518 | // deduct the two bytes we read above |
519 | length -= 2; |
520 | |
521 | let data = decoder.stream.peek_at(0, length).unwrap().to_vec(); |
522 | |
523 | let icc_chunk = ICCChunk { |
524 | seq_no, |
525 | num_markers, |
526 | data |
527 | }; |
528 | decoder.icc_data.push(icc_chunk); |
529 | } |
530 | |
531 | decoder.stream.skip(length); |
532 | |
533 | Ok(()) |
534 | } |
535 | |
536 | /// Small utility function to print Un-zig-zagged quantization tables |
537 | |
538 | fn un_zig_zag<T>(a: &[T]) -> [i32; 64] |
539 | where |
540 | T: Default + Copy, |
541 | i32: core::convert::From<T> |
542 | { |
543 | let mut output: [i32; 64] = [i32::default(); 64]; |
544 | |
545 | for i: usize in 0..64 { |
546 | output[UN_ZIGZAG[i]] = i32::from(a[i]); |
547 | } |
548 | |
549 | output |
550 | } |
551 | |