| 1 | /* |
| 2 | * Copyright (c) 2023. |
| 3 | * |
| 4 | * This software is free software; |
| 5 | * |
| 6 | * You can redistribute it or modify it under terms of the MIT, Apache License or Zlib license |
| 7 | */ |
| 8 | |
| 9 | //! Decode Decoder markers/segments |
| 10 | //! |
| 11 | //! This file deals with decoding header information in a jpeg file |
| 12 | //! |
| 13 | use alloc::format; |
| 14 | use alloc::string::ToString; |
| 15 | use alloc::vec::Vec; |
| 16 | |
| 17 | use zune_core::bytestream::ZReaderTrait; |
| 18 | use zune_core::colorspace::ColorSpace; |
| 19 | use zune_core::log::{debug, error, trace, warn}; |
| 20 | |
| 21 | use crate::components::Components; |
| 22 | use crate::decoder::{ICCChunk, JpegDecoder, MAX_COMPONENTS}; |
| 23 | use crate::errors::DecodeErrors; |
| 24 | use crate::huffman::HuffmanTable; |
| 25 | use crate::misc::{SOFMarkers, UN_ZIGZAG}; |
| 26 | |
| 27 | ///**B.2.4.2 Huffman table-specification syntax** |
| 28 | #[allow (clippy::similar_names, clippy::cast_sign_loss)] |
| 29 | pub(crate) fn parse_huffman<T: ZReaderTrait>( |
| 30 | decoder: &mut JpegDecoder<T> |
| 31 | ) -> Result<(), DecodeErrors> |
| 32 | where |
| 33 | { |
| 34 | // Read the length of the Huffman table |
| 35 | let mut dht_length = i32::from(decoder.stream.get_u16_be_err()?.checked_sub(2).ok_or( |
| 36 | DecodeErrors::FormatStatic("Invalid Huffman length in image" ) |
| 37 | )?); |
| 38 | |
| 39 | while dht_length > 16 { |
| 40 | // HT information |
| 41 | let ht_info = decoder.stream.get_u8_err()?; |
| 42 | // third bit indicates whether the huffman encoding is DC or AC type |
| 43 | let dc_or_ac = (ht_info >> 4) & 0xF; |
| 44 | // Indicate the position of this table, should be less than 4; |
| 45 | let index = (ht_info & 0xF) as usize; |
| 46 | // read the number of symbols |
| 47 | let mut num_symbols: [u8; 17] = [0; 17]; |
| 48 | |
| 49 | if index >= MAX_COMPONENTS { |
| 50 | return Err(DecodeErrors::HuffmanDecode(format!( |
| 51 | "Invalid DHT index {index}, expected between 0 and 3" |
| 52 | ))); |
| 53 | } |
| 54 | |
| 55 | if dc_or_ac > 1 { |
| 56 | return Err(DecodeErrors::HuffmanDecode(format!( |
| 57 | "Invalid DHT position {dc_or_ac}, should be 0 or 1" |
| 58 | ))); |
| 59 | } |
| 60 | |
| 61 | decoder |
| 62 | .stream |
| 63 | .read_exact(&mut num_symbols[1..17]) |
| 64 | .map_err(|_| DecodeErrors::ExhaustedData)?; |
| 65 | |
| 66 | dht_length -= 1 + 16; |
| 67 | |
| 68 | let symbols_sum: i32 = num_symbols.iter().map(|f| i32::from(*f)).sum(); |
| 69 | |
| 70 | // The sum of the number of symbols cannot be greater than 256; |
| 71 | if symbols_sum > 256 { |
| 72 | return Err(DecodeErrors::FormatStatic( |
| 73 | "Encountered Huffman table with excessive length in DHT" |
| 74 | )); |
| 75 | } |
| 76 | if symbols_sum > dht_length { |
| 77 | return Err(DecodeErrors::HuffmanDecode(format!( |
| 78 | "Excessive Huffman table of length {symbols_sum} found when header length is {dht_length}" |
| 79 | ))); |
| 80 | } |
| 81 | dht_length -= symbols_sum; |
| 82 | // A table containing symbols in increasing code length |
| 83 | let mut symbols = [0; 256]; |
| 84 | |
| 85 | decoder |
| 86 | .stream |
| 87 | .read_exact(&mut symbols[0..(symbols_sum as usize)]) |
| 88 | .map_err(|x| { |
| 89 | DecodeErrors::Format(format!("Could not read symbols into the buffer \n{x}" )) |
| 90 | })?; |
| 91 | // store |
| 92 | match dc_or_ac { |
| 93 | 0 => { |
| 94 | decoder.dc_huffman_tables[index] = Some(HuffmanTable::new( |
| 95 | &num_symbols, |
| 96 | symbols, |
| 97 | true, |
| 98 | decoder.is_progressive |
| 99 | )?); |
| 100 | } |
| 101 | _ => { |
| 102 | decoder.ac_huffman_tables[index] = Some(HuffmanTable::new( |
| 103 | &num_symbols, |
| 104 | symbols, |
| 105 | false, |
| 106 | decoder.is_progressive |
| 107 | )?); |
| 108 | } |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | if dht_length > 0 { |
| 113 | return Err(DecodeErrors::FormatStatic("Bogus Huffman table definition" )); |
| 114 | } |
| 115 | |
| 116 | Ok(()) |
| 117 | } |
| 118 | |
| 119 | ///**B.2.4.1 Quantization table-specification syntax** |
| 120 | #[allow (clippy::cast_possible_truncation, clippy::needless_range_loop)] |
| 121 | pub(crate) fn parse_dqt<T: ZReaderTrait>(img: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> { |
| 122 | // read length |
| 123 | let mut qt_length = |
| 124 | img.stream |
| 125 | .get_u16_be_err()? |
| 126 | .checked_sub(2) |
| 127 | .ok_or(DecodeErrors::FormatStatic( |
| 128 | "Invalid DQT length. Length should be greater than 2" |
| 129 | ))?; |
| 130 | // A single DQT header may have multiple QT's |
| 131 | while qt_length > 0 { |
| 132 | let qt_info = img.stream.get_u8_err()?; |
| 133 | // 0 = 8 bit otherwise 16 bit dqt |
| 134 | let precision = (qt_info >> 4) as usize; |
| 135 | // last 4 bits give us position |
| 136 | let table_position = (qt_info & 0x0f) as usize; |
| 137 | let precision_value = 64 * (precision + 1); |
| 138 | |
| 139 | if (precision_value + 1) as u16 > qt_length { |
| 140 | return Err(DecodeErrors::DqtError(format!("Invalid QT table bytes left : {}. Too small to construct a valid qt table which should be {} long" , qt_length, precision_value + 1))); |
| 141 | } |
| 142 | |
| 143 | let dct_table = match precision { |
| 144 | 0 => { |
| 145 | let mut qt_values = [0; 64]; |
| 146 | |
| 147 | img.stream.read_exact(&mut qt_values).map_err(|x| { |
| 148 | DecodeErrors::Format(format!("Could not read symbols into the buffer \n{x}" )) |
| 149 | })?; |
| 150 | qt_length -= (precision_value as u16) + 1 /*QT BIT*/; |
| 151 | // carry out un zig-zag here |
| 152 | un_zig_zag(&qt_values) |
| 153 | } |
| 154 | 1 => { |
| 155 | // 16 bit quantization tables |
| 156 | let mut qt_values = [0_u16; 64]; |
| 157 | |
| 158 | for i in 0..64 { |
| 159 | qt_values[i] = img.stream.get_u16_be_err()?; |
| 160 | } |
| 161 | qt_length -= (precision_value as u16) + 1; |
| 162 | |
| 163 | un_zig_zag(&qt_values) |
| 164 | } |
| 165 | _ => { |
| 166 | return Err(DecodeErrors::DqtError(format!( |
| 167 | "Expected QT precision value of either 0 or 1, found {precision:?}" |
| 168 | ))); |
| 169 | } |
| 170 | }; |
| 171 | |
| 172 | if table_position >= MAX_COMPONENTS { |
| 173 | return Err(DecodeErrors::DqtError(format!( |
| 174 | "Too large table position for QT : {table_position}, expected between 0 and 3" |
| 175 | ))); |
| 176 | } |
| 177 | |
| 178 | img.qt_tables[table_position] = Some(dct_table); |
| 179 | } |
| 180 | |
| 181 | return Ok(()); |
| 182 | } |
| 183 | |
| 184 | /// Section:`B.2.2 Frame header syntax` |
| 185 | |
| 186 | pub(crate) fn parse_start_of_frame<T: ZReaderTrait>( |
| 187 | sof: SOFMarkers, img: &mut JpegDecoder<T> |
| 188 | ) -> Result<(), DecodeErrors> { |
| 189 | if img.seen_sof { |
| 190 | return Err(DecodeErrors::SofError( |
| 191 | "Two Start of Frame Markers" .to_string() |
| 192 | )); |
| 193 | } |
| 194 | // Get length of the frame header |
| 195 | let length = img.stream.get_u16_be_err()?; |
| 196 | // usually 8, but can be 12 and 16, we currently support only 8 |
| 197 | // so sorry about that 12 bit images |
| 198 | let dt_precision = img.stream.get_u8_err()?; |
| 199 | |
| 200 | if dt_precision != 8 { |
| 201 | return Err(DecodeErrors::SofError(format!( |
| 202 | "The library can only parse 8-bit images, the image has {dt_precision} bits of precision" |
| 203 | ))); |
| 204 | } |
| 205 | |
| 206 | img.info.set_density(dt_precision); |
| 207 | |
| 208 | // read and set the image height. |
| 209 | let img_height = img.stream.get_u16_be_err()?; |
| 210 | img.info.set_height(img_height); |
| 211 | |
| 212 | // read and set the image width |
| 213 | let img_width = img.stream.get_u16_be_err()?; |
| 214 | img.info.set_width(img_width); |
| 215 | |
| 216 | trace!("Image width :{}" , img_width); |
| 217 | trace!("Image height :{}" , img_height); |
| 218 | |
| 219 | if usize::from(img_width) > img.options.get_max_width() { |
| 220 | return Err(DecodeErrors::Format(format!("Image width {} greater than width limit {}. If use `set_limits` if you want to support huge images" , img_width, img.options.get_max_width()))); |
| 221 | } |
| 222 | |
| 223 | if usize::from(img_height) > img.options.get_max_height() { |
| 224 | return Err(DecodeErrors::Format(format!("Image height {} greater than height limit {}. If use `set_limits` if you want to support huge images" , img_height, img.options.get_max_height()))); |
| 225 | } |
| 226 | |
| 227 | // Check image width or height is zero |
| 228 | if img_width == 0 || img_height == 0 { |
| 229 | return Err(DecodeErrors::ZeroError); |
| 230 | } |
| 231 | |
| 232 | // Number of components for the image. |
| 233 | let num_components = img.stream.get_u8_err()?; |
| 234 | |
| 235 | if num_components == 0 { |
| 236 | return Err(DecodeErrors::SofError( |
| 237 | "Number of components cannot be zero." .to_string() |
| 238 | )); |
| 239 | } |
| 240 | |
| 241 | let expected = 8 + 3 * u16::from(num_components); |
| 242 | // length should be equal to num components |
| 243 | if length != expected { |
| 244 | return Err(DecodeErrors::SofError(format!( |
| 245 | "Length of start of frame differs from expected {expected},value is {length}" |
| 246 | ))); |
| 247 | } |
| 248 | |
| 249 | trace!("Image components : {}" , num_components); |
| 250 | |
| 251 | if num_components == 1 { |
| 252 | // SOF sets the number of image components |
| 253 | // and that to us translates to setting input and output |
| 254 | // colorspaces to zero |
| 255 | img.input_colorspace = ColorSpace::Luma; |
| 256 | img.options = img.options.jpeg_set_out_colorspace(ColorSpace::Luma); |
| 257 | debug!("Overriding default colorspace set to Luma" ); |
| 258 | } |
| 259 | if num_components == 4 && img.input_colorspace == ColorSpace::YCbCr { |
| 260 | trace!("Input image has 4 components, defaulting to CMYK colorspace" ); |
| 261 | // https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/ |
| 262 | img.input_colorspace = ColorSpace::CMYK; |
| 263 | } |
| 264 | |
| 265 | // set number of components |
| 266 | img.info.components = num_components; |
| 267 | |
| 268 | let mut components = Vec::with_capacity(num_components as usize); |
| 269 | let mut temp = [0; 3]; |
| 270 | |
| 271 | for pos in 0..num_components { |
| 272 | // read 3 bytes for each component |
| 273 | img.stream |
| 274 | .read_exact(&mut temp) |
| 275 | .map_err(|x| DecodeErrors::Format(format!("Could not read component data \n{x}" )))?; |
| 276 | // create a component. |
| 277 | let component = Components::from(temp, pos)?; |
| 278 | |
| 279 | components.push(component); |
| 280 | } |
| 281 | img.seen_sof = true; |
| 282 | |
| 283 | img.info.set_sof_marker(sof); |
| 284 | |
| 285 | img.components = components; |
| 286 | |
| 287 | Ok(()) |
| 288 | } |
| 289 | |
| 290 | /// Parse a start of scan data |
| 291 | pub(crate) fn parse_sos<T: ZReaderTrait>(image: &mut JpegDecoder<T>) -> Result<(), DecodeErrors> { |
| 292 | // Scan header length |
| 293 | let ls = image.stream.get_u16_be_err()?; |
| 294 | // Number of image components in scan |
| 295 | let ns = image.stream.get_u8_err()?; |
| 296 | |
| 297 | let mut seen = [-1; { MAX_COMPONENTS + 1 }]; |
| 298 | |
| 299 | image.num_scans = ns; |
| 300 | |
| 301 | if ls != 6 + 2 * u16::from(ns) { |
| 302 | return Err(DecodeErrors::SosError(format!( |
| 303 | "Bad SOS length {ls},corrupt jpeg" |
| 304 | ))); |
| 305 | } |
| 306 | |
| 307 | // Check number of components. |
| 308 | if !(1..5).contains(&ns) { |
| 309 | return Err(DecodeErrors::SosError(format!( |
| 310 | "Number of components in start of scan should be less than 3 but more than 0. Found {ns}" |
| 311 | ))); |
| 312 | } |
| 313 | |
| 314 | if image.info.components == 0 { |
| 315 | return Err(DecodeErrors::FormatStatic( |
| 316 | "Error decoding SOF Marker, Number of components cannot be zero." |
| 317 | )); |
| 318 | } |
| 319 | |
| 320 | // consume spec parameters |
| 321 | for i in 0..ns { |
| 322 | // CS_i parameter, I don't need it so I might as well delete it |
| 323 | let id = image.stream.get_u8_err()?; |
| 324 | |
| 325 | if seen.contains(&i32::from(id)) { |
| 326 | return Err(DecodeErrors::SofError(format!( |
| 327 | "Duplicate ID {id} seen twice in the same component" |
| 328 | ))); |
| 329 | } |
| 330 | |
| 331 | seen[usize::from(i)] = i32::from(id); |
| 332 | // DC and AC huffman table position |
| 333 | // top 4 bits contain dc huffman destination table |
| 334 | // lower four bits contain ac huffman destination table |
| 335 | let y = image.stream.get_u8_err()?; |
| 336 | |
| 337 | let mut j = 0; |
| 338 | |
| 339 | while j < image.info.components { |
| 340 | if image.components[j as usize].id == id { |
| 341 | break; |
| 342 | } |
| 343 | |
| 344 | j += 1; |
| 345 | } |
| 346 | |
| 347 | if j == image.info.components { |
| 348 | return Err(DecodeErrors::SofError(format!( |
| 349 | "Invalid component id {}, expected a value between 0 and {}" , |
| 350 | id, |
| 351 | image.components.len() |
| 352 | ))); |
| 353 | } |
| 354 | |
| 355 | image.components[usize::from(j)].dc_huff_table = usize::from((y >> 4) & 0xF); |
| 356 | image.components[usize::from(j)].ac_huff_table = usize::from(y & 0xF); |
| 357 | image.z_order[i as usize] = j as usize; |
| 358 | } |
| 359 | |
| 360 | // Collect the component spec parameters |
| 361 | // This is only needed for progressive images but I'll read |
| 362 | // them in order to ensure they are correct according to the spec |
| 363 | |
| 364 | // Extract progressive information |
| 365 | |
| 366 | // https://www.w3.org/Graphics/JPEG/itu-t81.pdf |
| 367 | // Page 42 |
| 368 | |
| 369 | // Start of spectral / predictor selection. (between 0 and 63) |
| 370 | image.spec_start = image.stream.get_u8_err()?; |
| 371 | // End of spectral selection |
| 372 | image.spec_end = image.stream.get_u8_err()?; |
| 373 | |
| 374 | let bit_approx = image.stream.get_u8_err()?; |
| 375 | // successive approximation bit position high |
| 376 | image.succ_high = bit_approx >> 4; |
| 377 | |
| 378 | if image.spec_end > 63 { |
| 379 | return Err(DecodeErrors::SosError(format!( |
| 380 | "Invalid Se parameter {}, range should be 0-63" , |
| 381 | image.spec_end |
| 382 | ))); |
| 383 | } |
| 384 | if image.spec_start > 63 { |
| 385 | return Err(DecodeErrors::SosError(format!( |
| 386 | "Invalid Ss parameter {}, range should be 0-63" , |
| 387 | image.spec_start |
| 388 | ))); |
| 389 | } |
| 390 | if image.succ_high > 13 { |
| 391 | return Err(DecodeErrors::SosError(format!( |
| 392 | "Invalid Ah parameter {}, range should be 0-13" , |
| 393 | image.succ_low |
| 394 | ))); |
| 395 | } |
| 396 | // successive approximation bit position low |
| 397 | image.succ_low = bit_approx & 0xF; |
| 398 | |
| 399 | if image.succ_low > 13 { |
| 400 | return Err(DecodeErrors::SosError(format!( |
| 401 | "Invalid Al parameter {}, range should be 0-13" , |
| 402 | image.succ_low |
| 403 | ))); |
| 404 | } |
| 405 | |
| 406 | trace!( |
| 407 | "Ss={}, Se={} Ah={} Al={}" , |
| 408 | image.spec_start, |
| 409 | image.spec_end, |
| 410 | image.succ_high, |
| 411 | image.succ_low |
| 412 | ); |
| 413 | |
| 414 | Ok(()) |
| 415 | } |
| 416 | |
| 417 | /// Parse Adobe App14 segment |
| 418 | pub(crate) fn parse_app14<T: ZReaderTrait>( |
| 419 | decoder: &mut JpegDecoder<T> |
| 420 | ) -> Result<(), DecodeErrors> { |
| 421 | // skip length |
| 422 | let mut length = usize::from(decoder.stream.get_u16_be()); |
| 423 | |
| 424 | if length < 2 || !decoder.stream.has(length - 2) { |
| 425 | return Err(DecodeErrors::ExhaustedData); |
| 426 | } |
| 427 | if length < 14 { |
| 428 | return Err(DecodeErrors::FormatStatic( |
| 429 | "Too short of a length for App14 segment" |
| 430 | )); |
| 431 | } |
| 432 | if decoder.stream.peek_at(0, 5) == Ok(b"Adobe" ) { |
| 433 | // move stream 6 bytes to remove adobe id |
| 434 | decoder.stream.skip(6); |
| 435 | // skip version, flags0 and flags1 |
| 436 | decoder.stream.skip(5); |
| 437 | // get color transform |
| 438 | let transform = decoder.stream.get_u8(); |
| 439 | // https://exiftool.org/TagNames/JPEG.html#Adobe |
| 440 | match transform { |
| 441 | 0 => decoder.input_colorspace = ColorSpace::CMYK, |
| 442 | 1 => decoder.input_colorspace = ColorSpace::YCbCr, |
| 443 | 2 => decoder.input_colorspace = ColorSpace::YCCK, |
| 444 | _ => { |
| 445 | return Err(DecodeErrors::Format(format!( |
| 446 | "Unknown Adobe colorspace {transform}" |
| 447 | ))) |
| 448 | } |
| 449 | } |
| 450 | // length = 2 |
| 451 | // adobe id = 6 |
| 452 | // version = 5 |
| 453 | // transform = 1 |
| 454 | length = length.saturating_sub(14); |
| 455 | } else if decoder.options.get_strict_mode() { |
| 456 | return Err(DecodeErrors::FormatStatic("Corrupt Adobe App14 segment" )); |
| 457 | } else { |
| 458 | length = length.saturating_sub(2); |
| 459 | error!("Not a valid Adobe APP14 Segment" ); |
| 460 | } |
| 461 | // skip any proceeding lengths. |
| 462 | // we do not need them |
| 463 | decoder.stream.skip(length); |
| 464 | |
| 465 | Ok(()) |
| 466 | } |
| 467 | |
| 468 | /// Parse the APP1 segment |
| 469 | /// |
| 470 | /// This contains the exif tag |
| 471 | pub(crate) fn parse_app1<T: ZReaderTrait>( |
| 472 | decoder: &mut JpegDecoder<T> |
| 473 | ) -> Result<(), DecodeErrors> { |
| 474 | // contains exif data |
| 475 | let mut length = usize::from(decoder.stream.get_u16_be()); |
| 476 | |
| 477 | if length < 2 || !decoder.stream.has(length - 2) { |
| 478 | return Err(DecodeErrors::ExhaustedData); |
| 479 | } |
| 480 | // length bytes |
| 481 | length -= 2; |
| 482 | |
| 483 | if length > 6 && decoder.stream.peek_at(0, 6).unwrap() == b"Exif \x00\x00" { |
| 484 | trace!("Exif segment present" ); |
| 485 | // skip bytes we read above |
| 486 | decoder.stream.skip(6); |
| 487 | length -= 6; |
| 488 | |
| 489 | let exif_bytes = decoder.stream.peek_at(0, length).unwrap().to_vec(); |
| 490 | |
| 491 | decoder.exif_data = Some(exif_bytes); |
| 492 | } else { |
| 493 | warn!("Wrongly formatted exif tag" ); |
| 494 | } |
| 495 | |
| 496 | decoder.stream.skip(length); |
| 497 | Ok(()) |
| 498 | } |
| 499 | |
| 500 | pub(crate) fn parse_app2<T: ZReaderTrait>( |
| 501 | decoder: &mut JpegDecoder<T> |
| 502 | ) -> Result<(), DecodeErrors> { |
| 503 | let mut length = usize::from(decoder.stream.get_u16_be()); |
| 504 | |
| 505 | if length < 2 || !decoder.stream.has(length - 2) { |
| 506 | return Err(DecodeErrors::ExhaustedData); |
| 507 | } |
| 508 | // length bytes |
| 509 | length -= 2; |
| 510 | |
| 511 | if length > 14 && decoder.stream.peek_at(0, 12).unwrap() == *b"ICC_PROFILE \0" { |
| 512 | trace!("ICC Profile present" ); |
| 513 | // skip 12 bytes which indicate ICC profile |
| 514 | length -= 12; |
| 515 | decoder.stream.skip(12); |
| 516 | let seq_no = decoder.stream.get_u8(); |
| 517 | let num_markers = decoder.stream.get_u8(); |
| 518 | // deduct the two bytes we read above |
| 519 | length -= 2; |
| 520 | |
| 521 | let data = decoder.stream.peek_at(0, length).unwrap().to_vec(); |
| 522 | |
| 523 | let icc_chunk = ICCChunk { |
| 524 | seq_no, |
| 525 | num_markers, |
| 526 | data |
| 527 | }; |
| 528 | decoder.icc_data.push(icc_chunk); |
| 529 | } |
| 530 | |
| 531 | decoder.stream.skip(length); |
| 532 | |
| 533 | Ok(()) |
| 534 | } |
| 535 | |
| 536 | /// Small utility function to print Un-zig-zagged quantization tables |
| 537 | |
| 538 | fn un_zig_zag<T>(a: &[T]) -> [i32; 64] |
| 539 | where |
| 540 | T: Default + Copy, |
| 541 | i32: core::convert::From<T> |
| 542 | { |
| 543 | let mut output: [i32; 64] = [i32::default(); 64]; |
| 544 | |
| 545 | for i: usize in 0..64 { |
| 546 | output[UN_ZIGZAG[i]] = i32::from(a[i]); |
| 547 | } |
| 548 | |
| 549 | output |
| 550 | } |
| 551 | |