//! Utilities and representations for the first half of a block, the literals section.
//! It contains data that is then copied from by the sequences section.
| 3 | use super::super::decoding::bit_reader::{BitReader, GetBitsError}; |
| 4 | |
| 5 | /// A compressed block consists of two sections, a literals section, and a sequences section. |
| 6 | /// |
| 7 | /// This is the first of those two sections. A literal is just any arbitrary data, and it is copied by the sequences section |
| 8 | pub struct LiteralsSection { |
| 9 | /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_bytes` |
| 10 | /// bytes long, and it contains the raw literals data to be used during the second section, |
| 11 | /// the sequences section. |
| 12 | /// - If this block is of type [LiteralsSectionType::RLE], |
| 13 | /// then the literal consists of a single byte repeated `regenerated_size` times. |
| 14 | /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], |
| 15 | /// then this is the size of the decompressed data. |
| 16 | pub regenerated_size: u32, |
| 17 | /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not present. |
| 18 | /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this value will |
| 19 | /// be set to the size of the compressed data. |
| 20 | pub compressed_size: Option<u32>, |
| 21 | /// This value will be either 1 stream or 4 streams if the literal is of type |
| 22 | /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it |
| 23 | /// is not used for RLE or uncompressed literals. |
| 24 | pub num_streams: Option<u8>, |
| 25 | /// The type of the literal section. |
| 26 | pub ls_type: LiteralsSectionType, |
| 27 | } |
| 28 | |
/// The way in which a literals section is encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LiteralsSectionType {
    /// Literals are stored uncompressed.
    Raw,
    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
    RLE,
    /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
    /// In this mode, there are at least *2* different literals represented in the Huffman tree
    /// description.
    Compressed,
    /// This is a Huffman-compressed block,
    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
    /// in the sequence. If this mode is triggered without any previous Huffman-tables in the
    /// frame (or dictionary), it should be treated as data corruption.
    Treeless,
}
| 45 | |
| 46 | #[derive (Debug)] |
| 47 | #[non_exhaustive ] |
| 48 | pub enum LiteralsSectionParseError { |
| 49 | IllegalLiteralSectionType { got: u8 }, |
| 50 | GetBitsError(GetBitsError), |
| 51 | NotEnoughBytes { have: usize, need: u8 }, |
| 52 | } |
| 53 | |
#[cfg(feature = "std")]
impl std::error::Error for LiteralsSectionParseError {
    /// Returns the wrapped [GetBitsError] for the `GetBitsError` variant and `None` for every
    /// other variant, since those do not wrap another error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            LiteralsSectionParseError::GetBitsError(source) => Some(source),
            // Listed explicitly so that adding a variant forces a decision here.
            LiteralsSectionParseError::IllegalLiteralSectionType { .. }
            | LiteralsSectionParseError::NotEnoughBytes { .. } => None,
        }
    }
}
| 63 | impl core::fmt::Display for LiteralsSectionParseError { |
| 64 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 65 | match self { |
| 66 | LiteralsSectionParseError::IllegalLiteralSectionType { got: &u8 } => { |
| 67 | write!( |
| 68 | f, |
| 69 | "Illegal literalssectiontype. Is: {}, must be in: 0, 1, 2, 3" , |
| 70 | got |
| 71 | ) |
| 72 | } |
| 73 | LiteralsSectionParseError::GetBitsError(e: &GetBitsError) => write!(f, " {:?}" , e), |
| 74 | LiteralsSectionParseError::NotEnoughBytes { have: &usize, need: &u8 } => { |
| 75 | write!( |
| 76 | f, |
| 77 | "Not enough byte to parse the literals section header. Have: {}, Need: {}" , |
| 78 | have, need, |
| 79 | ) |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | } |
| 84 | |
| 85 | impl From<GetBitsError> for LiteralsSectionParseError { |
| 86 | fn from(val: GetBitsError) -> Self { |
| 87 | Self::GetBitsError(val) |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | impl core::fmt::Display for LiteralsSectionType { |
| 92 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { |
| 93 | match self { |
| 94 | LiteralsSectionType::Compressed => write!(f, "Compressed" ), |
| 95 | LiteralsSectionType::Raw => write!(f, "Raw" ), |
| 96 | LiteralsSectionType::RLE => write!(f, "RLE" ), |
| 97 | LiteralsSectionType::Treeless => write!(f, "Treeless" ), |
| 98 | } |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | impl Default for LiteralsSection { |
| 103 | fn default() -> Self { |
| 104 | Self::new() |
| 105 | } |
| 106 | } |
| 107 | |
| 108 | impl LiteralsSection { |
| 109 | /// Create a new [LiteralsSection]. |
| 110 | pub fn new() -> LiteralsSection { |
| 111 | LiteralsSection { |
| 112 | regenerated_size: 0, |
| 113 | compressed_size: None, |
| 114 | num_streams: None, |
| 115 | ls_type: LiteralsSectionType::Raw, |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes. |
| 120 | pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> { |
| 121 | let ls_type: LiteralsSectionType = Self::section_type(first_byte)?; |
| 122 | let size_format = (first_byte >> 2) & 0x3; |
| 123 | match ls_type { |
| 124 | LiteralsSectionType::RLE | LiteralsSectionType::Raw => { |
| 125 | match size_format { |
| 126 | 0 | 2 => { |
| 127 | // size_format actually only uses one bit |
| 128 | // regenerated_size uses 5 bits |
| 129 | Ok(1) |
| 130 | } |
| 131 | 1 => { |
| 132 | // size_format uses 2 bit |
| 133 | // regenerated_size uses 12 bits |
| 134 | Ok(2) |
| 135 | } |
| 136 | 3 => { |
| 137 | // size_format uses 2 bit |
| 138 | // regenerated_size uses 20 bits |
| 139 | Ok(3) |
| 140 | } |
| 141 | _ => panic!( |
| 142 | "This is a bug in the program. There should only be values between 0..3" |
| 143 | ), |
| 144 | } |
| 145 | } |
| 146 | LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => { |
| 147 | match size_format { |
| 148 | 0 | 1 => { |
| 149 | // Only differ in num_streams |
| 150 | // both regenerated and compressed sizes use 10 bit |
| 151 | Ok(3) |
| 152 | } |
| 153 | 2 => { |
| 154 | // both regenerated and compressed sizes use 14 bit |
| 155 | Ok(4) |
| 156 | } |
| 157 | 3 => { |
| 158 | // both regenerated and compressed sizes use 18 bit |
| 159 | Ok(5) |
| 160 | } |
| 161 | |
| 162 | _ => panic!( |
| 163 | "This is a bug in the program. There should only be values between 0..3" |
| 164 | ), |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | } |
| 169 | |
| 170 | /// Parse the header into `self`, and returns the number of bytes read. |
| 171 | pub fn parse_from_header(&mut self, raw: &[u8]) -> Result<u8, LiteralsSectionParseError> { |
| 172 | let mut br: BitReader<'_> = BitReader::new(raw); |
| 173 | let block_type = br.get_bits(2)? as u8; |
| 174 | self.ls_type = Self::section_type(block_type)?; |
| 175 | let size_format = br.get_bits(2)? as u8; |
| 176 | |
| 177 | let byte_needed = self.header_bytes_needed(raw[0])?; |
| 178 | if raw.len() < byte_needed as usize { |
| 179 | return Err(LiteralsSectionParseError::NotEnoughBytes { |
| 180 | have: raw.len(), |
| 181 | need: byte_needed, |
| 182 | }); |
| 183 | } |
| 184 | |
| 185 | match self.ls_type { |
| 186 | LiteralsSectionType::RLE | LiteralsSectionType::Raw => { |
| 187 | self.compressed_size = None; |
| 188 | match size_format { |
| 189 | 0 | 2 => { |
| 190 | // size_format actually only uses one bit |
| 191 | // regenerated_size uses 5 bits |
| 192 | self.regenerated_size = u32::from(raw[0]) >> 3; |
| 193 | Ok(1) |
| 194 | } |
| 195 | 1 => { |
| 196 | // size_format uses 2 bit |
| 197 | // regenerated_size uses 12 bits |
| 198 | self.regenerated_size = (u32::from(raw[0]) >> 4) + (u32::from(raw[1]) << 4); |
| 199 | Ok(2) |
| 200 | } |
| 201 | 3 => { |
| 202 | // size_format uses 2 bit |
| 203 | // regenerated_size uses 20 bits |
| 204 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
| 205 | + (u32::from(raw[1]) << 4) |
| 206 | + (u32::from(raw[2]) << 12); |
| 207 | Ok(3) |
| 208 | } |
| 209 | _ => panic!( |
| 210 | "This is a bug in the program. There should only be values between 0..3" |
| 211 | ), |
| 212 | } |
| 213 | } |
| 214 | LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => { |
| 215 | match size_format { |
| 216 | 0 => { |
| 217 | self.num_streams = Some(1); |
| 218 | } |
| 219 | 1..=3 => { |
| 220 | self.num_streams = Some(4); |
| 221 | } |
| 222 | _ => panic!( |
| 223 | "This is a bug in the program. There should only be values between 0..3" |
| 224 | ), |
| 225 | }; |
| 226 | |
| 227 | match size_format { |
| 228 | 0 | 1 => { |
| 229 | // Differ in num_streams see above |
| 230 | // both regenerated and compressed sizes use 10 bit |
| 231 | |
| 232 | // 4 from the first, six from the second byte |
| 233 | self.regenerated_size = |
| 234 | (u32::from(raw[0]) >> 4) + ((u32::from(raw[1]) & 0x3f) << 4); |
| 235 | |
| 236 | // 2 from the second, full last byte |
| 237 | self.compressed_size = |
| 238 | Some(u32::from(raw[1] >> 6) + (u32::from(raw[2]) << 2)); |
| 239 | Ok(3) |
| 240 | } |
| 241 | 2 => { |
| 242 | // both regenerated and compressed sizes use 14 bit |
| 243 | |
| 244 | // 4 from first, full second, 2 from the third byte |
| 245 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
| 246 | + (u32::from(raw[1]) << 4) |
| 247 | + ((u32::from(raw[2]) & 0x3) << 12); |
| 248 | |
| 249 | // 6 from the third, full last byte |
| 250 | self.compressed_size = |
| 251 | Some((u32::from(raw[2]) >> 2) + (u32::from(raw[3]) << 6)); |
| 252 | Ok(4) |
| 253 | } |
| 254 | 3 => { |
| 255 | // both regenerated and compressed sizes use 18 bit |
| 256 | |
| 257 | // 4 from first, full second, six from third byte |
| 258 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
| 259 | + (u32::from(raw[1]) << 4) |
| 260 | + ((u32::from(raw[2]) & 0x3F) << 12); |
| 261 | |
| 262 | // 2 from third, full fourth, full fifth byte |
| 263 | self.compressed_size = Some( |
| 264 | (u32::from(raw[2]) >> 6) |
| 265 | + (u32::from(raw[3]) << 2) |
| 266 | + (u32::from(raw[4]) << 10), |
| 267 | ); |
| 268 | Ok(5) |
| 269 | } |
| 270 | |
| 271 | _ => panic!( |
| 272 | "This is a bug in the program. There should only be values between 0..3" |
| 273 | ), |
| 274 | } |
| 275 | } |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | /// Given the first two bits of a header, determine the type of a header. |
| 280 | fn section_type(raw: u8) -> Result<LiteralsSectionType, LiteralsSectionParseError> { |
| 281 | let t = raw & 0x3; |
| 282 | match t { |
| 283 | 0 => Ok(LiteralsSectionType::Raw), |
| 284 | 1 => Ok(LiteralsSectionType::RLE), |
| 285 | 2 => Ok(LiteralsSectionType::Compressed), |
| 286 | 3 => Ok(LiteralsSectionType::Treeless), |
| 287 | other => Err(LiteralsSectionParseError::IllegalLiteralSectionType { got: other }), |
| 288 | } |
| 289 | } |
| 290 | } |
| 291 | |