1 | //! Utilities and representations for the first half of a block, the literals section. |
2 | //! It contains data that is then copied from by the sequences section. |
3 | use super::super::decoding::bit_reader::{BitReader, GetBitsError}; |
4 | |
/// A compressed block consists of two sections, a literals section, and a sequences section.
///
/// This is the first of those two sections. A literal is just any arbitrary data, and it is
/// copied by the sequences section.
pub struct LiteralsSection {
    /// The size of the literals once regenerated. Its exact meaning depends on `ls_type`:
    /// - If this block is of type [LiteralsSectionType::Raw], then the data is `regenerated_size`
    ///   bytes long, and it contains the raw literals data to be used during the second section,
    ///   the sequences section.
    /// - If this block is of type [LiteralsSectionType::RLE],
    ///   then the literal consists of a single byte repeated `regenerated_size` times.
    /// - For types [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless],
    ///   this is the size of the decompressed data.
    pub regenerated_size: u32,
    /// The size of the compressed literals data.
    /// - For types [LiteralsSectionType::Raw] and [LiteralsSectionType::RLE], this value is not present.
    /// - For types [LiteralsSectionType::Compressed] and [LiteralsSectionType::Treeless], this value will
    ///   be set to the size of the compressed data.
    pub compressed_size: Option<u32>,
    /// The number of streams the compressed literals are split into.
    ///
    /// This value will be either 1 stream or 4 streams if the literal is of type
    /// [LiteralsSectionType::Compressed] or [LiteralsSectionType::Treeless], and it
    /// is not used for RLE or uncompressed literals.
    pub num_streams: Option<u8>,
    /// The type of the literal section.
    pub ls_type: LiteralsSectionType,
}
28 | |
/// The way in which a literals section is encoded.
pub enum LiteralsSectionType {
    /// Literals are stored uncompressed.
    Raw,
    /// Literals consist of a single byte value repeated [LiteralsSection::regenerated_size] times.
    RLE,
    /// This is a standard Huffman-compressed block, starting with a Huffman tree description.
    /// In this mode, there are at least *2* different literals represented in the Huffman tree
    /// description.
    Compressed,
    /// This is a Huffman-compressed block,
    /// using the Huffman tree from the previous [LiteralsSectionType::Compressed] block
    /// in the sequence. If this mode is triggered without any previous Huffman tables in the
    /// frame (or dictionary), it should be treated as data corruption.
    Treeless,
}
45 | |
/// The errors that can be reported while parsing the header of a literals section.
#[derive(Debug)]
#[non_exhaustive]
pub enum LiteralsSectionParseError {
    /// The section type was not one of the values 0, 1, 2 or 3; `got` is the rejected value.
    IllegalLiteralSectionType { got: u8 },
    /// Reading bits from the header failed; carries the error reported by the bit reader.
    GetBitsError(GetBitsError),
    /// The input holds only `have` bytes, but the header needs `need` bytes.
    NotEnoughBytes { have: usize, need: u8 },
}
53 | |
/// Makes [`LiteralsSectionParseError`] usable as a standard error when the `std` feature is on.
#[cfg(feature = "std")]
impl std::error::Error for LiteralsSectionParseError {
    /// Returns the wrapped [`GetBitsError`] for the `GetBitsError` variant. The other variants
    /// do not wrap another error, so they report no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            LiteralsSectionParseError::GetBitsError(source) => Some(source),
            // Listed explicitly so that a newly added variant has to decide about its source.
            LiteralsSectionParseError::IllegalLiteralSectionType { .. }
            | LiteralsSectionParseError::NotEnoughBytes { .. } => None,
        }
    }
}
63 | impl core::fmt::Display for LiteralsSectionParseError { |
64 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
65 | match self { |
66 | LiteralsSectionParseError::IllegalLiteralSectionType { got: &u8 } => { |
67 | write!( |
68 | f, |
69 | "Illegal literalssectiontype. Is: {}, must be in: 0, 1, 2, 3" , |
70 | got |
71 | ) |
72 | } |
73 | LiteralsSectionParseError::GetBitsError(e: &GetBitsError) => write!(f, " {:?}" , e), |
74 | LiteralsSectionParseError::NotEnoughBytes { have: &usize, need: &u8 } => { |
75 | write!( |
76 | f, |
77 | "Not enough byte to parse the literals section header. Have: {}, Need: {}" , |
78 | have, need, |
79 | ) |
80 | } |
81 | } |
82 | } |
83 | } |
84 | |
85 | impl From<GetBitsError> for LiteralsSectionParseError { |
86 | fn from(val: GetBitsError) -> Self { |
87 | Self::GetBitsError(val) |
88 | } |
89 | } |
90 | |
91 | impl core::fmt::Display for LiteralsSectionType { |
92 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { |
93 | match self { |
94 | LiteralsSectionType::Compressed => write!(f, "Compressed" ), |
95 | LiteralsSectionType::Raw => write!(f, "Raw" ), |
96 | LiteralsSectionType::RLE => write!(f, "RLE" ), |
97 | LiteralsSectionType::Treeless => write!(f, "Treeless" ), |
98 | } |
99 | } |
100 | } |
101 | |
102 | impl Default for LiteralsSection { |
103 | fn default() -> Self { |
104 | Self::new() |
105 | } |
106 | } |
107 | |
108 | impl LiteralsSection { |
109 | /// Create a new [LiteralsSection]. |
110 | pub fn new() -> LiteralsSection { |
111 | LiteralsSection { |
112 | regenerated_size: 0, |
113 | compressed_size: None, |
114 | num_streams: None, |
115 | ls_type: LiteralsSectionType::Raw, |
116 | } |
117 | } |
118 | |
119 | /// Given the first byte of a header, determine the size of the whole header, from 1 to 5 bytes. |
120 | pub fn header_bytes_needed(&self, first_byte: u8) -> Result<u8, LiteralsSectionParseError> { |
121 | let ls_type: LiteralsSectionType = Self::section_type(first_byte)?; |
122 | let size_format = (first_byte >> 2) & 0x3; |
123 | match ls_type { |
124 | LiteralsSectionType::RLE | LiteralsSectionType::Raw => { |
125 | match size_format { |
126 | 0 | 2 => { |
127 | // size_format actually only uses one bit |
128 | // regenerated_size uses 5 bits |
129 | Ok(1) |
130 | } |
131 | 1 => { |
132 | // size_format uses 2 bit |
133 | // regenerated_size uses 12 bits |
134 | Ok(2) |
135 | } |
136 | 3 => { |
137 | // size_format uses 2 bit |
138 | // regenerated_size uses 20 bits |
139 | Ok(3) |
140 | } |
141 | _ => panic!( |
142 | "This is a bug in the program. There should only be values between 0..3" |
143 | ), |
144 | } |
145 | } |
146 | LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => { |
147 | match size_format { |
148 | 0 | 1 => { |
149 | // Only differ in num_streams |
150 | // both regenerated and compressed sizes use 10 bit |
151 | Ok(3) |
152 | } |
153 | 2 => { |
154 | // both regenerated and compressed sizes use 14 bit |
155 | Ok(4) |
156 | } |
157 | 3 => { |
158 | // both regenerated and compressed sizes use 18 bit |
159 | Ok(5) |
160 | } |
161 | |
162 | _ => panic!( |
163 | "This is a bug in the program. There should only be values between 0..3" |
164 | ), |
165 | } |
166 | } |
167 | } |
168 | } |
169 | |
170 | /// Parse the header into `self`, and returns the number of bytes read. |
171 | pub fn parse_from_header(&mut self, raw: &[u8]) -> Result<u8, LiteralsSectionParseError> { |
172 | let mut br: BitReader<'_> = BitReader::new(raw); |
173 | let block_type = br.get_bits(2)? as u8; |
174 | self.ls_type = Self::section_type(block_type)?; |
175 | let size_format = br.get_bits(2)? as u8; |
176 | |
177 | let byte_needed = self.header_bytes_needed(raw[0])?; |
178 | if raw.len() < byte_needed as usize { |
179 | return Err(LiteralsSectionParseError::NotEnoughBytes { |
180 | have: raw.len(), |
181 | need: byte_needed, |
182 | }); |
183 | } |
184 | |
185 | match self.ls_type { |
186 | LiteralsSectionType::RLE | LiteralsSectionType::Raw => { |
187 | self.compressed_size = None; |
188 | match size_format { |
189 | 0 | 2 => { |
190 | // size_format actually only uses one bit |
191 | // regenerated_size uses 5 bits |
192 | self.regenerated_size = u32::from(raw[0]) >> 3; |
193 | Ok(1) |
194 | } |
195 | 1 => { |
196 | // size_format uses 2 bit |
197 | // regenerated_size uses 12 bits |
198 | self.regenerated_size = (u32::from(raw[0]) >> 4) + (u32::from(raw[1]) << 4); |
199 | Ok(2) |
200 | } |
201 | 3 => { |
202 | // size_format uses 2 bit |
203 | // regenerated_size uses 20 bits |
204 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
205 | + (u32::from(raw[1]) << 4) |
206 | + (u32::from(raw[2]) << 12); |
207 | Ok(3) |
208 | } |
209 | _ => panic!( |
210 | "This is a bug in the program. There should only be values between 0..3" |
211 | ), |
212 | } |
213 | } |
214 | LiteralsSectionType::Compressed | LiteralsSectionType::Treeless => { |
215 | match size_format { |
216 | 0 => { |
217 | self.num_streams = Some(1); |
218 | } |
219 | 1..=3 => { |
220 | self.num_streams = Some(4); |
221 | } |
222 | _ => panic!( |
223 | "This is a bug in the program. There should only be values between 0..3" |
224 | ), |
225 | }; |
226 | |
227 | match size_format { |
228 | 0 | 1 => { |
229 | // Differ in num_streams see above |
230 | // both regenerated and compressed sizes use 10 bit |
231 | |
232 | // 4 from the first, six from the second byte |
233 | self.regenerated_size = |
234 | (u32::from(raw[0]) >> 4) + ((u32::from(raw[1]) & 0x3f) << 4); |
235 | |
236 | // 2 from the second, full last byte |
237 | self.compressed_size = |
238 | Some(u32::from(raw[1] >> 6) + (u32::from(raw[2]) << 2)); |
239 | Ok(3) |
240 | } |
241 | 2 => { |
242 | // both regenerated and compressed sizes use 14 bit |
243 | |
244 | // 4 from first, full second, 2 from the third byte |
245 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
246 | + (u32::from(raw[1]) << 4) |
247 | + ((u32::from(raw[2]) & 0x3) << 12); |
248 | |
249 | // 6 from the third, full last byte |
250 | self.compressed_size = |
251 | Some((u32::from(raw[2]) >> 2) + (u32::from(raw[3]) << 6)); |
252 | Ok(4) |
253 | } |
254 | 3 => { |
255 | // both regenerated and compressed sizes use 18 bit |
256 | |
257 | // 4 from first, full second, six from third byte |
258 | self.regenerated_size = (u32::from(raw[0]) >> 4) |
259 | + (u32::from(raw[1]) << 4) |
260 | + ((u32::from(raw[2]) & 0x3F) << 12); |
261 | |
262 | // 2 from third, full fourth, full fifth byte |
263 | self.compressed_size = Some( |
264 | (u32::from(raw[2]) >> 6) |
265 | + (u32::from(raw[3]) << 2) |
266 | + (u32::from(raw[4]) << 10), |
267 | ); |
268 | Ok(5) |
269 | } |
270 | |
271 | _ => panic!( |
272 | "This is a bug in the program. There should only be values between 0..3" |
273 | ), |
274 | } |
275 | } |
276 | } |
277 | } |
278 | |
279 | /// Given the first two bits of a header, determine the type of a header. |
280 | fn section_type(raw: u8) -> Result<LiteralsSectionType, LiteralsSectionParseError> { |
281 | let t = raw & 0x3; |
282 | match t { |
283 | 0 => Ok(LiteralsSectionType::Raw), |
284 | 1 => Ok(LiteralsSectionType::RLE), |
285 | 2 => Ok(LiteralsSectionType::Compressed), |
286 | 3 => Ok(LiteralsSectionType::Treeless), |
287 | other => Err(LiteralsSectionParseError::IllegalLiteralSectionType { got: other }), |
288 | } |
289 | } |
290 | } |
291 | |