1 | use alloc::vec::Vec; |
2 | use core::convert::TryInto; |
3 | |
4 | use crate::decoding::scratch::FSEScratch; |
5 | use crate::decoding::scratch::HuffmanScratch; |
6 | use crate::fse::FSETableError; |
7 | use crate::huff0::HuffmanTableError; |
8 | |
/// A parsed Zstandard dictionary: its ID, the entropy tables it ships with,
/// the repeat offsets to start from, and its raw content bytes.
///
/// Zstandard includes support for "raw content" dictionaries, that store bytes optionally used
/// during sequence execution. Instances are produced by [`Dictionary::decode_dict`].
///
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format>
pub struct Dictionary {
    /// A 4 byte value used by decoders to check if they can use
    /// the correct dictionary. This value must not be zero.
    ///
    /// It is read as a little-endian `u32` from the bytes that directly
    /// follow the magic number.
    pub id: u32,
    /// The FSE decoding tables carried by the dictionary. Decoding a
    /// dictionary fills in the offset, match length and literal length
    /// tables, in that order.
    pub fse: FSEScratch,
    /// The Huffman decoding table carried by the dictionary. It is the first
    /// entropy table in the serialized form, placed right after the ID.
    pub huf: HuffmanScratch,
    /// The content of a dictionary acts as a "past" in front of data
    /// to compress or decompress,
    /// so it can be referenced in sequence commands.
    /// As long as the amount of data decoded from this frame is less than or
    /// equal to Window_Size, sequence commands may specify offsets longer than
    /// the total length of decoded output so far to reference back to the
    /// dictionary, even parts of the dictionary with offsets larger than Window_Size.
    /// After the total output has surpassed Window_Size however,
    /// this is no longer allowed and the dictionary is no longer accessible.
    pub dict_content: Vec<u8>,
    /// The 3 most recent offsets are stored so that they can be used
    /// during sequence execution, see
    /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets>
    /// for more.
    ///
    /// In the serialized dictionary these are three consecutive little-endian
    /// `u32` values located between the entropy tables and the content.
    pub offset_hist: [u32; 3],
}
39 | |
40 | #[derive (Debug)] |
41 | #[non_exhaustive ] |
42 | pub enum DictionaryDecodeError { |
43 | BadMagicNum { got: [u8; 4] }, |
44 | FSETableError(FSETableError), |
45 | HuffmanTableError(HuffmanTableError), |
46 | } |
47 | |
48 | #[cfg (feature = "std" )] |
49 | impl std::error::Error for DictionaryDecodeError { |
50 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
51 | match self { |
52 | DictionaryDecodeError::FSETableError(source: &FSETableError) => Some(source), |
53 | DictionaryDecodeError::HuffmanTableError(source: &HuffmanTableError) => Some(source), |
54 | _ => None, |
55 | } |
56 | } |
57 | } |
58 | |
59 | impl core::fmt::Display for DictionaryDecodeError { |
60 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
61 | match self { |
62 | DictionaryDecodeError::BadMagicNum { got: &[u8; 4] } => { |
63 | write!( |
64 | f, |
65 | "Bad magic_num at start of the dictionary; Got: {:#04X?}, Expected: {:#04x?}" , |
66 | got, MAGIC_NUM, |
67 | ) |
68 | } |
69 | DictionaryDecodeError::FSETableError(e: &FSETableError) => write!(f, " {:?}" , e), |
70 | DictionaryDecodeError::HuffmanTableError(e: &HuffmanTableError) => write!(f, " {:?}" , e), |
71 | } |
72 | } |
73 | } |
74 | |
75 | impl From<FSETableError> for DictionaryDecodeError { |
76 | fn from(val: FSETableError) -> Self { |
77 | Self::FSETableError(val) |
78 | } |
79 | } |
80 | |
81 | impl From<HuffmanTableError> for DictionaryDecodeError { |
82 | fn from(val: HuffmanTableError) -> Self { |
83 | Self::HuffmanTableError(val) |
84 | } |
85 | } |
86 | |
87 | /// This 4 byte (little endian) magic number refers to the start of a dictionary |
88 | pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC]; |
89 | |
90 | impl Dictionary { |
91 | /// Parses the dictionary from `raw` and set the tables |
92 | /// it returns the dict_id for checking with the frame's `dict_id`` |
93 | pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> { |
94 | let mut new_dict = Dictionary { |
95 | id: 0, |
96 | fse: FSEScratch::new(), |
97 | huf: HuffmanScratch::new(), |
98 | dict_content: Vec::new(), |
99 | offset_hist: [2, 4, 8], |
100 | }; |
101 | |
102 | let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away" ); |
103 | if magic_num != MAGIC_NUM { |
104 | return Err(DictionaryDecodeError::BadMagicNum { got: magic_num }); |
105 | } |
106 | |
107 | let dict_id = raw[4..8].try_into().expect("optimized away" ); |
108 | let dict_id = u32::from_le_bytes(dict_id); |
109 | new_dict.id = dict_id; |
110 | |
111 | let raw_tables = &raw[8..]; |
112 | |
113 | let huf_size = new_dict.huf.table.build_decoder(raw_tables)?; |
114 | let raw_tables = &raw_tables[huf_size as usize..]; |
115 | |
116 | let of_size = new_dict.fse.offsets.build_decoder( |
117 | raw_tables, |
118 | crate::decoding::sequence_section_decoder::OF_MAX_LOG, |
119 | )?; |
120 | let raw_tables = &raw_tables[of_size..]; |
121 | |
122 | let ml_size = new_dict.fse.match_lengths.build_decoder( |
123 | raw_tables, |
124 | crate::decoding::sequence_section_decoder::ML_MAX_LOG, |
125 | )?; |
126 | let raw_tables = &raw_tables[ml_size..]; |
127 | |
128 | let ll_size = new_dict.fse.literal_lengths.build_decoder( |
129 | raw_tables, |
130 | crate::decoding::sequence_section_decoder::LL_MAX_LOG, |
131 | )?; |
132 | let raw_tables = &raw_tables[ll_size..]; |
133 | |
134 | let offset1 = raw_tables[0..4].try_into().expect("optimized away" ); |
135 | let offset1 = u32::from_le_bytes(offset1); |
136 | |
137 | let offset2 = raw_tables[4..8].try_into().expect("optimized away" ); |
138 | let offset2 = u32::from_le_bytes(offset2); |
139 | |
140 | let offset3 = raw_tables[8..12].try_into().expect("optimized away" ); |
141 | let offset3 = u32::from_le_bytes(offset3); |
142 | |
143 | new_dict.offset_hist[0] = offset1; |
144 | new_dict.offset_hist[1] = offset2; |
145 | new_dict.offset_hist[2] = offset3; |
146 | |
147 | let raw_content = &raw_tables[12..]; |
148 | new_dict.dict_content.extend(raw_content); |
149 | |
150 | Ok(new_dict) |
151 | } |
152 | } |
153 | |