1use alloc::vec::Vec;
2use core::convert::TryInto;
3
4use crate::decoding::scratch::FSEScratch;
5use crate::decoding::scratch::HuffmanScratch;
6use crate::fse::FSETableError;
7use crate::huff0::HuffmanTableError;
8
9/// Zstandard includes support for "raw content" dictionaries, that store bytes optionally used
10/// during sequence execution.
11///
12/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format>
13pub struct Dictionary {
14 /// A 4 byte value used by decoders to check if they can use
15 /// the correct dictionary. This value must not be zero.
16 pub id: u32,
17 /// A dictionary can contain an entropy table, either FSE or
18 /// Huffman.
19 pub fse: FSEScratch,
20 /// A dictionary can contain an entropy table, either FSE or
21 /// Huffman.
22 pub huf: HuffmanScratch,
23 /// The content of a dictionary acts as a "past" in front of data
24 /// to compress or decompress,
25 /// so it can be referenced in sequence commands.
26 /// As long as the amount of data decoded from this frame is less than or
27 /// equal to Window_Size, sequence commands may specify offsets longer than
28 /// the total length of decoded output so far to reference back to the
29 /// dictionary, even parts of the dictionary with offsets larger than Window_Size.
30 /// After the total output has surpassed Window_Size however,
31 /// this is no longer allowed and the dictionary is no longer accessible
32 pub dict_content: Vec<u8>,
33 /// The 3 most recent offsets are stored so that they can be used
34 /// during sequence execution, see
35 /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets>
36 /// for more.
37 pub offset_hist: [u32; 3],
38}
39
40#[derive(Debug)]
41#[non_exhaustive]
42pub enum DictionaryDecodeError {
43 BadMagicNum { got: [u8; 4] },
44 FSETableError(FSETableError),
45 HuffmanTableError(HuffmanTableError),
46}
47
48#[cfg(feature = "std")]
49impl std::error::Error for DictionaryDecodeError {
50 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
51 match self {
52 DictionaryDecodeError::FSETableError(source: &FSETableError) => Some(source),
53 DictionaryDecodeError::HuffmanTableError(source: &HuffmanTableError) => Some(source),
54 _ => None,
55 }
56 }
57}
58
59impl core::fmt::Display for DictionaryDecodeError {
60 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
61 match self {
62 DictionaryDecodeError::BadMagicNum { got: &[u8; 4] } => {
63 write!(
64 f,
65 "Bad magic_num at start of the dictionary; Got: {:#04X?}, Expected: {:#04x?}",
66 got, MAGIC_NUM,
67 )
68 }
69 DictionaryDecodeError::FSETableError(e: &FSETableError) => write!(f, "{:?}", e),
70 DictionaryDecodeError::HuffmanTableError(e: &HuffmanTableError) => write!(f, "{:?}", e),
71 }
72 }
73}
74
75impl From<FSETableError> for DictionaryDecodeError {
76 fn from(val: FSETableError) -> Self {
77 Self::FSETableError(val)
78 }
79}
80
81impl From<HuffmanTableError> for DictionaryDecodeError {
82 fn from(val: HuffmanTableError) -> Self {
83 Self::HuffmanTableError(val)
84 }
85}
86
87/// This 4 byte (little endian) magic number refers to the start of a dictionary
88pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC];
89
90impl Dictionary {
91 /// Parses the dictionary from `raw` and set the tables
92 /// it returns the dict_id for checking with the frame's `dict_id``
93 pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> {
94 let mut new_dict = Dictionary {
95 id: 0,
96 fse: FSEScratch::new(),
97 huf: HuffmanScratch::new(),
98 dict_content: Vec::new(),
99 offset_hist: [2, 4, 8],
100 };
101
102 let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away");
103 if magic_num != MAGIC_NUM {
104 return Err(DictionaryDecodeError::BadMagicNum { got: magic_num });
105 }
106
107 let dict_id = raw[4..8].try_into().expect("optimized away");
108 let dict_id = u32::from_le_bytes(dict_id);
109 new_dict.id = dict_id;
110
111 let raw_tables = &raw[8..];
112
113 let huf_size = new_dict.huf.table.build_decoder(raw_tables)?;
114 let raw_tables = &raw_tables[huf_size as usize..];
115
116 let of_size = new_dict.fse.offsets.build_decoder(
117 raw_tables,
118 crate::decoding::sequence_section_decoder::OF_MAX_LOG,
119 )?;
120 let raw_tables = &raw_tables[of_size..];
121
122 let ml_size = new_dict.fse.match_lengths.build_decoder(
123 raw_tables,
124 crate::decoding::sequence_section_decoder::ML_MAX_LOG,
125 )?;
126 let raw_tables = &raw_tables[ml_size..];
127
128 let ll_size = new_dict.fse.literal_lengths.build_decoder(
129 raw_tables,
130 crate::decoding::sequence_section_decoder::LL_MAX_LOG,
131 )?;
132 let raw_tables = &raw_tables[ll_size..];
133
134 let offset1 = raw_tables[0..4].try_into().expect("optimized away");
135 let offset1 = u32::from_le_bytes(offset1);
136
137 let offset2 = raw_tables[4..8].try_into().expect("optimized away");
138 let offset2 = u32::from_le_bytes(offset2);
139
140 let offset3 = raw_tables[8..12].try_into().expect("optimized away");
141 let offset3 = u32::from_le_bytes(offset3);
142
143 new_dict.offset_hist[0] = offset1;
144 new_dict.offset_hist[1] = offset2;
145 new_dict.offset_hist[2] = offset3;
146
147 let raw_content = &raw_tables[12..];
148 new_dict.dict_content.extend(raw_content);
149
150 Ok(new_dict)
151 }
152}
153