| 1 | use alloc::vec::Vec; |
| 2 | use core::convert::TryInto; |
| 3 | |
| 4 | use crate::decoding::scratch::FSEScratch; |
| 5 | use crate::decoding::scratch::HuffmanScratch; |
| 6 | use crate::fse::FSETableError; |
| 7 | use crate::huff0::HuffmanTableError; |
| 8 | |
| 9 | /// Zstandard includes support for "raw content" dictionaries, that store bytes optionally used |
| 10 | /// during sequence execution. |
| 11 | /// |
| 12 | /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format> |
| 13 | pub struct Dictionary { |
| 14 | /// A 4 byte value used by decoders to check if they can use |
| 15 | /// the correct dictionary. This value must not be zero. |
| 16 | pub id: u32, |
| 17 | /// A dictionary can contain an entropy table, either FSE or |
| 18 | /// Huffman. |
| 19 | pub fse: FSEScratch, |
| 20 | /// A dictionary can contain an entropy table, either FSE or |
| 21 | /// Huffman. |
| 22 | pub huf: HuffmanScratch, |
| 23 | /// The content of a dictionary acts as a "past" in front of data |
| 24 | /// to compress or decompress, |
| 25 | /// so it can be referenced in sequence commands. |
| 26 | /// As long as the amount of data decoded from this frame is less than or |
| 27 | /// equal to Window_Size, sequence commands may specify offsets longer than |
| 28 | /// the total length of decoded output so far to reference back to the |
| 29 | /// dictionary, even parts of the dictionary with offsets larger than Window_Size. |
| 30 | /// After the total output has surpassed Window_Size however, |
| 31 | /// this is no longer allowed and the dictionary is no longer accessible |
| 32 | pub dict_content: Vec<u8>, |
| 33 | /// The 3 most recent offsets are stored so that they can be used |
| 34 | /// during sequence execution, see |
| 35 | /// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#repeat-offsets> |
| 36 | /// for more. |
| 37 | pub offset_hist: [u32; 3], |
| 38 | } |
| 39 | |
| 40 | #[derive (Debug)] |
| 41 | #[non_exhaustive ] |
| 42 | pub enum DictionaryDecodeError { |
| 43 | BadMagicNum { got: [u8; 4] }, |
| 44 | FSETableError(FSETableError), |
| 45 | HuffmanTableError(HuffmanTableError), |
| 46 | } |
| 47 | |
| 48 | #[cfg (feature = "std" )] |
| 49 | impl std::error::Error for DictionaryDecodeError { |
| 50 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { |
| 51 | match self { |
| 52 | DictionaryDecodeError::FSETableError(source: &FSETableError) => Some(source), |
| 53 | DictionaryDecodeError::HuffmanTableError(source: &HuffmanTableError) => Some(source), |
| 54 | _ => None, |
| 55 | } |
| 56 | } |
| 57 | } |
| 58 | |
| 59 | impl core::fmt::Display for DictionaryDecodeError { |
| 60 | fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { |
| 61 | match self { |
| 62 | DictionaryDecodeError::BadMagicNum { got: &[u8; 4] } => { |
| 63 | write!( |
| 64 | f, |
| 65 | "Bad magic_num at start of the dictionary; Got: {:#04X?}, Expected: {:#04x?}" , |
| 66 | got, MAGIC_NUM, |
| 67 | ) |
| 68 | } |
| 69 | DictionaryDecodeError::FSETableError(e: &FSETableError) => write!(f, " {:?}" , e), |
| 70 | DictionaryDecodeError::HuffmanTableError(e: &HuffmanTableError) => write!(f, " {:?}" , e), |
| 71 | } |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | impl From<FSETableError> for DictionaryDecodeError { |
| 76 | fn from(val: FSETableError) -> Self { |
| 77 | Self::FSETableError(val) |
| 78 | } |
| 79 | } |
| 80 | |
| 81 | impl From<HuffmanTableError> for DictionaryDecodeError { |
| 82 | fn from(val: HuffmanTableError) -> Self { |
| 83 | Self::HuffmanTableError(val) |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | /// This 4 byte (little endian) magic number refers to the start of a dictionary |
| 88 | pub const MAGIC_NUM: [u8; 4] = [0x37, 0xA4, 0x30, 0xEC]; |
| 89 | |
| 90 | impl Dictionary { |
| 91 | /// Parses the dictionary from `raw` and set the tables |
| 92 | /// it returns the dict_id for checking with the frame's `dict_id`` |
| 93 | pub fn decode_dict(raw: &[u8]) -> Result<Dictionary, DictionaryDecodeError> { |
| 94 | let mut new_dict = Dictionary { |
| 95 | id: 0, |
| 96 | fse: FSEScratch::new(), |
| 97 | huf: HuffmanScratch::new(), |
| 98 | dict_content: Vec::new(), |
| 99 | offset_hist: [2, 4, 8], |
| 100 | }; |
| 101 | |
| 102 | let magic_num: [u8; 4] = raw[..4].try_into().expect("optimized away" ); |
| 103 | if magic_num != MAGIC_NUM { |
| 104 | return Err(DictionaryDecodeError::BadMagicNum { got: magic_num }); |
| 105 | } |
| 106 | |
| 107 | let dict_id = raw[4..8].try_into().expect("optimized away" ); |
| 108 | let dict_id = u32::from_le_bytes(dict_id); |
| 109 | new_dict.id = dict_id; |
| 110 | |
| 111 | let raw_tables = &raw[8..]; |
| 112 | |
| 113 | let huf_size = new_dict.huf.table.build_decoder(raw_tables)?; |
| 114 | let raw_tables = &raw_tables[huf_size as usize..]; |
| 115 | |
| 116 | let of_size = new_dict.fse.offsets.build_decoder( |
| 117 | raw_tables, |
| 118 | crate::decoding::sequence_section_decoder::OF_MAX_LOG, |
| 119 | )?; |
| 120 | let raw_tables = &raw_tables[of_size..]; |
| 121 | |
| 122 | let ml_size = new_dict.fse.match_lengths.build_decoder( |
| 123 | raw_tables, |
| 124 | crate::decoding::sequence_section_decoder::ML_MAX_LOG, |
| 125 | )?; |
| 126 | let raw_tables = &raw_tables[ml_size..]; |
| 127 | |
| 128 | let ll_size = new_dict.fse.literal_lengths.build_decoder( |
| 129 | raw_tables, |
| 130 | crate::decoding::sequence_section_decoder::LL_MAX_LOG, |
| 131 | )?; |
| 132 | let raw_tables = &raw_tables[ll_size..]; |
| 133 | |
| 134 | let offset1 = raw_tables[0..4].try_into().expect("optimized away" ); |
| 135 | let offset1 = u32::from_le_bytes(offset1); |
| 136 | |
| 137 | let offset2 = raw_tables[4..8].try_into().expect("optimized away" ); |
| 138 | let offset2 = u32::from_le_bytes(offset2); |
| 139 | |
| 140 | let offset3 = raw_tables[8..12].try_into().expect("optimized away" ); |
| 141 | let offset3 = u32::from_le_bytes(offset3); |
| 142 | |
| 143 | new_dict.offset_hist[0] = offset1; |
| 144 | new_dict.offset_hist[1] = offset2; |
| 145 | new_dict.offset_hist[2] = offset3; |
| 146 | |
| 147 | let raw_content = &raw_tables[12..]; |
| 148 | new_dict.dict_content.extend(raw_content); |
| 149 | |
| 150 | Ok(new_dict) |
| 151 | } |
| 152 | } |
| 153 | |