1 | /* |
2 | * Copyright (c) Yann Collet, Facebook, Inc. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | /* zstd_ddict.c : |
12 | * concentrates all logic that needs to know the internals of ZSTD_DDict object */ |
13 | |
14 | /*-******************************************************* |
15 | * Dependencies |
16 | *********************************************************/ |
17 | #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ |
18 | #include "../common/cpu.h" /* bmi2 */ |
19 | #include "../common/mem.h" /* low level memory routines */ |
20 | #define FSE_STATIC_LINKING_ONLY |
21 | #include "../common/fse.h" |
22 | #define HUF_STATIC_LINKING_ONLY |
23 | #include "../common/huf.h" |
24 | #include "zstd_decompress_internal.h" |
25 | #include "zstd_ddict.h" |
26 | |
27 | |
28 | |
29 | |
30 | /*-******************************************************* |
31 | * Types |
32 | *********************************************************/ |
33 | struct ZSTD_DDict_s { |
34 | void* dictBuffer; |
35 | const void* dictContent; |
36 | size_t dictSize; |
37 | ZSTD_entropyDTables_t entropy; |
38 | U32 dictID; |
39 | U32 entropyPresent; |
40 | ZSTD_customMem cMem; |
41 | }; /* typedef'd to ZSTD_DDict within "zstd.h" */ |
42 | |
43 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) |
44 | { |
45 | assert(ddict != NULL); |
46 | return ddict->dictContent; |
47 | } |
48 | |
49 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) |
50 | { |
51 | assert(ddict != NULL); |
52 | return ddict->dictSize; |
53 | } |
54 | |
55 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) |
56 | { |
57 | DEBUGLOG(4, "ZSTD_copyDDictParameters" ); |
58 | assert(dctx != NULL); |
59 | assert(ddict != NULL); |
60 | dctx->dictID = ddict->dictID; |
61 | dctx->prefixStart = ddict->dictContent; |
62 | dctx->virtualStart = ddict->dictContent; |
63 | dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; |
64 | dctx->previousDstEnd = dctx->dictEnd; |
65 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
66 | dctx->dictContentBeginForFuzzing = dctx->prefixStart; |
67 | dctx->dictContentEndForFuzzing = dctx->previousDstEnd; |
68 | #endif |
69 | if (ddict->entropyPresent) { |
70 | dctx->litEntropy = 1; |
71 | dctx->fseEntropy = 1; |
72 | dctx->LLTptr = ddict->entropy.LLTable; |
73 | dctx->MLTptr = ddict->entropy.MLTable; |
74 | dctx->OFTptr = ddict->entropy.OFTable; |
75 | dctx->HUFptr = ddict->entropy.hufTable; |
76 | dctx->entropy.rep[0] = ddict->entropy.rep[0]; |
77 | dctx->entropy.rep[1] = ddict->entropy.rep[1]; |
78 | dctx->entropy.rep[2] = ddict->entropy.rep[2]; |
79 | } else { |
80 | dctx->litEntropy = 0; |
81 | dctx->fseEntropy = 0; |
82 | } |
83 | } |
84 | |
85 | |
86 | static size_t |
87 | ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, |
88 | ZSTD_dictContentType_e dictContentType) |
89 | { |
90 | ddict->dictID = 0; |
91 | ddict->entropyPresent = 0; |
92 | if (dictContentType == ZSTD_dct_rawContent) return 0; |
93 | |
94 | if (ddict->dictSize < 8) { |
95 | if (dictContentType == ZSTD_dct_fullDict) |
96 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
97 | return 0; /* pure content mode */ |
98 | } |
99 | { U32 const magic = MEM_readLE32(memPtr: ddict->dictContent); |
100 | if (magic != ZSTD_MAGIC_DICTIONARY) { |
101 | if (dictContentType == ZSTD_dct_fullDict) |
102 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
103 | return 0; /* pure content mode */ |
104 | } |
105 | } |
106 | ddict->dictID = MEM_readLE32(memPtr: (const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); |
107 | |
108 | /* load entropy tables */ |
109 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( |
110 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), |
111 | dictionary_corrupted, "" ); |
112 | ddict->entropyPresent = 1; |
113 | return 0; |
114 | } |
115 | |
116 | |
117 | static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, |
118 | const void* dict, size_t dictSize, |
119 | ZSTD_dictLoadMethod_e dictLoadMethod, |
120 | ZSTD_dictContentType_e dictContentType) |
121 | { |
122 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { |
123 | ddict->dictBuffer = NULL; |
124 | ddict->dictContent = dict; |
125 | if (!dict) dictSize = 0; |
126 | } else { |
127 | void* const internalBuffer = ZSTD_customMalloc(size: dictSize, customMem: ddict->cMem); |
128 | ddict->dictBuffer = internalBuffer; |
129 | ddict->dictContent = internalBuffer; |
130 | if (!internalBuffer) return ERROR(memory_allocation); |
131 | ZSTD_memcpy(internalBuffer, dict, dictSize); |
132 | } |
133 | ddict->dictSize = dictSize; |
134 | ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ |
135 | |
136 | /* parse dictionary content */ |
137 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "" ); |
138 | |
139 | return 0; |
140 | } |
141 | |
142 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, |
143 | ZSTD_dictLoadMethod_e dictLoadMethod, |
144 | ZSTD_dictContentType_e dictContentType, |
145 | ZSTD_customMem customMem) |
146 | { |
147 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
148 | |
149 | { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(size: sizeof(ZSTD_DDict), customMem); |
150 | if (ddict == NULL) return NULL; |
151 | ddict->cMem = customMem; |
152 | { size_t const initResult = ZSTD_initDDict_internal(ddict, |
153 | dict, dictSize, |
154 | dictLoadMethod, dictContentType); |
155 | if (ZSTD_isError(code: initResult)) { |
156 | ZSTD_freeDDict(ddict); |
157 | return NULL; |
158 | } } |
159 | return ddict; |
160 | } |
161 | } |
162 | |
163 | /*! ZSTD_createDDict() : |
164 | * Create a digested dictionary, to start decompression without startup delay. |
165 | * `dict` content is copied inside DDict. |
166 | * Consequently, `dict` can be released after `ZSTD_DDict` creation */ |
167 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) |
168 | { |
169 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
170 | return ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod: ZSTD_dlm_byCopy, dictContentType: ZSTD_dct_auto, customMem: allocator); |
171 | } |
172 | |
173 | /*! ZSTD_createDDict_byReference() : |
174 | * Create a digested dictionary, to start decompression without startup delay. |
175 | * Dictionary content is simply referenced, it will be accessed during decompression. |
176 | * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ |
177 | ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) |
178 | { |
179 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
180 | return ZSTD_createDDict_advanced(dict: dictBuffer, dictSize, dictLoadMethod: ZSTD_dlm_byRef, dictContentType: ZSTD_dct_auto, customMem: allocator); |
181 | } |
182 | |
183 | |
184 | const ZSTD_DDict* ZSTD_initStaticDDict( |
185 | void* sBuffer, size_t sBufferSize, |
186 | const void* dict, size_t dictSize, |
187 | ZSTD_dictLoadMethod_e dictLoadMethod, |
188 | ZSTD_dictContentType_e dictContentType) |
189 | { |
190 | size_t const neededSpace = sizeof(ZSTD_DDict) |
191 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
192 | ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; |
193 | assert(sBuffer != NULL); |
194 | assert(dict != NULL); |
195 | if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ |
196 | if (sBufferSize < neededSpace) return NULL; |
197 | if (dictLoadMethod == ZSTD_dlm_byCopy) { |
198 | ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ |
199 | dict = ddict+1; |
200 | } |
201 | if (ZSTD_isError( code: ZSTD_initDDict_internal(ddict, |
202 | dict, dictSize, |
203 | dictLoadMethod: ZSTD_dlm_byRef, dictContentType) )) |
204 | return NULL; |
205 | return ddict; |
206 | } |
207 | |
208 | |
209 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) |
210 | { |
211 | if (ddict==NULL) return 0; /* support free on NULL */ |
212 | { ZSTD_customMem const cMem = ddict->cMem; |
213 | ZSTD_customFree(ptr: ddict->dictBuffer, customMem: cMem); |
214 | ZSTD_customFree(ptr: ddict, customMem: cMem); |
215 | return 0; |
216 | } |
217 | } |
218 | |
219 | /*! ZSTD_estimateDDictSize() : |
220 | * Estimate amount of memory that will be needed to create a dictionary for decompression. |
221 | * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ |
222 | size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) |
223 | { |
224 | return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
225 | } |
226 | |
227 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) |
228 | { |
229 | if (ddict==NULL) return 0; /* support sizeof on NULL */ |
230 | return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; |
231 | } |
232 | |
233 | /*! ZSTD_getDictID_fromDDict() : |
234 | * Provides the dictID of the dictionary loaded into `ddict`. |
235 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
236 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
237 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) |
238 | { |
239 | if (ddict==NULL) return 0; |
240 | return ZSTD_getDictID_fromDict(dict: ddict->dictContent, dictSize: ddict->dictSize); |
241 | } |
242 | |