1 | /* |
2 | * Copyright (c) Yann Collet, Facebook, Inc. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | /*-************************************* |
12 | * Dependencies |
13 | ***************************************/ |
14 | #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ |
15 | #include "../common/mem.h" |
16 | #include "hist.h" /* HIST_countFast_wksp */ |
17 | #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ |
18 | #include "../common/fse.h" |
19 | #define HUF_STATIC_LINKING_ONLY |
20 | #include "../common/huf.h" |
21 | #include "zstd_compress_internal.h" |
22 | #include "zstd_compress_sequences.h" |
23 | #include "zstd_compress_literals.h" |
24 | #include "zstd_fast.h" |
25 | #include "zstd_double_fast.h" |
26 | #include "zstd_lazy.h" |
27 | #include "zstd_opt.h" |
28 | #include "zstd_ldm.h" |
29 | #include "zstd_compress_superblock.h" |
30 | |
31 | /* *************************************************************** |
32 | * Tuning parameters |
33 | *****************************************************************/ |
34 | /*! |
35 | * COMPRESS_HEAPMODE : |
36 | * Select how default compression function ZSTD_compress() allocates its context, |
37 | * on stack (0, default), or into heap (1). |
38 | * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. |
39 | */ |
40 | |
41 | /*! |
42 | * ZSTD_HASHLOG3_MAX : |
43 | * Maximum size of the hash table dedicated to find 3-bytes matches, |
44 | * in log format, aka 17 => 1 << 17 == 128Ki positions. |
45 | * This structure is only used in zstd_opt. |
46 | * Since allocation is centralized for all strategies, it has to be known here. |
47 | * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3, |
48 | * so that zstd_opt.c doesn't need to know about this constant. |
49 | */ |
50 | #ifndef ZSTD_HASHLOG3_MAX |
51 | # define ZSTD_HASHLOG3_MAX 17 |
52 | #endif |
53 | |
54 | /*-************************************* |
55 | * Helper functions |
56 | ***************************************/ |
/* ZSTD_compressBound() :
 * Evaluates the ZSTD_COMPRESSBOUND() macro for `srcSize` and returns its value.
 * Note : the result is only compatible with the "normal" full-block strategy.
 * When streaming mode is flushed frequently and therefore emits many small
 * blocks, the header overhead can make the compressed data larger than the
 * value returned by ZSTD_compressBound().
 */
size_t ZSTD_compressBound(size_t srcSize)
{
    size_t const bound = ZSTD_COMPRESSBOUND(srcSize);
    return bound;
}
67 | |
68 | |
69 | /*-************************************* |
70 | * Context memory management |
71 | ***************************************/ |
72 | struct ZSTD_CDict_s { |
73 | const void* dictContent; |
74 | size_t dictContentSize; |
75 | ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ |
76 | U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ |
77 | ZSTD_cwksp workspace; |
78 | ZSTD_matchState_t matchState; |
79 | ZSTD_compressedBlockState_t cBlockState; |
80 | ZSTD_customMem customMem; |
81 | U32 dictID; |
82 | int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ |
83 | ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use |
84 | * row-based matchfinder. Unless the cdict is reloaded, we will use |
85 | * the same greedy/lazy matchfinder at compression time. |
86 | */ |
87 | }; /* typedef'd to ZSTD_CDict within "zstd.h" */ |
88 | |
89 | ZSTD_CCtx* ZSTD_createCCtx(void) |
90 | { |
91 | return ZSTD_createCCtx_advanced(customMem: ZSTD_defaultCMem); |
92 | } |
93 | |
94 | static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) |
95 | { |
96 | assert(cctx != NULL); |
97 | ZSTD_memset(cctx, 0, sizeof(*cctx)); |
98 | cctx->customMem = memManager; |
99 | cctx->bmi2 = ZSTD_cpuSupportsBmi2(); |
100 | { size_t const err = ZSTD_CCtx_reset(cctx, reset: ZSTD_reset_parameters); |
101 | assert(!ZSTD_isError(err)); |
102 | (void)err; |
103 | } |
104 | } |
105 | |
106 | ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) |
107 | { |
108 | ZSTD_STATIC_ASSERT(zcss_init==0); |
109 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); |
110 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
111 | { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(size: sizeof(ZSTD_CCtx), customMem); |
112 | if (!cctx) return NULL; |
113 | ZSTD_initCCtx(cctx, memManager: customMem); |
114 | return cctx; |
115 | } |
116 | } |
117 | |
118 | ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) |
119 | { |
120 | ZSTD_cwksp ws; |
121 | ZSTD_CCtx* cctx; |
122 | if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ |
123 | if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ |
124 | ZSTD_cwksp_init(ws: &ws, start: workspace, size: workspaceSize, isStatic: ZSTD_cwksp_static_alloc); |
125 | |
126 | cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(ws: &ws, bytes: sizeof(ZSTD_CCtx)); |
127 | if (cctx == NULL) return NULL; |
128 | |
129 | ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); |
130 | ZSTD_cwksp_move(dst: &cctx->workspace, src: &ws); |
131 | cctx->staticSize = workspaceSize; |
132 | |
133 | /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ |
134 | if (!ZSTD_cwksp_check_available(ws: &cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; |
135 | cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(ws: &cctx->workspace, bytes: sizeof(ZSTD_compressedBlockState_t)); |
136 | cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(ws: &cctx->workspace, bytes: sizeof(ZSTD_compressedBlockState_t)); |
137 | cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(ws: &cctx->workspace, ENTROPY_WORKSPACE_SIZE); |
138 | cctx->bmi2 = ZSTD_cpuid_bmi2(cpuid: ZSTD_cpuid()); |
139 | return cctx; |
140 | } |
141 | |
142 | /* |
143 | * Clears and frees all of the dictionaries in the CCtx. |
144 | */ |
145 | static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) |
146 | { |
147 | ZSTD_customFree(ptr: cctx->localDict.dictBuffer, customMem: cctx->customMem); |
148 | ZSTD_freeCDict(CDict: cctx->localDict.cdict); |
149 | ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); |
150 | ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); |
151 | cctx->cdict = NULL; |
152 | } |
153 | |
154 | static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) |
155 | { |
156 | size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; |
157 | size_t const cdictSize = ZSTD_sizeof_CDict(cdict: dict.cdict); |
158 | return bufferSize + cdictSize; |
159 | } |
160 | |
161 | static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) |
162 | { |
163 | assert(cctx != NULL); |
164 | assert(cctx->staticSize == 0); |
165 | ZSTD_clearAllDicts(cctx); |
166 | ZSTD_cwksp_free(ws: &cctx->workspace, customMem: cctx->customMem); |
167 | } |
168 | |
169 | size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) |
170 | { |
171 | if (cctx==NULL) return 0; /* support free on NULL */ |
172 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, |
173 | "not compatible with static CCtx" ); |
174 | { |
175 | int cctxInWorkspace = ZSTD_cwksp_owns_buffer(ws: &cctx->workspace, ptr: cctx); |
176 | ZSTD_freeCCtxContent(cctx); |
177 | if (!cctxInWorkspace) { |
178 | ZSTD_customFree(ptr: cctx, customMem: cctx->customMem); |
179 | } |
180 | } |
181 | return 0; |
182 | } |
183 | |
184 | |
185 | static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) |
186 | { |
187 | (void)cctx; |
188 | return 0; |
189 | } |
190 | |
191 | |
192 | size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) |
193 | { |
194 | if (cctx==NULL) return 0; /* support sizeof on NULL */ |
195 | /* cctx may be in the workspace */ |
196 | return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) |
197 | + ZSTD_cwksp_sizeof(ws: &cctx->workspace) |
198 | + ZSTD_sizeof_localDict(dict: cctx->localDict) |
199 | + ZSTD_sizeof_mtctx(cctx); |
200 | } |
201 | |
202 | size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) |
203 | { |
204 | return ZSTD_sizeof_CCtx(cctx: zcs); /* same object */ |
205 | } |
206 | |
207 | /* private API call, for dictBuilder only */ |
208 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } |
209 | |
210 | /* Returns true if the strategy supports using a row based matchfinder */ |
211 | static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { |
212 | return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); |
213 | } |
214 | |
215 | /* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder |
216 | * for this compression. |
217 | */ |
218 | static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { |
219 | assert(mode != ZSTD_ps_auto); |
220 | return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); |
221 | } |
222 | |
223 | /* Returns row matchfinder usage given an initial mode and cParams */ |
224 | static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, |
225 | const ZSTD_compressionParameters* const cParams) { |
226 | #if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) |
227 | int const kHasSIMD128 = 1; |
228 | #else |
229 | int const kHasSIMD128 = 0; |
230 | #endif |
231 | if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ |
232 | mode = ZSTD_ps_disable; |
233 | if (!ZSTD_rowMatchFinderSupported(strategy: cParams->strategy)) return mode; |
234 | if (kHasSIMD128) { |
235 | if (cParams->windowLog > 14) mode = ZSTD_ps_enable; |
236 | } else { |
237 | if (cParams->windowLog > 17) mode = ZSTD_ps_enable; |
238 | } |
239 | return mode; |
240 | } |
241 | |
242 | /* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ |
243 | static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, |
244 | const ZSTD_compressionParameters* const cParams) { |
245 | if (mode != ZSTD_ps_auto) return mode; |
246 | return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable; |
247 | } |
248 | |
249 | /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ |
250 | static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, |
251 | const ZSTD_paramSwitch_e useRowMatchFinder, |
252 | const U32 forDDSDict) { |
253 | assert(useRowMatchFinder != ZSTD_ps_auto); |
254 | /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. |
255 | * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. |
256 | */ |
257 | return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, mode: useRowMatchFinder)); |
258 | } |
259 | |
260 | /* Returns 1 if compression parameters are such that we should |
261 | * enable long distance matching (wlog >= 27, strategy >= btopt). |
262 | * Returns 0 otherwise. |
263 | */ |
264 | static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, |
265 | const ZSTD_compressionParameters* const cParams) { |
266 | if (mode != ZSTD_ps_auto) return mode; |
267 | return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; |
268 | } |
269 | |
270 | static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( |
271 | ZSTD_compressionParameters cParams) |
272 | { |
273 | ZSTD_CCtx_params cctxParams; |
274 | /* should not matter, as all cParams are presumed properly defined */ |
275 | ZSTD_CCtxParams_init(cctxParams: &cctxParams, ZSTD_CLEVEL_DEFAULT); |
276 | cctxParams.cParams = cParams; |
277 | |
278 | /* Adjust advanced params according to cParams */ |
279 | cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(mode: cctxParams.ldmParams.enableLdm, cParams: &cParams); |
280 | if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) { |
281 | ZSTD_ldm_adjustParameters(params: &cctxParams.ldmParams, cParams: &cParams); |
282 | assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); |
283 | assert(cctxParams.ldmParams.hashRateLog < 32); |
284 | } |
285 | cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(mode: cctxParams.useBlockSplitter, cParams: &cParams); |
286 | cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: cctxParams.useRowMatchFinder, cParams: &cParams); |
287 | assert(!ZSTD_checkCParams(cParams)); |
288 | return cctxParams; |
289 | } |
290 | |
291 | static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( |
292 | ZSTD_customMem customMem) |
293 | { |
294 | ZSTD_CCtx_params* params; |
295 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
296 | params = (ZSTD_CCtx_params*)ZSTD_customCalloc( |
297 | size: sizeof(ZSTD_CCtx_params), customMem); |
298 | if (!params) { return NULL; } |
299 | ZSTD_CCtxParams_init(cctxParams: params, ZSTD_CLEVEL_DEFAULT); |
300 | params->customMem = customMem; |
301 | return params; |
302 | } |
303 | |
304 | ZSTD_CCtx_params* ZSTD_createCCtxParams(void) |
305 | { |
306 | return ZSTD_createCCtxParams_advanced(customMem: ZSTD_defaultCMem); |
307 | } |
308 | |
309 | size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) |
310 | { |
311 | if (params == NULL) { return 0; } |
312 | ZSTD_customFree(ptr: params, customMem: params->customMem); |
313 | return 0; |
314 | } |
315 | |
316 | size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) |
317 | { |
318 | return ZSTD_CCtxParams_init(cctxParams: params, ZSTD_CLEVEL_DEFAULT); |
319 | } |
320 | |
321 | size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { |
322 | RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!" ); |
323 | ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); |
324 | cctxParams->compressionLevel = compressionLevel; |
325 | cctxParams->fParams.contentSizeFlag = 1; |
326 | return 0; |
327 | } |
328 | |
329 | #define ZSTD_NO_CLEVEL 0 |
330 | |
331 | /* |
332 | * Initializes the cctxParams from params and compressionLevel. |
333 | * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. |
334 | */ |
335 | static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) |
336 | { |
337 | assert(!ZSTD_checkCParams(params->cParams)); |
338 | ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); |
339 | cctxParams->cParams = params->cParams; |
340 | cctxParams->fParams = params->fParams; |
341 | /* Should not matter, as all cParams are presumed properly defined. |
342 | * But, set it for tracing anyway. |
343 | */ |
344 | cctxParams->compressionLevel = compressionLevel; |
345 | cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: cctxParams->useRowMatchFinder, cParams: ¶ms->cParams); |
346 | cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(mode: cctxParams->useBlockSplitter, cParams: ¶ms->cParams); |
347 | cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(mode: cctxParams->ldmParams.enableLdm, cParams: ¶ms->cParams); |
348 | DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d" , |
349 | cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm); |
350 | } |
351 | |
352 | size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) |
353 | { |
354 | RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!" ); |
355 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "" ); |
356 | ZSTD_CCtxParams_init_internal(cctxParams, params: ¶ms, ZSTD_NO_CLEVEL); |
357 | return 0; |
358 | } |
359 | |
360 | /* |
361 | * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. |
362 | * @param param Validated zstd parameters. |
363 | */ |
364 | static void ZSTD_CCtxParams_setZstdParams( |
365 | ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) |
366 | { |
367 | assert(!ZSTD_checkCParams(params->cParams)); |
368 | cctxParams->cParams = params->cParams; |
369 | cctxParams->fParams = params->fParams; |
370 | /* Should not matter, as all cParams are presumed properly defined. |
371 | * But, set it for tracing anyway. |
372 | */ |
373 | cctxParams->compressionLevel = ZSTD_NO_CLEVEL; |
374 | } |
375 | |
376 | ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) |
377 | { |
378 | ZSTD_bounds bounds = { 0, 0, 0 }; |
379 | |
380 | switch(param) |
381 | { |
382 | case ZSTD_c_compressionLevel: |
383 | bounds.lowerBound = ZSTD_minCLevel(); |
384 | bounds.upperBound = ZSTD_maxCLevel(); |
385 | return bounds; |
386 | |
387 | case ZSTD_c_windowLog: |
388 | bounds.lowerBound = ZSTD_WINDOWLOG_MIN; |
389 | bounds.upperBound = ZSTD_WINDOWLOG_MAX; |
390 | return bounds; |
391 | |
392 | case ZSTD_c_hashLog: |
393 | bounds.lowerBound = ZSTD_HASHLOG_MIN; |
394 | bounds.upperBound = ZSTD_HASHLOG_MAX; |
395 | return bounds; |
396 | |
397 | case ZSTD_c_chainLog: |
398 | bounds.lowerBound = ZSTD_CHAINLOG_MIN; |
399 | bounds.upperBound = ZSTD_CHAINLOG_MAX; |
400 | return bounds; |
401 | |
402 | case ZSTD_c_searchLog: |
403 | bounds.lowerBound = ZSTD_SEARCHLOG_MIN; |
404 | bounds.upperBound = ZSTD_SEARCHLOG_MAX; |
405 | return bounds; |
406 | |
407 | case ZSTD_c_minMatch: |
408 | bounds.lowerBound = ZSTD_MINMATCH_MIN; |
409 | bounds.upperBound = ZSTD_MINMATCH_MAX; |
410 | return bounds; |
411 | |
412 | case ZSTD_c_targetLength: |
413 | bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; |
414 | bounds.upperBound = ZSTD_TARGETLENGTH_MAX; |
415 | return bounds; |
416 | |
417 | case ZSTD_c_strategy: |
418 | bounds.lowerBound = ZSTD_STRATEGY_MIN; |
419 | bounds.upperBound = ZSTD_STRATEGY_MAX; |
420 | return bounds; |
421 | |
422 | case ZSTD_c_contentSizeFlag: |
423 | bounds.lowerBound = 0; |
424 | bounds.upperBound = 1; |
425 | return bounds; |
426 | |
427 | case ZSTD_c_checksumFlag: |
428 | bounds.lowerBound = 0; |
429 | bounds.upperBound = 1; |
430 | return bounds; |
431 | |
432 | case ZSTD_c_dictIDFlag: |
433 | bounds.lowerBound = 0; |
434 | bounds.upperBound = 1; |
435 | return bounds; |
436 | |
437 | case ZSTD_c_nbWorkers: |
438 | bounds.lowerBound = 0; |
439 | bounds.upperBound = 0; |
440 | return bounds; |
441 | |
442 | case ZSTD_c_jobSize: |
443 | bounds.lowerBound = 0; |
444 | bounds.upperBound = 0; |
445 | return bounds; |
446 | |
447 | case ZSTD_c_overlapLog: |
448 | bounds.lowerBound = 0; |
449 | bounds.upperBound = 0; |
450 | return bounds; |
451 | |
452 | case ZSTD_c_enableDedicatedDictSearch: |
453 | bounds.lowerBound = 0; |
454 | bounds.upperBound = 1; |
455 | return bounds; |
456 | |
457 | case ZSTD_c_enableLongDistanceMatching: |
458 | bounds.lowerBound = 0; |
459 | bounds.upperBound = 1; |
460 | return bounds; |
461 | |
462 | case ZSTD_c_ldmHashLog: |
463 | bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; |
464 | bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; |
465 | return bounds; |
466 | |
467 | case ZSTD_c_ldmMinMatch: |
468 | bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; |
469 | bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; |
470 | return bounds; |
471 | |
472 | case ZSTD_c_ldmBucketSizeLog: |
473 | bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; |
474 | bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; |
475 | return bounds; |
476 | |
477 | case ZSTD_c_ldmHashRateLog: |
478 | bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; |
479 | bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; |
480 | return bounds; |
481 | |
482 | /* experimental parameters */ |
483 | case ZSTD_c_rsyncable: |
484 | bounds.lowerBound = 0; |
485 | bounds.upperBound = 1; |
486 | return bounds; |
487 | |
488 | case ZSTD_c_forceMaxWindow : |
489 | bounds.lowerBound = 0; |
490 | bounds.upperBound = 1; |
491 | return bounds; |
492 | |
493 | case ZSTD_c_format: |
494 | ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); |
495 | bounds.lowerBound = ZSTD_f_zstd1; |
496 | bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ |
497 | return bounds; |
498 | |
499 | case ZSTD_c_forceAttachDict: |
500 | ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); |
501 | bounds.lowerBound = ZSTD_dictDefaultAttach; |
502 | bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ |
503 | return bounds; |
504 | |
505 | case ZSTD_c_literalCompressionMode: |
506 | ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable); |
507 | bounds.lowerBound = (int)ZSTD_ps_auto; |
508 | bounds.upperBound = (int)ZSTD_ps_disable; |
509 | return bounds; |
510 | |
511 | case ZSTD_c_targetCBlockSize: |
512 | bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; |
513 | bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; |
514 | return bounds; |
515 | |
516 | case ZSTD_c_srcSizeHint: |
517 | bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; |
518 | bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; |
519 | return bounds; |
520 | |
521 | case ZSTD_c_stableInBuffer: |
522 | case ZSTD_c_stableOutBuffer: |
523 | bounds.lowerBound = (int)ZSTD_bm_buffered; |
524 | bounds.upperBound = (int)ZSTD_bm_stable; |
525 | return bounds; |
526 | |
527 | case ZSTD_c_blockDelimiters: |
528 | bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; |
529 | bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; |
530 | return bounds; |
531 | |
532 | case ZSTD_c_validateSequences: |
533 | bounds.lowerBound = 0; |
534 | bounds.upperBound = 1; |
535 | return bounds; |
536 | |
537 | case ZSTD_c_useBlockSplitter: |
538 | bounds.lowerBound = (int)ZSTD_ps_auto; |
539 | bounds.upperBound = (int)ZSTD_ps_disable; |
540 | return bounds; |
541 | |
542 | case ZSTD_c_useRowMatchFinder: |
543 | bounds.lowerBound = (int)ZSTD_ps_auto; |
544 | bounds.upperBound = (int)ZSTD_ps_disable; |
545 | return bounds; |
546 | |
547 | case ZSTD_c_deterministicRefPrefix: |
548 | bounds.lowerBound = 0; |
549 | bounds.upperBound = 1; |
550 | return bounds; |
551 | |
552 | default: |
553 | bounds.error = ERROR(parameter_unsupported); |
554 | return bounds; |
555 | } |
556 | } |
557 | |
558 | /* ZSTD_cParam_clampBounds: |
559 | * Clamps the value into the bounded range. |
560 | */ |
561 | static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) |
562 | { |
563 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(param: cParam); |
564 | if (ZSTD_isError(code: bounds.error)) return bounds.error; |
565 | if (*value < bounds.lowerBound) *value = bounds.lowerBound; |
566 | if (*value > bounds.upperBound) *value = bounds.upperBound; |
567 | return 0; |
568 | } |
569 | |
/* BOUNDCHECK() :
 * Raises a parameter_outOfBound error, through RETURN_ERROR_IF(), when
 * ZSTD_cParam_withinBounds(cParam, val) is false.
 * Wrapped in do { } while (0) so that the expansion is exactly one statement :
 * `if (c) BOUNDCHECK(p, v); else ...` parses as intended.
 * Must be invoked with a trailing semicolon.
 */
#define BOUNDCHECK(cParam, val) do {                                   \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),            \
                    parameter_outOfBound, "Param out of bounds");     \
} while (0)
574 | |
575 | |
576 | static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) |
577 | { |
578 | switch(param) |
579 | { |
580 | case ZSTD_c_compressionLevel: |
581 | case ZSTD_c_hashLog: |
582 | case ZSTD_c_chainLog: |
583 | case ZSTD_c_searchLog: |
584 | case ZSTD_c_minMatch: |
585 | case ZSTD_c_targetLength: |
586 | case ZSTD_c_strategy: |
587 | return 1; |
588 | |
589 | case ZSTD_c_format: |
590 | case ZSTD_c_windowLog: |
591 | case ZSTD_c_contentSizeFlag: |
592 | case ZSTD_c_checksumFlag: |
593 | case ZSTD_c_dictIDFlag: |
594 | case ZSTD_c_forceMaxWindow : |
595 | case ZSTD_c_nbWorkers: |
596 | case ZSTD_c_jobSize: |
597 | case ZSTD_c_overlapLog: |
598 | case ZSTD_c_rsyncable: |
599 | case ZSTD_c_enableDedicatedDictSearch: |
600 | case ZSTD_c_enableLongDistanceMatching: |
601 | case ZSTD_c_ldmHashLog: |
602 | case ZSTD_c_ldmMinMatch: |
603 | case ZSTD_c_ldmBucketSizeLog: |
604 | case ZSTD_c_ldmHashRateLog: |
605 | case ZSTD_c_forceAttachDict: |
606 | case ZSTD_c_literalCompressionMode: |
607 | case ZSTD_c_targetCBlockSize: |
608 | case ZSTD_c_srcSizeHint: |
609 | case ZSTD_c_stableInBuffer: |
610 | case ZSTD_c_stableOutBuffer: |
611 | case ZSTD_c_blockDelimiters: |
612 | case ZSTD_c_validateSequences: |
613 | case ZSTD_c_useBlockSplitter: |
614 | case ZSTD_c_useRowMatchFinder: |
615 | case ZSTD_c_deterministicRefPrefix: |
616 | default: |
617 | return 0; |
618 | } |
619 | } |
620 | |
621 | size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) |
622 | { |
623 | DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)" , (int)param, value); |
624 | if (cctx->streamStage != zcss_init) { |
625 | if (ZSTD_isUpdateAuthorized(param)) { |
626 | cctx->cParamsChanged = 1; |
627 | } else { |
628 | RETURN_ERROR(stage_wrong, "can only set params in ctx init stage" ); |
629 | } } |
630 | |
631 | switch(param) |
632 | { |
633 | case ZSTD_c_nbWorkers: |
634 | RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, |
635 | "MT not compatible with static alloc" ); |
636 | break; |
637 | |
638 | case ZSTD_c_compressionLevel: |
639 | case ZSTD_c_windowLog: |
640 | case ZSTD_c_hashLog: |
641 | case ZSTD_c_chainLog: |
642 | case ZSTD_c_searchLog: |
643 | case ZSTD_c_minMatch: |
644 | case ZSTD_c_targetLength: |
645 | case ZSTD_c_strategy: |
646 | case ZSTD_c_ldmHashRateLog: |
647 | case ZSTD_c_format: |
648 | case ZSTD_c_contentSizeFlag: |
649 | case ZSTD_c_checksumFlag: |
650 | case ZSTD_c_dictIDFlag: |
651 | case ZSTD_c_forceMaxWindow: |
652 | case ZSTD_c_forceAttachDict: |
653 | case ZSTD_c_literalCompressionMode: |
654 | case ZSTD_c_jobSize: |
655 | case ZSTD_c_overlapLog: |
656 | case ZSTD_c_rsyncable: |
657 | case ZSTD_c_enableDedicatedDictSearch: |
658 | case ZSTD_c_enableLongDistanceMatching: |
659 | case ZSTD_c_ldmHashLog: |
660 | case ZSTD_c_ldmMinMatch: |
661 | case ZSTD_c_ldmBucketSizeLog: |
662 | case ZSTD_c_targetCBlockSize: |
663 | case ZSTD_c_srcSizeHint: |
664 | case ZSTD_c_stableInBuffer: |
665 | case ZSTD_c_stableOutBuffer: |
666 | case ZSTD_c_blockDelimiters: |
667 | case ZSTD_c_validateSequences: |
668 | case ZSTD_c_useBlockSplitter: |
669 | case ZSTD_c_useRowMatchFinder: |
670 | case ZSTD_c_deterministicRefPrefix: |
671 | break; |
672 | |
673 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter" ); |
674 | } |
675 | return ZSTD_CCtxParams_setParameter(params: &cctx->requestedParams, param, value); |
676 | } |
677 | |
678 | size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, |
679 | ZSTD_cParameter param, int value) |
680 | { |
681 | DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)" , (int)param, value); |
682 | switch(param) |
683 | { |
684 | case ZSTD_c_format : |
685 | BOUNDCHECK(ZSTD_c_format, value); |
686 | CCtxParams->format = (ZSTD_format_e)value; |
687 | return (size_t)CCtxParams->format; |
688 | |
689 | case ZSTD_c_compressionLevel : { |
690 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "" ); |
691 | if (value == 0) |
692 | CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ |
693 | else |
694 | CCtxParams->compressionLevel = value; |
695 | if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; |
696 | return 0; /* return type (size_t) cannot represent negative values */ |
697 | } |
698 | |
699 | case ZSTD_c_windowLog : |
700 | if (value!=0) /* 0 => use default */ |
701 | BOUNDCHECK(ZSTD_c_windowLog, value); |
702 | CCtxParams->cParams.windowLog = (U32)value; |
703 | return CCtxParams->cParams.windowLog; |
704 | |
705 | case ZSTD_c_hashLog : |
706 | if (value!=0) /* 0 => use default */ |
707 | BOUNDCHECK(ZSTD_c_hashLog, value); |
708 | CCtxParams->cParams.hashLog = (U32)value; |
709 | return CCtxParams->cParams.hashLog; |
710 | |
711 | case ZSTD_c_chainLog : |
712 | if (value!=0) /* 0 => use default */ |
713 | BOUNDCHECK(ZSTD_c_chainLog, value); |
714 | CCtxParams->cParams.chainLog = (U32)value; |
715 | return CCtxParams->cParams.chainLog; |
716 | |
717 | case ZSTD_c_searchLog : |
718 | if (value!=0) /* 0 => use default */ |
719 | BOUNDCHECK(ZSTD_c_searchLog, value); |
720 | CCtxParams->cParams.searchLog = (U32)value; |
721 | return (size_t)value; |
722 | |
723 | case ZSTD_c_minMatch : |
724 | if (value!=0) /* 0 => use default */ |
725 | BOUNDCHECK(ZSTD_c_minMatch, value); |
726 | CCtxParams->cParams.minMatch = value; |
727 | return CCtxParams->cParams.minMatch; |
728 | |
729 | case ZSTD_c_targetLength : |
730 | BOUNDCHECK(ZSTD_c_targetLength, value); |
731 | CCtxParams->cParams.targetLength = value; |
732 | return CCtxParams->cParams.targetLength; |
733 | |
734 | case ZSTD_c_strategy : |
735 | if (value!=0) /* 0 => use default */ |
736 | BOUNDCHECK(ZSTD_c_strategy, value); |
737 | CCtxParams->cParams.strategy = (ZSTD_strategy)value; |
738 | return (size_t)CCtxParams->cParams.strategy; |
739 | |
740 | case ZSTD_c_contentSizeFlag : |
741 | /* Content size written in frame header _when known_ (default:1) */ |
742 | DEBUGLOG(4, "set content size flag = %u" , (value!=0)); |
743 | CCtxParams->fParams.contentSizeFlag = value != 0; |
744 | return CCtxParams->fParams.contentSizeFlag; |
745 | |
746 | case ZSTD_c_checksumFlag : |
747 | /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ |
748 | CCtxParams->fParams.checksumFlag = value != 0; |
749 | return CCtxParams->fParams.checksumFlag; |
750 | |
751 | case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ |
752 | DEBUGLOG(4, "set dictIDFlag = %u" , (value!=0)); |
753 | CCtxParams->fParams.noDictIDFlag = !value; |
754 | return !CCtxParams->fParams.noDictIDFlag; |
755 | |
756 | case ZSTD_c_forceMaxWindow : |
757 | CCtxParams->forceWindow = (value != 0); |
758 | return CCtxParams->forceWindow; |
759 | |
760 | case ZSTD_c_forceAttachDict : { |
761 | const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; |
762 | BOUNDCHECK(ZSTD_c_forceAttachDict, pref); |
763 | CCtxParams->attachDictPref = pref; |
764 | return CCtxParams->attachDictPref; |
765 | } |
766 | |
767 | case ZSTD_c_literalCompressionMode : { |
768 | const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; |
769 | BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); |
770 | CCtxParams->literalCompressionMode = lcm; |
771 | return CCtxParams->literalCompressionMode; |
772 | } |
773 | |
774 | case ZSTD_c_nbWorkers : |
775 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading" ); |
776 | return 0; |
777 | |
778 | case ZSTD_c_jobSize : |
779 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading" ); |
780 | return 0; |
781 | |
782 | case ZSTD_c_overlapLog : |
783 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading" ); |
784 | return 0; |
785 | |
786 | case ZSTD_c_rsyncable : |
787 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading" ); |
788 | return 0; |
789 | |
790 | case ZSTD_c_enableDedicatedDictSearch : |
791 | CCtxParams->enableDedicatedDictSearch = (value!=0); |
792 | return CCtxParams->enableDedicatedDictSearch; |
793 | |
794 | case ZSTD_c_enableLongDistanceMatching : |
795 | CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; |
796 | return CCtxParams->ldmParams.enableLdm; |
797 | |
798 | case ZSTD_c_ldmHashLog : |
799 | if (value!=0) /* 0 ==> auto */ |
800 | BOUNDCHECK(ZSTD_c_ldmHashLog, value); |
801 | CCtxParams->ldmParams.hashLog = value; |
802 | return CCtxParams->ldmParams.hashLog; |
803 | |
804 | case ZSTD_c_ldmMinMatch : |
805 | if (value!=0) /* 0 ==> default */ |
806 | BOUNDCHECK(ZSTD_c_ldmMinMatch, value); |
807 | CCtxParams->ldmParams.minMatchLength = value; |
808 | return CCtxParams->ldmParams.minMatchLength; |
809 | |
810 | case ZSTD_c_ldmBucketSizeLog : |
811 | if (value!=0) /* 0 ==> default */ |
812 | BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); |
813 | CCtxParams->ldmParams.bucketSizeLog = value; |
814 | return CCtxParams->ldmParams.bucketSizeLog; |
815 | |
816 | case ZSTD_c_ldmHashRateLog : |
817 | if (value!=0) /* 0 ==> default */ |
818 | BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); |
819 | CCtxParams->ldmParams.hashRateLog = value; |
820 | return CCtxParams->ldmParams.hashRateLog; |
821 | |
822 | case ZSTD_c_targetCBlockSize : |
823 | if (value!=0) /* 0 ==> default */ |
824 | BOUNDCHECK(ZSTD_c_targetCBlockSize, value); |
825 | CCtxParams->targetCBlockSize = value; |
826 | return CCtxParams->targetCBlockSize; |
827 | |
828 | case ZSTD_c_srcSizeHint : |
829 | if (value!=0) /* 0 ==> default */ |
830 | BOUNDCHECK(ZSTD_c_srcSizeHint, value); |
831 | CCtxParams->srcSizeHint = value; |
832 | return CCtxParams->srcSizeHint; |
833 | |
834 | case ZSTD_c_stableInBuffer: |
835 | BOUNDCHECK(ZSTD_c_stableInBuffer, value); |
836 | CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; |
837 | return CCtxParams->inBufferMode; |
838 | |
839 | case ZSTD_c_stableOutBuffer: |
840 | BOUNDCHECK(ZSTD_c_stableOutBuffer, value); |
841 | CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; |
842 | return CCtxParams->outBufferMode; |
843 | |
844 | case ZSTD_c_blockDelimiters: |
845 | BOUNDCHECK(ZSTD_c_blockDelimiters, value); |
846 | CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; |
847 | return CCtxParams->blockDelimiters; |
848 | |
849 | case ZSTD_c_validateSequences: |
850 | BOUNDCHECK(ZSTD_c_validateSequences, value); |
851 | CCtxParams->validateSequences = value; |
852 | return CCtxParams->validateSequences; |
853 | |
854 | case ZSTD_c_useBlockSplitter: |
855 | BOUNDCHECK(ZSTD_c_useBlockSplitter, value); |
856 | CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; |
857 | return CCtxParams->useBlockSplitter; |
858 | |
859 | case ZSTD_c_useRowMatchFinder: |
860 | BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); |
861 | CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; |
862 | return CCtxParams->useRowMatchFinder; |
863 | |
864 | case ZSTD_c_deterministicRefPrefix: |
865 | BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); |
866 | CCtxParams->deterministicRefPrefix = !!value; |
867 | return CCtxParams->deterministicRefPrefix; |
868 | |
869 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter" ); |
870 | } |
871 | } |
872 | |
873 | size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) |
874 | { |
875 | return ZSTD_CCtxParams_getParameter(params: &cctx->requestedParams, param, value); |
876 | } |
877 | |
878 | size_t ZSTD_CCtxParams_getParameter( |
879 | ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) |
880 | { |
881 | switch(param) |
882 | { |
883 | case ZSTD_c_format : |
884 | *value = CCtxParams->format; |
885 | break; |
886 | case ZSTD_c_compressionLevel : |
887 | *value = CCtxParams->compressionLevel; |
888 | break; |
889 | case ZSTD_c_windowLog : |
890 | *value = (int)CCtxParams->cParams.windowLog; |
891 | break; |
892 | case ZSTD_c_hashLog : |
893 | *value = (int)CCtxParams->cParams.hashLog; |
894 | break; |
895 | case ZSTD_c_chainLog : |
896 | *value = (int)CCtxParams->cParams.chainLog; |
897 | break; |
898 | case ZSTD_c_searchLog : |
899 | *value = CCtxParams->cParams.searchLog; |
900 | break; |
901 | case ZSTD_c_minMatch : |
902 | *value = CCtxParams->cParams.minMatch; |
903 | break; |
904 | case ZSTD_c_targetLength : |
905 | *value = CCtxParams->cParams.targetLength; |
906 | break; |
907 | case ZSTD_c_strategy : |
908 | *value = (unsigned)CCtxParams->cParams.strategy; |
909 | break; |
910 | case ZSTD_c_contentSizeFlag : |
911 | *value = CCtxParams->fParams.contentSizeFlag; |
912 | break; |
913 | case ZSTD_c_checksumFlag : |
914 | *value = CCtxParams->fParams.checksumFlag; |
915 | break; |
916 | case ZSTD_c_dictIDFlag : |
917 | *value = !CCtxParams->fParams.noDictIDFlag; |
918 | break; |
919 | case ZSTD_c_forceMaxWindow : |
920 | *value = CCtxParams->forceWindow; |
921 | break; |
922 | case ZSTD_c_forceAttachDict : |
923 | *value = CCtxParams->attachDictPref; |
924 | break; |
925 | case ZSTD_c_literalCompressionMode : |
926 | *value = CCtxParams->literalCompressionMode; |
927 | break; |
928 | case ZSTD_c_nbWorkers : |
929 | assert(CCtxParams->nbWorkers == 0); |
930 | *value = CCtxParams->nbWorkers; |
931 | break; |
932 | case ZSTD_c_jobSize : |
933 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading" ); |
934 | case ZSTD_c_overlapLog : |
935 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading" ); |
936 | case ZSTD_c_rsyncable : |
937 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading" ); |
938 | case ZSTD_c_enableDedicatedDictSearch : |
939 | *value = CCtxParams->enableDedicatedDictSearch; |
940 | break; |
941 | case ZSTD_c_enableLongDistanceMatching : |
942 | *value = CCtxParams->ldmParams.enableLdm; |
943 | break; |
944 | case ZSTD_c_ldmHashLog : |
945 | *value = CCtxParams->ldmParams.hashLog; |
946 | break; |
947 | case ZSTD_c_ldmMinMatch : |
948 | *value = CCtxParams->ldmParams.minMatchLength; |
949 | break; |
950 | case ZSTD_c_ldmBucketSizeLog : |
951 | *value = CCtxParams->ldmParams.bucketSizeLog; |
952 | break; |
953 | case ZSTD_c_ldmHashRateLog : |
954 | *value = CCtxParams->ldmParams.hashRateLog; |
955 | break; |
956 | case ZSTD_c_targetCBlockSize : |
957 | *value = (int)CCtxParams->targetCBlockSize; |
958 | break; |
959 | case ZSTD_c_srcSizeHint : |
960 | *value = (int)CCtxParams->srcSizeHint; |
961 | break; |
962 | case ZSTD_c_stableInBuffer : |
963 | *value = (int)CCtxParams->inBufferMode; |
964 | break; |
965 | case ZSTD_c_stableOutBuffer : |
966 | *value = (int)CCtxParams->outBufferMode; |
967 | break; |
968 | case ZSTD_c_blockDelimiters : |
969 | *value = (int)CCtxParams->blockDelimiters; |
970 | break; |
971 | case ZSTD_c_validateSequences : |
972 | *value = (int)CCtxParams->validateSequences; |
973 | break; |
974 | case ZSTD_c_useBlockSplitter : |
975 | *value = (int)CCtxParams->useBlockSplitter; |
976 | break; |
977 | case ZSTD_c_useRowMatchFinder : |
978 | *value = (int)CCtxParams->useRowMatchFinder; |
979 | break; |
980 | case ZSTD_c_deterministicRefPrefix: |
981 | *value = (int)CCtxParams->deterministicRefPrefix; |
982 | break; |
983 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter" ); |
984 | } |
985 | return 0; |
986 | } |
987 | |
988 | /* ZSTD_CCtx_setParametersUsingCCtxParams() : |
989 | * just applies `params` into `cctx` |
990 | * no action is performed, parameters are merely stored. |
991 | * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. |
992 | * This is possible even if a compression is ongoing. |
993 | * In which case, new parameters will be applied on the fly, starting with next compression job. |
994 | */ |
995 | size_t ZSTD_CCtx_setParametersUsingCCtxParams( |
996 | ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) |
997 | { |
998 | DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams" ); |
999 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1000 | "The context is in the wrong stage!" ); |
1001 | RETURN_ERROR_IF(cctx->cdict, stage_wrong, |
1002 | "Can't override parameters with cdict attached (some must " |
1003 | "be inherited from the cdict)." ); |
1004 | |
1005 | cctx->requestedParams = *params; |
1006 | return 0; |
1007 | } |
1008 | |
1009 | size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) |
1010 | { |
1011 | DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes" , (U32)pledgedSrcSize); |
1012 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1013 | "Can't set pledgedSrcSize when not in init stage." ); |
1014 | cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; |
1015 | return 0; |
1016 | } |
1017 | |
1018 | static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( |
1019 | int const compressionLevel, |
1020 | size_t const dictSize); |
1021 | static int ZSTD_dedicatedDictSearch_isSupported( |
1022 | const ZSTD_compressionParameters* cParams); |
1023 | static void ZSTD_dedicatedDictSearch_revertCParams( |
1024 | ZSTD_compressionParameters* cParams); |
1025 | |
1026 | /* |
1027 | * Initializes the local dict using the requested parameters. |
1028 | * NOTE: This does not use the pledged src size, because it may be used for more |
1029 | * than one compression. |
1030 | */ |
1031 | static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) |
1032 | { |
1033 | ZSTD_localDict* const dl = &cctx->localDict; |
1034 | if (dl->dict == NULL) { |
1035 | /* No local dictionary. */ |
1036 | assert(dl->dictBuffer == NULL); |
1037 | assert(dl->cdict == NULL); |
1038 | assert(dl->dictSize == 0); |
1039 | return 0; |
1040 | } |
1041 | if (dl->cdict != NULL) { |
1042 | assert(cctx->cdict == dl->cdict); |
1043 | /* Local dictionary already initialized. */ |
1044 | return 0; |
1045 | } |
1046 | assert(dl->dictSize > 0); |
1047 | assert(cctx->cdict == NULL); |
1048 | assert(cctx->prefixDict.dict == NULL); |
1049 | |
1050 | dl->cdict = ZSTD_createCDict_advanced2( |
1051 | dict: dl->dict, |
1052 | dictSize: dl->dictSize, |
1053 | dictLoadMethod: ZSTD_dlm_byRef, |
1054 | dictContentType: dl->dictContentType, |
1055 | cctxParams: &cctx->requestedParams, |
1056 | customMem: cctx->customMem); |
1057 | RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed" ); |
1058 | cctx->cdict = dl->cdict; |
1059 | return 0; |
1060 | } |
1061 | |
1062 | size_t ZSTD_CCtx_loadDictionary_advanced( |
1063 | ZSTD_CCtx* cctx, const void* dict, size_t dictSize, |
1064 | ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) |
1065 | { |
1066 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1067 | "Can't load a dictionary when ctx is not in init stage." ); |
1068 | DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)" , (U32)dictSize); |
1069 | ZSTD_clearAllDicts(cctx); /* in case one already exists */ |
1070 | if (dict == NULL || dictSize == 0) /* no dictionary mode */ |
1071 | return 0; |
1072 | if (dictLoadMethod == ZSTD_dlm_byRef) { |
1073 | cctx->localDict.dict = dict; |
1074 | } else { |
1075 | void* dictBuffer; |
1076 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, |
1077 | "no malloc for static CCtx" ); |
1078 | dictBuffer = ZSTD_customMalloc(size: dictSize, customMem: cctx->customMem); |
1079 | RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!" ); |
1080 | ZSTD_memcpy(dictBuffer, dict, dictSize); |
1081 | cctx->localDict.dictBuffer = dictBuffer; |
1082 | cctx->localDict.dict = dictBuffer; |
1083 | } |
1084 | cctx->localDict.dictSize = dictSize; |
1085 | cctx->localDict.dictContentType = dictContentType; |
1086 | return 0; |
1087 | } |
1088 | |
1089 | size_t ZSTD_CCtx_loadDictionary_byReference( |
1090 | ZSTD_CCtx* cctx, const void* dict, size_t dictSize) |
1091 | { |
1092 | return ZSTD_CCtx_loadDictionary_advanced( |
1093 | cctx, dict, dictSize, dictLoadMethod: ZSTD_dlm_byRef, dictContentType: ZSTD_dct_auto); |
1094 | } |
1095 | |
1096 | size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) |
1097 | { |
1098 | return ZSTD_CCtx_loadDictionary_advanced( |
1099 | cctx, dict, dictSize, dictLoadMethod: ZSTD_dlm_byCopy, dictContentType: ZSTD_dct_auto); |
1100 | } |
1101 | |
1102 | |
1103 | size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) |
1104 | { |
1105 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1106 | "Can't ref a dict when ctx not in init stage." ); |
1107 | /* Free the existing local cdict (if any) to save memory. */ |
1108 | ZSTD_clearAllDicts(cctx); |
1109 | cctx->cdict = cdict; |
1110 | return 0; |
1111 | } |
1112 | |
1113 | size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) |
1114 | { |
1115 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1116 | "Can't ref a pool when ctx not in init stage." ); |
1117 | cctx->pool = pool; |
1118 | return 0; |
1119 | } |
1120 | |
1121 | size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) |
1122 | { |
1123 | return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, dictContentType: ZSTD_dct_rawContent); |
1124 | } |
1125 | |
1126 | size_t ZSTD_CCtx_refPrefix_advanced( |
1127 | ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) |
1128 | { |
1129 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1130 | "Can't ref a prefix when ctx not in init stage." ); |
1131 | ZSTD_clearAllDicts(cctx); |
1132 | if (prefix != NULL && prefixSize > 0) { |
1133 | cctx->prefixDict.dict = prefix; |
1134 | cctx->prefixDict.dictSize = prefixSize; |
1135 | cctx->prefixDict.dictContentType = dictContentType; |
1136 | } |
1137 | return 0; |
1138 | } |
1139 | |
1140 | /*! ZSTD_CCtx_reset() : |
1141 | * Also dumps dictionary */ |
1142 | size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) |
1143 | { |
1144 | if ( (reset == ZSTD_reset_session_only) |
1145 | || (reset == ZSTD_reset_session_and_parameters) ) { |
1146 | cctx->streamStage = zcss_init; |
1147 | cctx->pledgedSrcSizePlusOne = 0; |
1148 | } |
1149 | if ( (reset == ZSTD_reset_parameters) |
1150 | || (reset == ZSTD_reset_session_and_parameters) ) { |
1151 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, |
1152 | "Can't reset parameters only when not in init stage." ); |
1153 | ZSTD_clearAllDicts(cctx); |
1154 | return ZSTD_CCtxParams_reset(params: &cctx->requestedParams); |
1155 | } |
1156 | return 0; |
1157 | } |
1158 | |
1159 | |
/* ZSTD_checkCParams() :
    control CParam values remain within authorized range.
    Each field is checked in turn with BOUNDCHECK against its matching
    ZSTD_c_* parameter.
    @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    /* unsigned fields are cast to int before being passed to BOUNDCHECK */
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}
1174 | |
1175 | /* ZSTD_clampCParams() : |
1176 | * make CParam values within valid range. |
1177 | * @return : valid CParams */ |
1178 | static ZSTD_compressionParameters |
1179 | ZSTD_clampCParams(ZSTD_compressionParameters cParams) |
1180 | { |
1181 | # define CLAMP_TYPE(cParam, val, type) { \ |
1182 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ |
1183 | if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ |
1184 | else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ |
1185 | } |
1186 | # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) |
1187 | CLAMP(ZSTD_c_windowLog, cParams.windowLog); |
1188 | CLAMP(ZSTD_c_chainLog, cParams.chainLog); |
1189 | CLAMP(ZSTD_c_hashLog, cParams.hashLog); |
1190 | CLAMP(ZSTD_c_searchLog, cParams.searchLog); |
1191 | CLAMP(ZSTD_c_minMatch, cParams.minMatch); |
1192 | CLAMP(ZSTD_c_targetLength,cParams.targetLength); |
1193 | CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); |
1194 | return cParams; |
1195 | } |
1196 | |
1197 | /* ZSTD_cycleLog() : |
1198 | * condition for correct operation : hashLog > 1 */ |
1199 | U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) |
1200 | { |
1201 | U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); |
1202 | return hashLog - btScale; |
1203 | } |
1204 | |
1205 | /* ZSTD_dictAndWindowLog() : |
1206 | * Returns an adjusted window log that is large enough to fit the source and the dictionary. |
1207 | * The zstd format says that the entire dictionary is valid if one byte of the dictionary |
1208 | * is within the window. So the hashLog and chainLog should be large enough to reference both |
1209 | * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing |
1210 | * the hashLog and windowLog. |
1211 | * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. |
1212 | */ |
1213 | static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) |
1214 | { |
1215 | const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; |
1216 | /* No dictionary ==> No change */ |
1217 | if (dictSize == 0) { |
1218 | return windowLog; |
1219 | } |
1220 | assert(windowLog <= ZSTD_WINDOWLOG_MAX); |
1221 | assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ |
1222 | { |
1223 | U64 const windowSize = 1ULL << windowLog; |
1224 | U64 const dictAndWindowSize = dictSize + windowSize; |
1225 | /* If the window size is already large enough to fit both the source and the dictionary |
1226 | * then just use the window size. Otherwise adjust so that it fits the dictionary and |
1227 | * the window. |
1228 | */ |
1229 | if (windowSize >= dictSize + srcSize) { |
1230 | return windowLog; /* Window size large enough already */ |
1231 | } else if (dictAndWindowSize >= maxWindowSize) { |
1232 | return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ |
1233 | } else { |
1234 | return ZSTD_highbit32(val: (U32)dictAndWindowSize - 1) + 1; |
1235 | } |
1236 | } |
1237 | } |
1238 | |
1239 | /* ZSTD_adjustCParams_internal() : |
1240 | * optimize `cPar` for a specified input (`srcSize` and `dictSize`). |
1241 | * mostly downsize to reduce memory consumption and initialization latency. |
1242 | * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. |
1243 | * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. |
1244 | * note : `srcSize==0` means 0! |
1245 | * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ |
1246 | static ZSTD_compressionParameters |
1247 | ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, |
1248 | unsigned long long srcSize, |
1249 | size_t dictSize, |
1250 | ZSTD_cParamMode_e mode) |
1251 | { |
1252 | const U64 minSrcSize = 513; /* (1<<9) + 1 */ |
1253 | const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); |
1254 | assert(ZSTD_checkCParams(cPar)==0); |
1255 | |
1256 | switch (mode) { |
1257 | case ZSTD_cpm_unknown: |
1258 | case ZSTD_cpm_noAttachDict: |
1259 | /* If we don't know the source size, don't make any |
1260 | * assumptions about it. We will already have selected |
1261 | * smaller parameters if a dictionary is in use. |
1262 | */ |
1263 | break; |
1264 | case ZSTD_cpm_createCDict: |
1265 | /* Assume a small source size when creating a dictionary |
1266 | * with an unknown source size. |
1267 | */ |
1268 | if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) |
1269 | srcSize = minSrcSize; |
1270 | break; |
1271 | case ZSTD_cpm_attachDict: |
1272 | /* Dictionary has its own dedicated parameters which have |
1273 | * already been selected. We are selecting parameters |
1274 | * for only the source. |
1275 | */ |
1276 | dictSize = 0; |
1277 | break; |
1278 | default: |
1279 | assert(0); |
1280 | break; |
1281 | } |
1282 | |
1283 | /* resize windowLog if input is small enough, to use less memory */ |
1284 | if ( (srcSize < maxWindowResize) |
1285 | && (dictSize < maxWindowResize) ) { |
1286 | U32 const tSize = (U32)(srcSize + dictSize); |
1287 | static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; |
1288 | U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : |
1289 | ZSTD_highbit32(val: tSize-1) + 1; |
1290 | if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; |
1291 | } |
1292 | if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { |
1293 | U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(windowLog: cPar.windowLog, srcSize: (U64)srcSize, dictSize: (U64)dictSize); |
1294 | U32 const cycleLog = ZSTD_cycleLog(hashLog: cPar.chainLog, strat: cPar.strategy); |
1295 | if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; |
1296 | if (cycleLog > dictAndWindowLog) |
1297 | cPar.chainLog -= (cycleLog - dictAndWindowLog); |
1298 | } |
1299 | |
1300 | if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) |
1301 | cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ |
1302 | |
1303 | return cPar; |
1304 | } |
1305 | |
1306 | ZSTD_compressionParameters |
1307 | ZSTD_adjustCParams(ZSTD_compressionParameters cPar, |
1308 | unsigned long long srcSize, |
1309 | size_t dictSize) |
1310 | { |
1311 | cPar = ZSTD_clampCParams(cParams: cPar); /* resulting cPar is necessarily valid (all parameters within range) */ |
1312 | if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; |
1313 | return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, mode: ZSTD_cpm_unknown); |
1314 | } |
1315 | |
1316 | static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); |
1317 | static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); |
1318 | |
1319 | static void ZSTD_overrideCParams( |
1320 | ZSTD_compressionParameters* cParams, |
1321 | const ZSTD_compressionParameters* overrides) |
1322 | { |
1323 | if (overrides->windowLog) cParams->windowLog = overrides->windowLog; |
1324 | if (overrides->hashLog) cParams->hashLog = overrides->hashLog; |
1325 | if (overrides->chainLog) cParams->chainLog = overrides->chainLog; |
1326 | if (overrides->searchLog) cParams->searchLog = overrides->searchLog; |
1327 | if (overrides->minMatch) cParams->minMatch = overrides->minMatch; |
1328 | if (overrides->targetLength) cParams->targetLength = overrides->targetLength; |
1329 | if (overrides->strategy) cParams->strategy = overrides->strategy; |
1330 | } |
1331 | |
1332 | ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( |
1333 | const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) |
1334 | { |
1335 | ZSTD_compressionParameters cParams; |
1336 | if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { |
1337 | srcSizeHint = CCtxParams->srcSizeHint; |
1338 | } |
1339 | cParams = ZSTD_getCParams_internal(compressionLevel: CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); |
1340 | if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; |
1341 | ZSTD_overrideCParams(cParams: &cParams, overrides: &CCtxParams->cParams); |
1342 | assert(!ZSTD_checkCParams(cParams)); |
1343 | /* srcSizeHint == 0 means 0 */ |
1344 | return ZSTD_adjustCParams_internal(cPar: cParams, srcSize: srcSizeHint, dictSize, mode); |
1345 | } |
1346 | |
1347 | static size_t |
1348 | ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, |
1349 | const ZSTD_paramSwitch_e useRowMatchFinder, |
1350 | const U32 enableDedicatedDictSearch, |
1351 | const U32 forCCtx) |
1352 | { |
1353 | /* chain table size should be 0 for fast or row-hash strategies */ |
1354 | size_t const chainSize = ZSTD_allocateChainTable(strategy: cParams->strategy, useRowMatchFinder, forDDSDict: enableDedicatedDictSearch && !forCCtx) |
1355 | ? ((size_t)1 << cParams->chainLog) |
1356 | : 0; |
1357 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
1358 | U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; |
1359 | size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; |
1360 | /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't |
1361 | * surrounded by redzones in ASAN. */ |
1362 | size_t const tableSpace = chainSize * sizeof(U32) |
1363 | + hSize * sizeof(U32) |
1364 | + h3Size * sizeof(U32); |
1365 | size_t const optPotentialSpace = |
1366 | ZSTD_cwksp_aligned_alloc_size(size: (MaxML+1) * sizeof(U32)) |
1367 | + ZSTD_cwksp_aligned_alloc_size(size: (MaxLL+1) * sizeof(U32)) |
1368 | + ZSTD_cwksp_aligned_alloc_size(size: (MaxOff+1) * sizeof(U32)) |
1369 | + ZSTD_cwksp_aligned_alloc_size(size: (1<<Litbits) * sizeof(U32)) |
1370 | + ZSTD_cwksp_aligned_alloc_size(size: (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)) |
1371 | + ZSTD_cwksp_aligned_alloc_size(size: (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); |
1372 | size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(strategy: cParams->strategy, mode: useRowMatchFinder) |
1373 | ? ZSTD_cwksp_aligned_alloc_size(size: hSize*sizeof(U16)) |
1374 | : 0; |
1375 | size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) |
1376 | ? optPotentialSpace |
1377 | : 0; |
1378 | size_t const slackSpace = ZSTD_cwksp_slack_space_required(); |
1379 | |
1380 | /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ |
1381 | ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); |
1382 | assert(useRowMatchFinder != ZSTD_ps_auto); |
1383 | |
1384 | DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u" , |
1385 | (U32)chainSize, (U32)hSize, (U32)h3Size); |
1386 | return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; |
1387 | } |
1388 | |
1389 | static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( |
1390 | const ZSTD_compressionParameters* cParams, |
1391 | const ldmParams_t* ldmParams, |
1392 | const int isStatic, |
1393 | const ZSTD_paramSwitch_e useRowMatchFinder, |
1394 | const size_t buffInSize, |
1395 | const size_t buffOutSize, |
1396 | const U64 pledgedSrcSize) |
1397 | { |
1398 | size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); |
1399 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); |
1400 | U32 const divider = (cParams->minMatch==3) ? 3 : 4; |
1401 | size_t const maxNbSeq = blockSize / divider; |
1402 | size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) |
1403 | + ZSTD_cwksp_aligned_alloc_size(size: maxNbSeq * sizeof(seqDef)) |
1404 | + 3 * ZSTD_cwksp_alloc_size(size: maxNbSeq * sizeof(BYTE)); |
1405 | size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); |
1406 | size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(size: sizeof(ZSTD_compressedBlockState_t)); |
1407 | size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); |
1408 | |
1409 | size_t const ldmSpace = ZSTD_ldm_getTableSize(params: *ldmParams); |
1410 | size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params: *ldmParams, maxChunkSize: blockSize); |
1411 | size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? |
1412 | ZSTD_cwksp_aligned_alloc_size(size: maxNbLdmSeq * sizeof(rawSeq)) : 0; |
1413 | |
1414 | |
1415 | size_t const bufferSpace = ZSTD_cwksp_alloc_size(size: buffInSize) |
1416 | + ZSTD_cwksp_alloc_size(size: buffOutSize); |
1417 | |
1418 | size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(size: sizeof(ZSTD_CCtx)) : 0; |
1419 | |
1420 | size_t const neededSpace = |
1421 | cctxSpace + |
1422 | entropySpace + |
1423 | blockStateSpace + |
1424 | ldmSpace + |
1425 | ldmSeqSpace + |
1426 | matchStateSize + |
1427 | tokenSpace + |
1428 | bufferSpace; |
1429 | |
1430 | DEBUGLOG(5, "estimate workspace : %u" , (U32)neededSpace); |
1431 | return neededSpace; |
1432 | } |
1433 | |
1434 | size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
1435 | { |
1436 | ZSTD_compressionParameters const cParams = |
1437 | ZSTD_getCParamsFromCCtxParams(CCtxParams: params, ZSTD_CONTENTSIZE_UNKNOWN, dictSize: 0, mode: ZSTD_cpm_noAttachDict); |
1438 | ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: params->useRowMatchFinder, |
1439 | cParams: &cParams); |
1440 | |
1441 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only." ); |
1442 | /* estimateCCtxSize is for one-shot compression. So no buffers should |
1443 | * be needed. However, we still allocate two 0-sized buffers, which can |
1444 | * take space under ASAN. */ |
1445 | return ZSTD_estimateCCtxSize_usingCCtxParams_internal( |
1446 | cParams: &cParams, ldmParams: ¶ms->ldmParams, isStatic: 1, useRowMatchFinder, buffInSize: 0, buffOutSize: 0, ZSTD_CONTENTSIZE_UNKNOWN); |
1447 | } |
1448 | |
1449 | size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) |
1450 | { |
1451 | ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); |
1452 | if (ZSTD_rowMatchFinderSupported(strategy: cParams.strategy)) { |
1453 | /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ |
1454 | size_t noRowCCtxSize; |
1455 | size_t rowCCtxSize; |
1456 | initialParams.useRowMatchFinder = ZSTD_ps_disable; |
1457 | noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params: &initialParams); |
1458 | initialParams.useRowMatchFinder = ZSTD_ps_enable; |
1459 | rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params: &initialParams); |
1460 | return MAX(noRowCCtxSize, rowCCtxSize); |
1461 | } else { |
1462 | return ZSTD_estimateCCtxSize_usingCCtxParams(params: &initialParams); |
1463 | } |
1464 | } |
1465 | |
1466 | static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) |
1467 | { |
1468 | int tier = 0; |
1469 | size_t largestSize = 0; |
1470 | static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN}; |
1471 | for (; tier < 4; ++tier) { |
1472 | /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */ |
1473 | ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint: srcSizeTiers[tier], dictSize: 0, mode: ZSTD_cpm_noAttachDict); |
1474 | largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize); |
1475 | } |
1476 | return largestSize; |
1477 | } |
1478 | |
/* ZSTD_estimateCCtxSize() :
 * Estimates the CCtx size needed to compress at `compressionLevel`.
 * The result is the maximum of the per-level estimates over all levels
 * from MIN(compressionLevel, 1) up to `compressionLevel`.
 * @return : estimated size in bytes */
size_t ZSTD_estimateCCtxSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        /* Ensure monotonically increasing memory usage as compression level increases */
        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
1490 | |
1491 | size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
1492 | { |
1493 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only." ); |
1494 | { ZSTD_compressionParameters const cParams = |
1495 | ZSTD_getCParamsFromCCtxParams(CCtxParams: params, ZSTD_CONTENTSIZE_UNKNOWN, dictSize: 0, mode: ZSTD_cpm_noAttachDict); |
1496 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); |
1497 | size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) |
1498 | ? ((size_t)1 << cParams.windowLog) + blockSize |
1499 | : 0; |
1500 | size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) |
1501 | ? ZSTD_compressBound(srcSize: blockSize) + 1 |
1502 | : 0; |
1503 | ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: params->useRowMatchFinder, cParams: ¶ms->cParams); |
1504 | |
1505 | return ZSTD_estimateCCtxSize_usingCCtxParams_internal( |
1506 | cParams: &cParams, ldmParams: ¶ms->ldmParams, isStatic: 1, useRowMatchFinder, buffInSize: inBuffSize, buffOutSize: outBuffSize, |
1507 | ZSTD_CONTENTSIZE_UNKNOWN); |
1508 | } |
1509 | } |
1510 | |
1511 | size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) |
1512 | { |
1513 | ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); |
1514 | if (ZSTD_rowMatchFinderSupported(strategy: cParams.strategy)) { |
1515 | /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ |
1516 | size_t noRowCCtxSize; |
1517 | size_t rowCCtxSize; |
1518 | initialParams.useRowMatchFinder = ZSTD_ps_disable; |
1519 | noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(params: &initialParams); |
1520 | initialParams.useRowMatchFinder = ZSTD_ps_enable; |
1521 | rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(params: &initialParams); |
1522 | return MAX(noRowCCtxSize, rowCCtxSize); |
1523 | } else { |
1524 | return ZSTD_estimateCStreamSize_usingCCtxParams(params: &initialParams); |
1525 | } |
1526 | } |
1527 | |
1528 | static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) |
1529 | { |
1530 | ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize: 0, mode: ZSTD_cpm_noAttachDict); |
1531 | return ZSTD_estimateCStreamSize_usingCParams(cParams); |
1532 | } |
1533 | |
/*! ZSTD_estimateCStreamSize() :
 *  Returns the largest per-level estimate found while scanning every level
 *  from MIN(compressionLevel, 1) up to compressionLevel (inclusive),
 *  so that the reported budget never decreases as the level increases. */
size_t ZSTD_estimateCStreamSize(int compressionLevel)
{
    int level;
    size_t memBudget = 0;
    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
        if (newMB > memBudget) memBudget = newMB;
    }
    return memBudget;
}
1544 | |
1545 | /* ZSTD_getFrameProgression(): |
1546 | * tells how much data has been consumed (input) and produced (output) for current frame. |
1547 | * able to count progression inside worker threads (non-blocking mode). |
1548 | */ |
1549 | ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) |
1550 | { |
1551 | { ZSTD_frameProgression fp; |
1552 | size_t const buffered = (cctx->inBuff == NULL) ? 0 : |
1553 | cctx->inBuffPos - cctx->inToCompress; |
1554 | if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); |
1555 | assert(buffered <= ZSTD_BLOCKSIZE_MAX); |
1556 | fp.ingested = cctx->consumedSrcSize + buffered; |
1557 | fp.consumed = cctx->consumedSrcSize; |
1558 | fp.produced = cctx->producedCSize; |
1559 | fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ |
1560 | fp.currentJobID = 0; |
1561 | fp.nbActiveWorkers = 0; |
1562 | return fp; |
1563 | } } |
1564 | |
1565 | /*! ZSTD_toFlushNow() |
1566 | * Only useful for multithreading scenarios currently (nbWorkers >= 1). |
1567 | */ |
1568 | size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) |
1569 | { |
1570 | (void)cctx; |
1571 | return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ |
1572 | } |
1573 | |
1574 | static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, |
1575 | ZSTD_compressionParameters cParams2) |
1576 | { |
1577 | (void)cParams1; |
1578 | (void)cParams2; |
1579 | assert(cParams1.windowLog == cParams2.windowLog); |
1580 | assert(cParams1.chainLog == cParams2.chainLog); |
1581 | assert(cParams1.hashLog == cParams2.hashLog); |
1582 | assert(cParams1.searchLog == cParams2.searchLog); |
1583 | assert(cParams1.minMatch == cParams2.minMatch); |
1584 | assert(cParams1.targetLength == cParams2.targetLength); |
1585 | assert(cParams1.strategy == cParams2.strategy); |
1586 | } |
1587 | |
1588 | void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) |
1589 | { |
1590 | int i; |
1591 | for (i = 0; i < ZSTD_REP_NUM; ++i) |
1592 | bs->rep[i] = repStartValue[i]; |
1593 | bs->entropy.huf.repeatMode = HUF_repeat_none; |
1594 | bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; |
1595 | bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; |
1596 | bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; |
1597 | } |
1598 | |
1599 | /*! ZSTD_invalidateMatchState() |
1600 | * Invalidate all the matches in the match finder tables. |
1601 | * Requires nextSrc and base to be set (can be NULL). |
1602 | */ |
1603 | static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) |
1604 | { |
1605 | ZSTD_window_clear(window: &ms->window); |
1606 | |
1607 | ms->nextToUpdate = ms->window.dictLimit; |
1608 | ms->loadedDictEnd = 0; |
1609 | ms->opt.litLengthSum = 0; /* force reset of btopt stats */ |
1610 | ms->dictMatchState = NULL; |
1611 | } |
1612 | |
/*
 * Controls, for this matchState reset, whether the tables need to be cleared /
 * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
 * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
 * subsequent operation will overwrite the table space anyways (e.g., copying
 * the matchState contents in from a CDict).
 */
typedef enum {
    ZSTDcrp_makeClean,
    ZSTDcrp_leaveDirty
} ZSTD_compResetPolicy_e;

/*
 * Controls, for this matchState reset, whether indexing can continue where it
 * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
 * (ZSTDirp_reset).
 */
typedef enum {
    ZSTDirp_continue,
    ZSTDirp_reset
} ZSTD_indexResetPolicy_e;

/*
 * Tells ZSTD_reset_matchState() which kind of object owns the match state.
 * In that function, ZSTD_resetTarget_CCtx is required for the 3-byte hash
 * table (when minMatch==3) and for the optimal parser buffers to be reserved,
 * while ZSTD_resetTarget_CDict is what lets a dedicated-dict-search match
 * state request its chain table as a DDS dictionary.
 */
typedef enum {
    ZSTD_resetTarget_CDict,
    ZSTD_resetTarget_CCtx
} ZSTD_resetTarget_e;
1639 | |
1640 | |
1641 | static size_t |
1642 | ZSTD_reset_matchState(ZSTD_matchState_t* ms, |
1643 | ZSTD_cwksp* ws, |
1644 | const ZSTD_compressionParameters* cParams, |
1645 | const ZSTD_paramSwitch_e useRowMatchFinder, |
1646 | const ZSTD_compResetPolicy_e crp, |
1647 | const ZSTD_indexResetPolicy_e forceResetIndex, |
1648 | const ZSTD_resetTarget_e forWho) |
1649 | { |
1650 | /* disable chain table allocation for fast or row-based strategies */ |
1651 | size_t const chainSize = ZSTD_allocateChainTable(strategy: cParams->strategy, useRowMatchFinder, |
1652 | forDDSDict: ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) |
1653 | ? ((size_t)1 << cParams->chainLog) |
1654 | : 0; |
1655 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
1656 | U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; |
1657 | size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; |
1658 | |
1659 | DEBUGLOG(4, "reset indices : %u" , forceResetIndex == ZSTDirp_reset); |
1660 | assert(useRowMatchFinder != ZSTD_ps_auto); |
1661 | if (forceResetIndex == ZSTDirp_reset) { |
1662 | ZSTD_window_init(window: &ms->window); |
1663 | ZSTD_cwksp_mark_tables_dirty(ws); |
1664 | } |
1665 | |
1666 | ms->hashLog3 = hashLog3; |
1667 | |
1668 | ZSTD_invalidateMatchState(ms); |
1669 | |
1670 | assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ |
1671 | |
1672 | ZSTD_cwksp_clear_tables(ws); |
1673 | |
1674 | DEBUGLOG(5, "reserving table space" ); |
1675 | /* table Space */ |
1676 | ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, bytes: hSize * sizeof(U32)); |
1677 | ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, bytes: chainSize * sizeof(U32)); |
1678 | ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, bytes: h3Size * sizeof(U32)); |
1679 | RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, |
1680 | "failed a workspace allocation in ZSTD_reset_matchState" ); |
1681 | |
1682 | DEBUGLOG(4, "reset table : %u" , crp!=ZSTDcrp_leaveDirty); |
1683 | if (crp!=ZSTDcrp_leaveDirty) { |
1684 | /* reset tables only */ |
1685 | ZSTD_cwksp_clean_tables(ws); |
1686 | } |
1687 | |
1688 | /* opt parser space */ |
1689 | if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { |
1690 | DEBUGLOG(4, "reserving optimal parser space" ); |
1691 | ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, bytes: (1<<Litbits) * sizeof(unsigned)); |
1692 | ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, bytes: (MaxLL+1) * sizeof(unsigned)); |
1693 | ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, bytes: (MaxML+1) * sizeof(unsigned)); |
1694 | ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, bytes: (MaxOff+1) * sizeof(unsigned)); |
1695 | ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, bytes: (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); |
1696 | ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, bytes: (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); |
1697 | } |
1698 | |
1699 | if (ZSTD_rowMatchFinderUsed(strategy: cParams->strategy, mode: useRowMatchFinder)) { |
1700 | { /* Row match finder needs an additional table of hashes ("tags") */ |
1701 | size_t const tagTableSize = hSize*sizeof(U16); |
1702 | ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, bytes: tagTableSize); |
1703 | if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); |
1704 | } |
1705 | { /* Switch to 32-entry rows if searchLog is 5 (or more) */ |
1706 | U32 const rowLog = BOUNDED(4, cParams->searchLog, 6); |
1707 | assert(cParams->hashLog >= rowLog); |
1708 | ms->rowHashLog = cParams->hashLog - rowLog; |
1709 | } |
1710 | } |
1711 | |
1712 | ms->cParams = *cParams; |
1713 | |
1714 | RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, |
1715 | "failed a workspace allocation in ZSTD_reset_matchState" ); |
1716 | return 0; |
1717 | } |
1718 | |
1719 | /* ZSTD_indexTooCloseToMax() : |
1720 | * minor optimization : prefer memset() rather than reduceIndex() |
1721 | * which is measurably slow in some circumstances (reported for Visual Studio). |
1722 | * Works when re-using a context for a lot of smallish inputs : |
1723 | * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, |
1724 | * memset() will be triggered before reduceIndex(). |
1725 | */ |
1726 | #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) |
1727 | static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) |
1728 | { |
1729 | return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); |
1730 | } |
1731 | |
1732 | /* ZSTD_dictTooBig(): |
1733 | * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in |
1734 | * one go generically. So we ensure that in that case we reset the tables to zero, |
1735 | * so that we can load as much of the dictionary as possible. |
1736 | */ |
1737 | static int ZSTD_dictTooBig(size_t const loadedDictSize) |
1738 | { |
1739 | return loadedDictSize > ZSTD_CHUNKSIZE_MAX; |
1740 | } |
1741 | |
1742 | /*! ZSTD_resetCCtx_internal() : |
1743 | * @param loadedDictSize The size of the dictionary to be loaded |
1744 | * into the context, if any. If no dictionary is used, or the |
1745 | * dictionary is being attached / copied, then pass 0. |
1746 | * note : `params` are assumed fully validated at this stage. |
1747 | */ |
1748 | static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, |
1749 | ZSTD_CCtx_params const* params, |
1750 | U64 const pledgedSrcSize, |
1751 | size_t const loadedDictSize, |
1752 | ZSTD_compResetPolicy_e const crp, |
1753 | ZSTD_buffered_policy_e const zbuff) |
1754 | { |
1755 | ZSTD_cwksp* const ws = &zc->workspace; |
1756 | DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d" , |
1757 | (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); |
1758 | assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); |
1759 | |
1760 | zc->isFirstBlock = 1; |
1761 | |
1762 | /* Set applied params early so we can modify them for LDM, |
1763 | * and point params at the applied params. |
1764 | */ |
1765 | zc->appliedParams = *params; |
1766 | params = &zc->appliedParams; |
1767 | |
1768 | assert(params->useRowMatchFinder != ZSTD_ps_auto); |
1769 | assert(params->useBlockSplitter != ZSTD_ps_auto); |
1770 | assert(params->ldmParams.enableLdm != ZSTD_ps_auto); |
1771 | if (params->ldmParams.enableLdm == ZSTD_ps_enable) { |
1772 | /* Adjust long distance matching parameters */ |
1773 | ZSTD_ldm_adjustParameters(params: &zc->appliedParams.ldmParams, cParams: ¶ms->cParams); |
1774 | assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); |
1775 | assert(params->ldmParams.hashRateLog < 32); |
1776 | } |
1777 | |
1778 | { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); |
1779 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); |
1780 | U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; |
1781 | size_t const maxNbSeq = blockSize / divider; |
1782 | size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) |
1783 | ? ZSTD_compressBound(srcSize: blockSize) + 1 |
1784 | : 0; |
1785 | size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) |
1786 | ? windowSize + blockSize |
1787 | : 0; |
1788 | size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params: params->ldmParams, maxChunkSize: blockSize); |
1789 | |
1790 | int const indexTooClose = ZSTD_indexTooCloseToMax(w: zc->blockState.matchState.window); |
1791 | int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); |
1792 | ZSTD_indexResetPolicy_e needsIndexReset = |
1793 | (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue; |
1794 | |
1795 | size_t const neededSpace = |
1796 | ZSTD_estimateCCtxSize_usingCCtxParams_internal( |
1797 | cParams: ¶ms->cParams, ldmParams: ¶ms->ldmParams, isStatic: zc->staticSize != 0, useRowMatchFinder: params->useRowMatchFinder, |
1798 | buffInSize, buffOutSize, pledgedSrcSize); |
1799 | int resizeWorkspace; |
1800 | |
1801 | FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!" ); |
1802 | |
1803 | if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, additionalNeededSpace: 0); |
1804 | |
1805 | { /* Check if workspace is large enough, alloc a new one if needed */ |
1806 | int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; |
1807 | int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, additionalNeededSpace: neededSpace); |
1808 | resizeWorkspace = workspaceTooSmall || workspaceWasteful; |
1809 | DEBUGLOG(4, "Need %zu B workspace" , neededSpace); |
1810 | DEBUGLOG(4, "windowSize: %zu - blockSize: %zu" , windowSize, blockSize); |
1811 | |
1812 | if (resizeWorkspace) { |
1813 | DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB" , |
1814 | ZSTD_cwksp_sizeof(ws) >> 10, |
1815 | neededSpace >> 10); |
1816 | |
1817 | RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize" ); |
1818 | |
1819 | needsIndexReset = ZSTDirp_reset; |
1820 | |
1821 | ZSTD_cwksp_free(ws, customMem: zc->customMem); |
1822 | FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "" ); |
1823 | |
1824 | DEBUGLOG(5, "reserving object space" ); |
1825 | /* Statically sized space. |
1826 | * entropyWorkspace never moves, |
1827 | * though prev/next block swap places */ |
1828 | assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); |
1829 | zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, bytes: sizeof(ZSTD_compressedBlockState_t)); |
1830 | RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock" ); |
1831 | zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, bytes: sizeof(ZSTD_compressedBlockState_t)); |
1832 | RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock" ); |
1833 | zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); |
1834 | RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace" ); |
1835 | } } |
1836 | |
1837 | ZSTD_cwksp_clear(ws); |
1838 | |
1839 | /* init params */ |
1840 | zc->blockState.matchState.cParams = params->cParams; |
1841 | zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; |
1842 | zc->consumedSrcSize = 0; |
1843 | zc->producedCSize = 0; |
1844 | if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) |
1845 | zc->appliedParams.fParams.contentSizeFlag = 0; |
1846 | DEBUGLOG(4, "pledged content size : %u ; flag : %u" , |
1847 | (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); |
1848 | zc->blockSize = blockSize; |
1849 | |
1850 | xxh64_reset(state: &zc->xxhState, seed: 0); |
1851 | zc->stage = ZSTDcs_init; |
1852 | zc->dictID = 0; |
1853 | zc->dictContentSize = 0; |
1854 | |
1855 | ZSTD_reset_compressedBlockState(bs: zc->blockState.prevCBlock); |
1856 | |
1857 | /* ZSTD_wildcopy() is used to copy into the literals buffer, |
1858 | * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. |
1859 | */ |
1860 | zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, bytes: blockSize + WILDCOPY_OVERLENGTH); |
1861 | zc->seqStore.maxNbLit = blockSize; |
1862 | |
1863 | /* buffers */ |
1864 | zc->bufferedPolicy = zbuff; |
1865 | zc->inBuffSize = buffInSize; |
1866 | zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, bytes: buffInSize); |
1867 | zc->outBuffSize = buffOutSize; |
1868 | zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, bytes: buffOutSize); |
1869 | |
1870 | /* ldm bucketOffsets table */ |
1871 | if (params->ldmParams.enableLdm == ZSTD_ps_enable) { |
1872 | /* TODO: avoid memset? */ |
1873 | size_t const numBuckets = |
1874 | ((size_t)1) << (params->ldmParams.hashLog - |
1875 | params->ldmParams.bucketSizeLog); |
1876 | zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, bytes: numBuckets); |
1877 | ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); |
1878 | } |
1879 | |
1880 | /* sequences storage */ |
1881 | ZSTD_referenceExternalSequences(cctx: zc, NULL, nbSeq: 0); |
1882 | zc->seqStore.maxNbSeq = maxNbSeq; |
1883 | zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, bytes: maxNbSeq * sizeof(BYTE)); |
1884 | zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, bytes: maxNbSeq * sizeof(BYTE)); |
1885 | zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, bytes: maxNbSeq * sizeof(BYTE)); |
1886 | zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, bytes: maxNbSeq * sizeof(seqDef)); |
1887 | |
1888 | FORWARD_IF_ERROR(ZSTD_reset_matchState( |
1889 | &zc->blockState.matchState, |
1890 | ws, |
1891 | ¶ms->cParams, |
1892 | params->useRowMatchFinder, |
1893 | crp, |
1894 | needsIndexReset, |
1895 | ZSTD_resetTarget_CCtx), "" ); |
1896 | |
1897 | /* ldm hash table */ |
1898 | if (params->ldmParams.enableLdm == ZSTD_ps_enable) { |
1899 | /* TODO: avoid memset? */ |
1900 | size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; |
1901 | zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, bytes: ldmHSize * sizeof(ldmEntry_t)); |
1902 | ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); |
1903 | zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, bytes: maxNbLdmSeq * sizeof(rawSeq)); |
1904 | zc->maxNbLdmSequences = maxNbLdmSeq; |
1905 | |
1906 | ZSTD_window_init(window: &zc->ldmState.window); |
1907 | zc->ldmState.loadedDictEnd = 0; |
1908 | } |
1909 | |
1910 | DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available" , ZSTD_cwksp_available_space(ws)); |
1911 | assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); |
1912 | |
1913 | zc->initialized = 1; |
1914 | |
1915 | return 0; |
1916 | } |
1917 | } |
1918 | |
1919 | /* ZSTD_invalidateRepCodes() : |
1920 | * ensures next compression will not use repcodes from previous block. |
1921 | * Note : only works with regular variant; |
1922 | * do not use with extDict variant ! */ |
1923 | void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { |
1924 | int i; |
1925 | for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0; |
1926 | assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); |
1927 | } |
1928 | |
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 * The table is indexed by the cdict's strategy value (see ZSTD_shouldAttachDict()),
 * which is why it holds ZSTD_STRATEGY_MAX+1 entries, with entry 0 unused.
 */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};
1945 | |
1946 | static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, |
1947 | const ZSTD_CCtx_params* params, |
1948 | U64 pledgedSrcSize) |
1949 | { |
1950 | size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; |
1951 | int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; |
1952 | return dedicatedDictSearch |
1953 | || ( ( pledgedSrcSize <= cutoff |
1954 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN |
1955 | || params->attachDictPref == ZSTD_dictForceAttach ) |
1956 | && params->attachDictPref != ZSTD_dictForceCopy |
1957 | && !params->forceWindow ); /* dictMatchState isn't correctly |
1958 | * handled in _enforceMaxDist */ |
1959 | } |
1960 | |
1961 | static size_t |
1962 | ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, |
1963 | const ZSTD_CDict* cdict, |
1964 | ZSTD_CCtx_params params, |
1965 | U64 pledgedSrcSize, |
1966 | ZSTD_buffered_policy_e zbuff) |
1967 | { |
1968 | DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu" , |
1969 | (unsigned long long)pledgedSrcSize); |
1970 | { |
1971 | ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; |
1972 | unsigned const windowLog = params.cParams.windowLog; |
1973 | assert(windowLog != 0); |
1974 | /* Resize working context table params for input only, since the dict |
1975 | * has its own tables. */ |
1976 | /* pledgedSrcSize == 0 means 0! */ |
1977 | |
1978 | if (cdict->matchState.dedicatedDictSearch) { |
1979 | ZSTD_dedicatedDictSearch_revertCParams(cParams: &adjusted_cdict_cParams); |
1980 | } |
1981 | |
1982 | params.cParams = ZSTD_adjustCParams_internal(cPar: adjusted_cdict_cParams, srcSize: pledgedSrcSize, |
1983 | dictSize: cdict->dictContentSize, mode: ZSTD_cpm_attachDict); |
1984 | params.cParams.windowLog = windowLog; |
1985 | params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ |
1986 | FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, |
1987 | /* loadedDictSize */ 0, |
1988 | ZSTDcrp_makeClean, zbuff), "" ); |
1989 | assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); |
1990 | } |
1991 | |
1992 | { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc |
1993 | - cdict->matchState.window.base); |
1994 | const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; |
1995 | if (cdictLen == 0) { |
1996 | /* don't even attach dictionaries with no contents */ |
1997 | DEBUGLOG(4, "skipping attaching empty dictionary" ); |
1998 | } else { |
1999 | DEBUGLOG(4, "attaching dictionary into context" ); |
2000 | cctx->blockState.matchState.dictMatchState = &cdict->matchState; |
2001 | |
2002 | /* prep working match state so dict matches never have negative indices |
2003 | * when they are translated to the working context's index space. */ |
2004 | if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { |
2005 | cctx->blockState.matchState.window.nextSrc = |
2006 | cctx->blockState.matchState.window.base + cdictEnd; |
2007 | ZSTD_window_clear(window: &cctx->blockState.matchState.window); |
2008 | } |
2009 | /* loadedDictEnd is expressed within the referential of the active context */ |
2010 | cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; |
2011 | } } |
2012 | |
2013 | cctx->dictID = cdict->dictID; |
2014 | cctx->dictContentSize = cdict->dictContentSize; |
2015 | |
2016 | /* copy block state */ |
2017 | ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); |
2018 | |
2019 | return 0; |
2020 | } |
2021 | |
2022 | static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, |
2023 | const ZSTD_CDict* cdict, |
2024 | ZSTD_CCtx_params params, |
2025 | U64 pledgedSrcSize, |
2026 | ZSTD_buffered_policy_e zbuff) |
2027 | { |
2028 | const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; |
2029 | |
2030 | assert(!cdict->matchState.dedicatedDictSearch); |
2031 | DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu" , |
2032 | (unsigned long long)pledgedSrcSize); |
2033 | |
2034 | { unsigned const windowLog = params.cParams.windowLog; |
2035 | assert(windowLog != 0); |
2036 | /* Copy only compression parameters related to tables. */ |
2037 | params.cParams = *cdict_cParams; |
2038 | params.cParams.windowLog = windowLog; |
2039 | params.useRowMatchFinder = cdict->useRowMatchFinder; |
2040 | FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, |
2041 | /* loadedDictSize */ 0, |
2042 | ZSTDcrp_leaveDirty, zbuff), "" ); |
2043 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); |
2044 | assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); |
2045 | assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); |
2046 | } |
2047 | |
2048 | ZSTD_cwksp_mark_tables_dirty(ws: &cctx->workspace); |
2049 | assert(params.useRowMatchFinder != ZSTD_ps_auto); |
2050 | |
2051 | /* copy tables */ |
2052 | { size_t const chainSize = ZSTD_allocateChainTable(strategy: cdict_cParams->strategy, useRowMatchFinder: cdict->useRowMatchFinder, forDDSDict: 0 /* DDS guaranteed disabled */) |
2053 | ? ((size_t)1 << cdict_cParams->chainLog) |
2054 | : 0; |
2055 | size_t const hSize = (size_t)1 << cdict_cParams->hashLog; |
2056 | |
2057 | ZSTD_memcpy(cctx->blockState.matchState.hashTable, |
2058 | cdict->matchState.hashTable, |
2059 | hSize * sizeof(U32)); |
2060 | /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ |
2061 | if (ZSTD_allocateChainTable(strategy: cctx->appliedParams.cParams.strategy, useRowMatchFinder: cctx->appliedParams.useRowMatchFinder, forDDSDict: 0 /* forDDSDict */)) { |
2062 | ZSTD_memcpy(cctx->blockState.matchState.chainTable, |
2063 | cdict->matchState.chainTable, |
2064 | chainSize * sizeof(U32)); |
2065 | } |
2066 | /* copy tag table */ |
2067 | if (ZSTD_rowMatchFinderUsed(strategy: cdict_cParams->strategy, mode: cdict->useRowMatchFinder)) { |
2068 | size_t const tagTableSize = hSize*sizeof(U16); |
2069 | ZSTD_memcpy(cctx->blockState.matchState.tagTable, |
2070 | cdict->matchState.tagTable, |
2071 | tagTableSize); |
2072 | } |
2073 | } |
2074 | |
2075 | /* Zero the hashTable3, since the cdict never fills it */ |
2076 | { int const h3log = cctx->blockState.matchState.hashLog3; |
2077 | size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; |
2078 | assert(cdict->matchState.hashLog3 == 0); |
2079 | ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); |
2080 | } |
2081 | |
2082 | ZSTD_cwksp_mark_tables_clean(ws: &cctx->workspace); |
2083 | |
2084 | /* copy dictionary offsets */ |
2085 | { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; |
2086 | ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; |
2087 | dstMatchState->window = srcMatchState->window; |
2088 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
2089 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
2090 | } |
2091 | |
2092 | cctx->dictID = cdict->dictID; |
2093 | cctx->dictContentSize = cdict->dictContentSize; |
2094 | |
2095 | /* copy block state */ |
2096 | ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); |
2097 | |
2098 | return 0; |
2099 | } |
2100 | |
2101 | /* We have a choice between copying the dictionary context into the working |
2102 | * context, or referencing the dictionary context from the working context |
2103 | * in-place. We decide here which strategy to use. */ |
2104 | static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, |
2105 | const ZSTD_CDict* cdict, |
2106 | const ZSTD_CCtx_params* params, |
2107 | U64 pledgedSrcSize, |
2108 | ZSTD_buffered_policy_e zbuff) |
2109 | { |
2110 | |
2111 | DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)" , |
2112 | (unsigned)pledgedSrcSize); |
2113 | |
2114 | if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { |
2115 | return ZSTD_resetCCtx_byAttachingCDict( |
2116 | cctx, cdict, params: *params, pledgedSrcSize, zbuff); |
2117 | } else { |
2118 | return ZSTD_resetCCtx_byCopyingCDict( |
2119 | cctx, cdict, params: *params, pledgedSrcSize, zbuff); |
2120 | } |
2121 | } |
2122 | |
2123 | /*! ZSTD_copyCCtx_internal() : |
2124 | * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. |
2125 | * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). |
2126 | * The "context", in this case, refers to the hash and chain tables, |
2127 | * entropy tables, and dictionary references. |
2128 | * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. |
2129 | * @return : 0, or an error code */ |
2130 | static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, |
2131 | const ZSTD_CCtx* srcCCtx, |
2132 | ZSTD_frameParameters fParams, |
2133 | U64 pledgedSrcSize, |
2134 | ZSTD_buffered_policy_e zbuff) |
2135 | { |
2136 | RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, |
2137 | "Can't copy a ctx that's not in init stage." ); |
2138 | DEBUGLOG(5, "ZSTD_copyCCtx_internal" ); |
2139 | ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); |
2140 | { ZSTD_CCtx_params params = dstCCtx->requestedParams; |
2141 | /* Copy only compression parameters related to tables. */ |
2142 | params.cParams = srcCCtx->appliedParams.cParams; |
2143 | assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); |
2144 | assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); |
2145 | assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); |
2146 | params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; |
2147 | params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; |
2148 | params.ldmParams = srcCCtx->appliedParams.ldmParams; |
2149 | params.fParams = fParams; |
2150 | ZSTD_resetCCtx_internal(zc: dstCCtx, params: ¶ms, pledgedSrcSize, |
2151 | /* loadedDictSize */ 0, |
2152 | crp: ZSTDcrp_leaveDirty, zbuff); |
2153 | assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); |
2154 | assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); |
2155 | assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); |
2156 | assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); |
2157 | assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); |
2158 | } |
2159 | |
2160 | ZSTD_cwksp_mark_tables_dirty(ws: &dstCCtx->workspace); |
2161 | |
2162 | /* copy tables */ |
2163 | { size_t const chainSize = ZSTD_allocateChainTable(strategy: srcCCtx->appliedParams.cParams.strategy, |
2164 | useRowMatchFinder: srcCCtx->appliedParams.useRowMatchFinder, |
2165 | forDDSDict: 0 /* forDDSDict */) |
2166 | ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) |
2167 | : 0; |
2168 | size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; |
2169 | int const h3log = srcCCtx->blockState.matchState.hashLog3; |
2170 | size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; |
2171 | |
2172 | ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, |
2173 | srcCCtx->blockState.matchState.hashTable, |
2174 | hSize * sizeof(U32)); |
2175 | ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, |
2176 | srcCCtx->blockState.matchState.chainTable, |
2177 | chainSize * sizeof(U32)); |
2178 | ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, |
2179 | srcCCtx->blockState.matchState.hashTable3, |
2180 | h3Size * sizeof(U32)); |
2181 | } |
2182 | |
2183 | ZSTD_cwksp_mark_tables_clean(ws: &dstCCtx->workspace); |
2184 | |
2185 | /* copy dictionary offsets */ |
2186 | { |
2187 | const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; |
2188 | ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; |
2189 | dstMatchState->window = srcMatchState->window; |
2190 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
2191 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
2192 | } |
2193 | dstCCtx->dictID = srcCCtx->dictID; |
2194 | dstCCtx->dictContentSize = srcCCtx->dictContentSize; |
2195 | |
2196 | /* copy block state */ |
2197 | ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); |
2198 | |
2199 | return 0; |
2200 | } |
2201 | |
2202 | /*! ZSTD_copyCCtx() : |
2203 | * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. |
2204 | * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). |
2205 | * pledgedSrcSize==0 means "unknown". |
2206 | * @return : 0, or an error code */ |
2207 | size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) |
2208 | { |
2209 | ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; |
2210 | ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; |
2211 | ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); |
2212 | if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; |
2213 | fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); |
2214 | |
2215 | return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, |
2216 | fParams, pledgedSrcSize, |
2217 | zbuff); |
2218 | } |
2219 | |
2220 | |
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization
 *
 * @param table         array of `size` indexes, rewritten in place.
 * @param size          number of cells in `table`; asserted to be a multiple
 *                      of ZSTD_ROWSIZE and strictly below 2^31.
 * @param reducerValue  amount subtracted from every index that is kept.
 * @param preserveMark  when non-zero, cells equal to ZSTD_DUBT_UNSORTED_MARK
 *                      are written back unchanged. */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;   /* running index into `table`, advanced once per cell */
    int rowNb;
    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));   /* can be casted to int */


    /* Cells are visited as nbRows rows of ZSTD_ROWSIZE columns;
     * the fixed-length inner loop is kept as-is (see auto-vectorization note above). */
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            U32 newVal;
            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
                /* This write is pointless, but is required(?) for the compiler
                 * to auto-vectorize the loop. */
                newVal = ZSTD_DUBT_UNSORTED_MARK;
            } else if (table[cellNb] < reducerThreshold) {
                /* index would fall below the protected start range : squash to zero */
                newVal = 0;
            } else {
                newVal = table[cellNb] - reducerValue;
            }
            table[cellNb] = newVal;
            cellNb++;
    }   }
}
2257 | |
2258 | static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) |
2259 | { |
2260 | ZSTD_reduceTable_internal(table, size, reducerValue, preserveMark: 0); |
2261 | } |
2262 | |
2263 | static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) |
2264 | { |
2265 | ZSTD_reduceTable_internal(table, size, reducerValue, preserveMark: 1); |
2266 | } |
2267 | |
2268 | /*! ZSTD_reduceIndex() : |
2269 | * rescale all indexes to avoid future overflow (indexes are U32) */ |
2270 | static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) |
2271 | { |
2272 | { U32 const hSize = (U32)1 << params->cParams.hashLog; |
2273 | ZSTD_reduceTable(table: ms->hashTable, size: hSize, reducerValue); |
2274 | } |
2275 | |
2276 | if (ZSTD_allocateChainTable(strategy: params->cParams.strategy, useRowMatchFinder: params->useRowMatchFinder, forDDSDict: (U32)ms->dedicatedDictSearch)) { |
2277 | U32 const chainSize = (U32)1 << params->cParams.chainLog; |
2278 | if (params->cParams.strategy == ZSTD_btlazy2) |
2279 | ZSTD_reduceTable_btlazy2(table: ms->chainTable, size: chainSize, reducerValue); |
2280 | else |
2281 | ZSTD_reduceTable(table: ms->chainTable, size: chainSize, reducerValue); |
2282 | } |
2283 | |
2284 | if (ms->hashLog3) { |
2285 | U32 const h3Size = (U32)1 << ms->hashLog3; |
2286 | ZSTD_reduceTable(table: ms->hashTable3, size: h3Size, reducerValue); |
2287 | } |
2288 | } |
2289 | |
2290 | |
2291 | /*-******************************************************* |
2292 | * Block entropic compression |
2293 | *********************************************************/ |
2294 | |
2295 | /* See doc/zstd_compression_format.md for detailed format description */ |
2296 | |
2297 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) |
2298 | { |
2299 | const seqDef* const sequences = seqStorePtr->sequencesStart; |
2300 | BYTE* const llCodeTable = seqStorePtr->llCode; |
2301 | BYTE* const ofCodeTable = seqStorePtr->ofCode; |
2302 | BYTE* const mlCodeTable = seqStorePtr->mlCode; |
2303 | U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); |
2304 | U32 u; |
2305 | assert(nbSeq <= seqStorePtr->maxNbSeq); |
2306 | for (u=0; u<nbSeq; u++) { |
2307 | U32 const llv = sequences[u].litLength; |
2308 | U32 const mlv = sequences[u].mlBase; |
2309 | llCodeTable[u] = (BYTE)ZSTD_LLcode(litLength: llv); |
2310 | ofCodeTable[u] = (BYTE)ZSTD_highbit32(val: sequences[u].offBase); |
2311 | mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlBase: mlv); |
2312 | } |
2313 | if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) |
2314 | llCodeTable[seqStorePtr->longLengthPos] = MaxLL; |
2315 | if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) |
2316 | mlCodeTable[seqStorePtr->longLengthPos] = MaxML; |
2317 | } |
2318 | |
2319 | /* ZSTD_useTargetCBlockSize(): |
2320 | * Returns if target compressed block size param is being used. |
2321 | * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. |
2322 | * Returns 1 if true, 0 otherwise. */ |
2323 | static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) |
2324 | { |
2325 | DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)" , cctxParams->targetCBlockSize); |
2326 | return (cctxParams->targetCBlockSize != 0); |
2327 | } |
2328 | |
2329 | /* ZSTD_blockSplitterEnabled(): |
2330 | * Returns if block splitting param is being used |
2331 | * If used, compression will do best effort to split a block in order to improve compression ratio. |
2332 | * At the time this function is called, the parameter must be finalized. |
2333 | * Returns 1 if true, 0 otherwise. */ |
2334 | static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) |
2335 | { |
2336 | DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)" , cctxParams->useBlockSplitter); |
2337 | assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); |
2338 | return (cctxParams->useBlockSplitter == ZSTD_ps_enable); |
2339 | } |
2340 | |
/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
 * and size of the sequences statistics
 */
typedef struct {
    U32 LLtype;            /* encoding type selected for literal lengths */
    U32 Offtype;           /* encoding type selected for offsets */
    U32 MLtype;            /* encoding type selected for match lengths */
    size_t size;           /* nb of bytes written for the table descriptions, or a zstd error code */
    size_t lastCountSize;  /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
} ZSTD_symbolEncodingTypeStats_t;
2351 | |
2352 | /* ZSTD_buildSequencesStatistics(): |
2353 | * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. |
2354 | * Modifies `nextEntropy` to have the appropriate values as a side effect. |
2355 | * nbSeq must be greater than 0. |
2356 | * |
2357 | * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) |
2358 | */ |
2359 | static ZSTD_symbolEncodingTypeStats_t |
2360 | ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, |
2361 | const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, |
2362 | BYTE* dst, const BYTE* const dstEnd, |
2363 | ZSTD_strategy strategy, unsigned* countWorkspace, |
2364 | void* entropyWorkspace, size_t entropyWkspSize) { |
2365 | BYTE* const ostart = dst; |
2366 | const BYTE* const oend = dstEnd; |
2367 | BYTE* op = ostart; |
2368 | FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; |
2369 | FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; |
2370 | FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; |
2371 | const BYTE* const ofCodeTable = seqStorePtr->ofCode; |
2372 | const BYTE* const llCodeTable = seqStorePtr->llCode; |
2373 | const BYTE* const mlCodeTable = seqStorePtr->mlCode; |
2374 | ZSTD_symbolEncodingTypeStats_t stats; |
2375 | |
2376 | stats.lastCountSize = 0; |
2377 | /* convert length/distances into codes */ |
2378 | ZSTD_seqToCodes(seqStorePtr); |
2379 | assert(op <= oend); |
2380 | assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ |
2381 | /* build CTable for Literal Lengths */ |
2382 | { unsigned max = MaxLL; |
2383 | size_t const mostFrequent = HIST_countFast_wksp(count: countWorkspace, maxSymbolValuePtr: &max, src: llCodeTable, srcSize: nbSeq, workSpace: entropyWorkspace, workSpaceSize: entropyWkspSize); /* can't fail */ |
2384 | DEBUGLOG(5, "Building LL table" ); |
2385 | nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; |
2386 | stats.LLtype = ZSTD_selectEncodingType(repeatMode: &nextEntropy->litlength_repeatMode, |
2387 | count: countWorkspace, max, mostFrequent, nbSeq, |
2388 | LLFSELog, prevCTable: prevEntropy->litlengthCTable, |
2389 | defaultNorm: LL_defaultNorm, defaultNormLog: LL_defaultNormLog, |
2390 | isDefaultAllowed: ZSTD_defaultAllowed, strategy); |
2391 | assert(set_basic < set_compressed && set_rle < set_compressed); |
2392 | assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
2393 | { size_t const countSize = ZSTD_buildCTable( |
2394 | dst: op, dstCapacity: (size_t)(oend - op), |
2395 | nextCTable: CTable_LitLength, LLFSELog, type: (symbolEncodingType_e)stats.LLtype, |
2396 | count: countWorkspace, max, codeTable: llCodeTable, nbSeq, |
2397 | defaultNorm: LL_defaultNorm, defaultNormLog: LL_defaultNormLog, MaxLL, |
2398 | prevCTable: prevEntropy->litlengthCTable, |
2399 | prevCTableSize: sizeof(prevEntropy->litlengthCTable), |
2400 | entropyWorkspace, entropyWorkspaceSize: entropyWkspSize); |
2401 | if (ZSTD_isError(code: countSize)) { |
2402 | DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed" ); |
2403 | stats.size = countSize; |
2404 | return stats; |
2405 | } |
2406 | if (stats.LLtype == set_compressed) |
2407 | stats.lastCountSize = countSize; |
2408 | op += countSize; |
2409 | assert(op <= oend); |
2410 | } } |
2411 | /* build CTable for Offsets */ |
2412 | { unsigned max = MaxOff; |
2413 | size_t const mostFrequent = HIST_countFast_wksp( |
2414 | count: countWorkspace, maxSymbolValuePtr: &max, src: ofCodeTable, srcSize: nbSeq, workSpace: entropyWorkspace, workSpaceSize: entropyWkspSize); /* can't fail */ |
2415 | /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ |
2416 | ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; |
2417 | DEBUGLOG(5, "Building OF table" ); |
2418 | nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; |
2419 | stats.Offtype = ZSTD_selectEncodingType(repeatMode: &nextEntropy->offcode_repeatMode, |
2420 | count: countWorkspace, max, mostFrequent, nbSeq, |
2421 | OffFSELog, prevCTable: prevEntropy->offcodeCTable, |
2422 | defaultNorm: OF_defaultNorm, defaultNormLog: OF_defaultNormLog, |
2423 | isDefaultAllowed: defaultPolicy, strategy); |
2424 | assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
2425 | { size_t const countSize = ZSTD_buildCTable( |
2426 | dst: op, dstCapacity: (size_t)(oend - op), |
2427 | nextCTable: CTable_OffsetBits, OffFSELog, type: (symbolEncodingType_e)stats.Offtype, |
2428 | count: countWorkspace, max, codeTable: ofCodeTable, nbSeq, |
2429 | defaultNorm: OF_defaultNorm, defaultNormLog: OF_defaultNormLog, DefaultMaxOff, |
2430 | prevCTable: prevEntropy->offcodeCTable, |
2431 | prevCTableSize: sizeof(prevEntropy->offcodeCTable), |
2432 | entropyWorkspace, entropyWorkspaceSize: entropyWkspSize); |
2433 | if (ZSTD_isError(code: countSize)) { |
2434 | DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed" ); |
2435 | stats.size = countSize; |
2436 | return stats; |
2437 | } |
2438 | if (stats.Offtype == set_compressed) |
2439 | stats.lastCountSize = countSize; |
2440 | op += countSize; |
2441 | assert(op <= oend); |
2442 | } } |
2443 | /* build CTable for MatchLengths */ |
2444 | { unsigned max = MaxML; |
2445 | size_t const mostFrequent = HIST_countFast_wksp( |
2446 | count: countWorkspace, maxSymbolValuePtr: &max, src: mlCodeTable, srcSize: nbSeq, workSpace: entropyWorkspace, workSpaceSize: entropyWkspSize); /* can't fail */ |
2447 | DEBUGLOG(5, "Building ML table (remaining space : %i)" , (int)(oend-op)); |
2448 | nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; |
2449 | stats.MLtype = ZSTD_selectEncodingType(repeatMode: &nextEntropy->matchlength_repeatMode, |
2450 | count: countWorkspace, max, mostFrequent, nbSeq, |
2451 | MLFSELog, prevCTable: prevEntropy->matchlengthCTable, |
2452 | defaultNorm: ML_defaultNorm, defaultNormLog: ML_defaultNormLog, |
2453 | isDefaultAllowed: ZSTD_defaultAllowed, strategy); |
2454 | assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
2455 | { size_t const countSize = ZSTD_buildCTable( |
2456 | dst: op, dstCapacity: (size_t)(oend - op), |
2457 | nextCTable: CTable_MatchLength, MLFSELog, type: (symbolEncodingType_e)stats.MLtype, |
2458 | count: countWorkspace, max, codeTable: mlCodeTable, nbSeq, |
2459 | defaultNorm: ML_defaultNorm, defaultNormLog: ML_defaultNormLog, MaxML, |
2460 | prevCTable: prevEntropy->matchlengthCTable, |
2461 | prevCTableSize: sizeof(prevEntropy->matchlengthCTable), |
2462 | entropyWorkspace, entropyWorkspaceSize: entropyWkspSize); |
2463 | if (ZSTD_isError(code: countSize)) { |
2464 | DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed" ); |
2465 | stats.size = countSize; |
2466 | return stats; |
2467 | } |
2468 | if (stats.MLtype == set_compressed) |
2469 | stats.lastCountSize = countSize; |
2470 | op += countSize; |
2471 | assert(op <= oend); |
2472 | } } |
2473 | stats.size = (size_t)(op-ostart); |
2474 | return stats; |
2475 | } |
2476 | |
2477 | /* ZSTD_entropyCompressSeqStore_internal(): |
2478 | * compresses both literals and sequences |
2479 | * Returns compressed size of block, or a zstd error. |
2480 | */ |
2481 | #define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 |
2482 | MEM_STATIC size_t |
2483 | ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, |
2484 | const ZSTD_entropyCTables_t* prevEntropy, |
2485 | ZSTD_entropyCTables_t* nextEntropy, |
2486 | const ZSTD_CCtx_params* cctxParams, |
2487 | void* dst, size_t dstCapacity, |
2488 | void* entropyWorkspace, size_t entropyWkspSize, |
2489 | const int bmi2) |
2490 | { |
2491 | const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; |
2492 | ZSTD_strategy const strategy = cctxParams->cParams.strategy; |
2493 | unsigned* count = (unsigned*)entropyWorkspace; |
2494 | FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; |
2495 | FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; |
2496 | FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; |
2497 | const seqDef* const sequences = seqStorePtr->sequencesStart; |
2498 | const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; |
2499 | const BYTE* const ofCodeTable = seqStorePtr->ofCode; |
2500 | const BYTE* const llCodeTable = seqStorePtr->llCode; |
2501 | const BYTE* const mlCodeTable = seqStorePtr->mlCode; |
2502 | BYTE* const ostart = (BYTE*)dst; |
2503 | BYTE* const oend = ostart + dstCapacity; |
2504 | BYTE* op = ostart; |
2505 | size_t lastCountSize; |
2506 | |
2507 | entropyWorkspace = count + (MaxSeq + 1); |
2508 | entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); |
2509 | |
2510 | DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)" , nbSeq); |
2511 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); |
2512 | assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); |
2513 | |
2514 | /* Compress literals */ |
2515 | { const BYTE* const literals = seqStorePtr->litStart; |
2516 | size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart; |
2517 | size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart; |
2518 | /* Base suspicion of uncompressibility on ratio of literals to sequences */ |
2519 | unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); |
2520 | size_t const litSize = (size_t)(seqStorePtr->lit - literals); |
2521 | size_t const cSize = ZSTD_compressLiterals( |
2522 | prevHuf: &prevEntropy->huf, nextHuf: &nextEntropy->huf, |
2523 | strategy: cctxParams->cParams.strategy, |
2524 | disableLiteralCompression: ZSTD_literalsCompressionIsDisabled(cctxParams), |
2525 | dst: op, dstCapacity, |
2526 | src: literals, srcSize: litSize, |
2527 | entropyWorkspace, entropyWorkspaceSize: entropyWkspSize, |
2528 | bmi2, suspectUncompressible); |
2529 | FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed" ); |
2530 | assert(cSize <= dstCapacity); |
2531 | op += cSize; |
2532 | } |
2533 | |
2534 | /* Sequences Header */ |
2535 | RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, |
2536 | dstSize_tooSmall, "Can't fit seq hdr in output buf!" ); |
2537 | if (nbSeq < 128) { |
2538 | *op++ = (BYTE)nbSeq; |
2539 | } else if (nbSeq < LONGNBSEQ) { |
2540 | op[0] = (BYTE)((nbSeq>>8) + 0x80); |
2541 | op[1] = (BYTE)nbSeq; |
2542 | op+=2; |
2543 | } else { |
2544 | op[0]=0xFF; |
2545 | MEM_writeLE16(memPtr: op+1, val: (U16)(nbSeq - LONGNBSEQ)); |
2546 | op+=3; |
2547 | } |
2548 | assert(op <= oend); |
2549 | if (nbSeq==0) { |
2550 | /* Copy the old tables over as if we repeated them */ |
2551 | ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); |
2552 | return (size_t)(op - ostart); |
2553 | } |
2554 | { |
2555 | ZSTD_symbolEncodingTypeStats_t stats; |
2556 | BYTE* seqHead = op++; |
2557 | /* build stats for sequences */ |
2558 | stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, |
2559 | prevEntropy: &prevEntropy->fse, nextEntropy: &nextEntropy->fse, |
2560 | dst: op, dstEnd: oend, |
2561 | strategy, countWorkspace: count, |
2562 | entropyWorkspace, entropyWkspSize); |
2563 | FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!" ); |
2564 | *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); |
2565 | lastCountSize = stats.lastCountSize; |
2566 | op += stats.size; |
2567 | } |
2568 | |
2569 | { size_t const bitstreamSize = ZSTD_encodeSequences( |
2570 | dst: op, dstCapacity: (size_t)(oend - op), |
2571 | CTable_MatchLength, mlCodeTable, |
2572 | CTable_OffsetBits, ofCodeTable, |
2573 | CTable_LitLength, llCodeTable, |
2574 | sequences, nbSeq, |
2575 | longOffsets, bmi2); |
2576 | FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed" ); |
2577 | op += bitstreamSize; |
2578 | assert(op <= oend); |
2579 | /* zstd versions <= 1.3.4 mistakenly report corruption when |
2580 | * FSE_readNCount() receives a buffer < 4 bytes. |
2581 | * Fixed by https://github.com/facebook/zstd/pull/1146. |
2582 | * This can happen when the last set_compressed table present is 2 |
2583 | * bytes and the bitstream is only one byte. |
2584 | * In this exceedingly rare case, we will simply emit an uncompressed |
2585 | * block, since it isn't worth optimizing. |
2586 | */ |
2587 | if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { |
2588 | /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ |
2589 | assert(lastCountSize + bitstreamSize == 3); |
2590 | DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " |
2591 | "emitting an uncompressed block." ); |
2592 | return 0; |
2593 | } |
2594 | } |
2595 | |
2596 | DEBUGLOG(5, "compressed block size : %u" , (unsigned)(op - ostart)); |
2597 | return (size_t)(op - ostart); |
2598 | } |
2599 | |
2600 | MEM_STATIC size_t |
2601 | ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, |
2602 | const ZSTD_entropyCTables_t* prevEntropy, |
2603 | ZSTD_entropyCTables_t* nextEntropy, |
2604 | const ZSTD_CCtx_params* cctxParams, |
2605 | void* dst, size_t dstCapacity, |
2606 | size_t srcSize, |
2607 | void* entropyWorkspace, size_t entropyWkspSize, |
2608 | int bmi2) |
2609 | { |
2610 | size_t const cSize = ZSTD_entropyCompressSeqStore_internal( |
2611 | seqStorePtr, prevEntropy, nextEntropy, cctxParams, |
2612 | dst, dstCapacity, |
2613 | entropyWorkspace, entropyWkspSize, bmi2); |
2614 | if (cSize == 0) return 0; |
2615 | /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. |
2616 | * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. |
2617 | */ |
2618 | if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) |
2619 | return 0; /* block not compressed */ |
2620 | FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed" ); |
2621 | |
2622 | /* Check compressibility */ |
2623 | { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, strat: cctxParams->cParams.strategy); |
2624 | if (cSize >= maxCSize) return 0; /* block not compressed */ |
2625 | } |
2626 | DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu" , cSize); |
2627 | return cSize; |
2628 | } |
2629 | |
2630 | /* ZSTD_selectBlockCompressor() : |
2631 | * Not static, but internal use only (used by long distance matcher) |
2632 | * assumption : strat is a valid strategy */ |
2633 | ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) |
2634 | { |
2635 | static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { |
2636 | { ZSTD_compressBlock_fast /* default for 0 */, |
2637 | ZSTD_compressBlock_fast, |
2638 | ZSTD_compressBlock_doubleFast, |
2639 | ZSTD_compressBlock_greedy, |
2640 | ZSTD_compressBlock_lazy, |
2641 | ZSTD_compressBlock_lazy2, |
2642 | ZSTD_compressBlock_btlazy2, |
2643 | ZSTD_compressBlock_btopt, |
2644 | ZSTD_compressBlock_btultra, |
2645 | ZSTD_compressBlock_btultra2 }, |
2646 | { ZSTD_compressBlock_fast_extDict /* default for 0 */, |
2647 | ZSTD_compressBlock_fast_extDict, |
2648 | ZSTD_compressBlock_doubleFast_extDict, |
2649 | ZSTD_compressBlock_greedy_extDict, |
2650 | ZSTD_compressBlock_lazy_extDict, |
2651 | ZSTD_compressBlock_lazy2_extDict, |
2652 | ZSTD_compressBlock_btlazy2_extDict, |
2653 | ZSTD_compressBlock_btopt_extDict, |
2654 | ZSTD_compressBlock_btultra_extDict, |
2655 | ZSTD_compressBlock_btultra_extDict }, |
2656 | { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, |
2657 | ZSTD_compressBlock_fast_dictMatchState, |
2658 | ZSTD_compressBlock_doubleFast_dictMatchState, |
2659 | ZSTD_compressBlock_greedy_dictMatchState, |
2660 | ZSTD_compressBlock_lazy_dictMatchState, |
2661 | ZSTD_compressBlock_lazy2_dictMatchState, |
2662 | ZSTD_compressBlock_btlazy2_dictMatchState, |
2663 | ZSTD_compressBlock_btopt_dictMatchState, |
2664 | ZSTD_compressBlock_btultra_dictMatchState, |
2665 | ZSTD_compressBlock_btultra_dictMatchState }, |
2666 | { NULL /* default for 0 */, |
2667 | NULL, |
2668 | NULL, |
2669 | ZSTD_compressBlock_greedy_dedicatedDictSearch, |
2670 | ZSTD_compressBlock_lazy_dedicatedDictSearch, |
2671 | ZSTD_compressBlock_lazy2_dedicatedDictSearch, |
2672 | NULL, |
2673 | NULL, |
2674 | NULL, |
2675 | NULL } |
2676 | }; |
2677 | ZSTD_blockCompressor selectedCompressor; |
2678 | ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); |
2679 | |
2680 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); |
2681 | DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d" , (int)dictMode, (int)strat, (int)useRowMatchFinder); |
2682 | if (ZSTD_rowMatchFinderUsed(strategy: strat, mode: useRowMatchFinder)) { |
2683 | static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { |
2684 | { ZSTD_compressBlock_greedy_row, |
2685 | ZSTD_compressBlock_lazy_row, |
2686 | ZSTD_compressBlock_lazy2_row }, |
2687 | { ZSTD_compressBlock_greedy_extDict_row, |
2688 | ZSTD_compressBlock_lazy_extDict_row, |
2689 | ZSTD_compressBlock_lazy2_extDict_row }, |
2690 | { ZSTD_compressBlock_greedy_dictMatchState_row, |
2691 | ZSTD_compressBlock_lazy_dictMatchState_row, |
2692 | ZSTD_compressBlock_lazy2_dictMatchState_row }, |
2693 | { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, |
2694 | ZSTD_compressBlock_lazy_dedicatedDictSearch_row, |
2695 | ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } |
2696 | }; |
2697 | DEBUGLOG(4, "Selecting a row-based matchfinder" ); |
2698 | assert(useRowMatchFinder != ZSTD_ps_auto); |
2699 | selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; |
2700 | } else { |
2701 | selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; |
2702 | } |
2703 | assert(selectedCompressor != NULL); |
2704 | return selectedCompressor; |
2705 | } |
2706 | |
2707 | static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, |
2708 | const BYTE* anchor, size_t lastLLSize) |
2709 | { |
2710 | ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); |
2711 | seqStorePtr->lit += lastLLSize; |
2712 | } |
2713 | |
2714 | void ZSTD_resetSeqStore(seqStore_t* ssPtr) |
2715 | { |
2716 | ssPtr->lit = ssPtr->litStart; |
2717 | ssPtr->sequences = ssPtr->sequencesStart; |
2718 | ssPtr->longLengthType = ZSTD_llt_none; |
2719 | } |
2720 | |
/* Outcome reported by ZSTD_buildSeqStore() :
 * ZSTDbss_compress   : sequences were selected and stored in the seqStore,
 * ZSTDbss_noCompress : source too small, compression not attempted. */
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
2722 | |
2723 | static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) |
2724 | { |
2725 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; |
2726 | DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)" , srcSize); |
2727 | assert(srcSize <= ZSTD_BLOCKSIZE_MAX); |
2728 | /* Assert that we have correctly flushed the ctx params into the ms's copy */ |
2729 | ZSTD_assertEqualCParams(cParams1: zc->appliedParams.cParams, cParams2: ms->cParams); |
2730 | if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { |
2731 | if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { |
2732 | ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore: &zc->externSeqStore, nbBytes: srcSize); |
2733 | } else { |
2734 | ZSTD_ldm_skipSequences(rawSeqStore: &zc->externSeqStore, srcSize, minMatch: zc->appliedParams.cParams.minMatch); |
2735 | } |
2736 | return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ |
2737 | } |
2738 | ZSTD_resetSeqStore(ssPtr: &(zc->seqStore)); |
2739 | /* required for optimal parser to read stats from dictionary */ |
2740 | ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; |
2741 | /* tell the optimal parser how we expect to compress literals */ |
2742 | ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; |
2743 | /* a gap between an attached dict and the current window is not safe, |
2744 | * they must remain adjacent, |
2745 | * and when that stops being the case, the dict must be unset */ |
2746 | assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); |
2747 | |
2748 | /* limited update after a very long match */ |
2749 | { const BYTE* const base = ms->window.base; |
2750 | const BYTE* const istart = (const BYTE*)src; |
2751 | const U32 curr = (U32)(istart-base); |
2752 | if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ |
2753 | if (curr > ms->nextToUpdate + 384) |
2754 | ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); |
2755 | } |
2756 | |
2757 | /* select and store sequences */ |
2758 | { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); |
2759 | size_t lastLLSize; |
2760 | { int i; |
2761 | for (i = 0; i < ZSTD_REP_NUM; ++i) |
2762 | zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; |
2763 | } |
2764 | if (zc->externSeqStore.pos < zc->externSeqStore.size) { |
2765 | assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); |
2766 | /* Updates ldmSeqStore.pos */ |
2767 | lastLLSize = |
2768 | ZSTD_ldm_blockCompress(rawSeqStore: &zc->externSeqStore, |
2769 | ms, seqStore: &zc->seqStore, |
2770 | rep: zc->blockState.nextCBlock->rep, |
2771 | useRowMatchFinder: zc->appliedParams.useRowMatchFinder, |
2772 | src, srcSize); |
2773 | assert(zc->externSeqStore.pos <= zc->externSeqStore.size); |
2774 | } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { |
2775 | rawSeqStore_t ldmSeqStore = kNullRawSeqStore; |
2776 | |
2777 | ldmSeqStore.seq = zc->ldmSequences; |
2778 | ldmSeqStore.capacity = zc->maxNbLdmSequences; |
2779 | /* Updates ldmSeqStore.size */ |
2780 | FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, |
2781 | &zc->appliedParams.ldmParams, |
2782 | src, srcSize), "" ); |
2783 | /* Updates ldmSeqStore.pos */ |
2784 | lastLLSize = |
2785 | ZSTD_ldm_blockCompress(rawSeqStore: &ldmSeqStore, |
2786 | ms, seqStore: &zc->seqStore, |
2787 | rep: zc->blockState.nextCBlock->rep, |
2788 | useRowMatchFinder: zc->appliedParams.useRowMatchFinder, |
2789 | src, srcSize); |
2790 | assert(ldmSeqStore.pos == ldmSeqStore.size); |
2791 | } else { /* not long range mode */ |
2792 | ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(strat: zc->appliedParams.cParams.strategy, |
2793 | useRowMatchFinder: zc->appliedParams.useRowMatchFinder, |
2794 | dictMode); |
2795 | ms->ldmSeqStore = NULL; |
2796 | lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); |
2797 | } |
2798 | { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; |
2799 | ZSTD_storeLastLiterals(seqStorePtr: &zc->seqStore, anchor: lastLiterals, lastLLSize); |
2800 | } } |
2801 | return ZSTDbss_compress; |
2802 | } |
2803 | |
2804 | static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) |
2805 | { |
2806 | const seqStore_t* seqStore = ZSTD_getSeqStore(ctx: zc); |
2807 | const seqDef* seqStoreSeqs = seqStore->sequencesStart; |
2808 | size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; |
2809 | size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); |
2810 | size_t literalsRead = 0; |
2811 | size_t lastLLSize; |
2812 | |
2813 | ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; |
2814 | size_t i; |
2815 | repcodes_t updatedRepcodes; |
2816 | |
2817 | assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); |
2818 | /* Ensure we have enough space for last literals "sequence" */ |
2819 | assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); |
2820 | ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); |
2821 | for (i = 0; i < seqStoreSeqSize; ++i) { |
2822 | U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM; |
2823 | outSeqs[i].litLength = seqStoreSeqs[i].litLength; |
2824 | outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH; |
2825 | outSeqs[i].rep = 0; |
2826 | |
2827 | if (i == seqStore->longLengthPos) { |
2828 | if (seqStore->longLengthType == ZSTD_llt_literalLength) { |
2829 | outSeqs[i].litLength += 0x10000; |
2830 | } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { |
2831 | outSeqs[i].matchLength += 0x10000; |
2832 | } |
2833 | } |
2834 | |
2835 | if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) { |
2836 | /* Derive the correct offset corresponding to a repcode */ |
2837 | outSeqs[i].rep = seqStoreSeqs[i].offBase; |
2838 | if (outSeqs[i].litLength != 0) { |
2839 | rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; |
2840 | } else { |
2841 | if (outSeqs[i].rep == 3) { |
2842 | rawOffset = updatedRepcodes.rep[0] - 1; |
2843 | } else { |
2844 | rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; |
2845 | } |
2846 | } |
2847 | } |
2848 | outSeqs[i].offset = rawOffset; |
2849 | /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode |
2850 | so we provide seqStoreSeqs[i].offset - 1 */ |
2851 | ZSTD_updateRep(rep: updatedRepcodes.rep, |
2852 | offBase_minus1: seqStoreSeqs[i].offBase - 1, |
2853 | ll0: seqStoreSeqs[i].litLength == 0); |
2854 | literalsRead += outSeqs[i].litLength; |
2855 | } |
2856 | /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. |
2857 | * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker |
2858 | * for the block boundary, according to the API. |
2859 | */ |
2860 | assert(seqStoreLiteralsSize >= literalsRead); |
2861 | lastLLSize = seqStoreLiteralsSize - literalsRead; |
2862 | outSeqs[i].litLength = (U32)lastLLSize; |
2863 | outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; |
2864 | seqStoreSeqSize++; |
2865 | zc->seqCollector.seqIndex += seqStoreSeqSize; |
2866 | } |
2867 | |
2868 | size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, |
2869 | size_t outSeqsSize, const void* src, size_t srcSize) |
2870 | { |
2871 | const size_t dstCapacity = ZSTD_compressBound(srcSize); |
2872 | void* dst = ZSTD_customMalloc(size: dstCapacity, customMem: ZSTD_defaultCMem); |
2873 | SeqCollector seqCollector; |
2874 | |
2875 | RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!" ); |
2876 | |
2877 | seqCollector.collectSequences = 1; |
2878 | seqCollector.seqStart = outSeqs; |
2879 | seqCollector.seqIndex = 0; |
2880 | seqCollector.maxSequences = outSeqsSize; |
2881 | zc->seqCollector = seqCollector; |
2882 | |
2883 | ZSTD_compress2(cctx: zc, dst, dstCapacity, src, srcSize); |
2884 | ZSTD_customFree(ptr: dst, customMem: ZSTD_defaultCMem); |
2885 | return zc->seqCollector.seqIndex; |
2886 | } |
2887 | |
2888 | size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { |
2889 | size_t in = 0; |
2890 | size_t out = 0; |
2891 | for (; in < seqsSize; ++in) { |
2892 | if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { |
2893 | if (in != seqsSize - 1) { |
2894 | sequences[in+1].litLength += sequences[in].litLength; |
2895 | } |
2896 | } else { |
2897 | sequences[out] = sequences[in]; |
2898 | ++out; |
2899 | } |
2900 | } |
2901 | return out; |
2902 | } |
2903 | |
2904 | /* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ |
2905 | static int ZSTD_isRLE(const BYTE* src, size_t length) { |
2906 | const BYTE* ip = src; |
2907 | const BYTE value = ip[0]; |
2908 | const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); |
2909 | const size_t unrollSize = sizeof(size_t) * 4; |
2910 | const size_t unrollMask = unrollSize - 1; |
2911 | const size_t prefixLength = length & unrollMask; |
2912 | size_t i; |
2913 | size_t u; |
2914 | if (length == 1) return 1; |
2915 | /* Check if prefix is RLE first before using unrolled loop */ |
2916 | if (prefixLength && ZSTD_count(pIn: ip+1, pMatch: ip, pInLimit: ip+prefixLength) != prefixLength-1) { |
2917 | return 0; |
2918 | } |
2919 | for (i = prefixLength; i != length; i += unrollSize) { |
2920 | for (u = 0; u < unrollSize; u += sizeof(size_t)) { |
2921 | if (MEM_readST(memPtr: ip + i + u) != valueST) { |
2922 | return 0; |
2923 | } |
2924 | } |
2925 | } |
2926 | return 1; |
2927 | } |
2928 | |
2929 | /* Returns true if the given block may be RLE. |
2930 | * This is just a heuristic based on the compressibility. |
2931 | * It may return both false positives and false negatives. |
2932 | */ |
2933 | static int ZSTD_maybeRLE(seqStore_t const* seqStore) |
2934 | { |
2935 | size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); |
2936 | size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); |
2937 | |
2938 | return nbSeqs < 4 && nbLits < 10; |
2939 | } |
2940 | |
2941 | static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) |
2942 | { |
2943 | ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; |
2944 | bs->prevCBlock = bs->nextCBlock; |
2945 | bs->nextCBlock = tmp; |
2946 | } |
2947 | |
2948 | /* Writes the block header */ |
2949 | static void (void* op, size_t cSize, size_t blockSize, U32 lastBlock) { |
2950 | U32 const = cSize == 1 ? |
2951 | lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : |
2952 | lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); |
2953 | MEM_writeLE24(memPtr: op, val: cBlockHeader); |
2954 | DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u" , cSize, blockSize, lastBlock); |
2955 | } |
2956 | |
2957 | /* ZSTD_buildBlockEntropyStats_literals() : |
2958 | * Builds entropy for the literals. |
2959 | * Stores literals block type (raw, rle, compressed, repeat) and |
2960 | * huffman description table to hufMetadata. |
2961 | * Requires ENTROPY_WORKSPACE_SIZE workspace |
2962 | * @return : size of huffman description table or error code */ |
2963 | static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, |
2964 | const ZSTD_hufCTables_t* prevHuf, |
2965 | ZSTD_hufCTables_t* nextHuf, |
2966 | ZSTD_hufCTablesMetadata_t* hufMetadata, |
2967 | const int literalsCompressionIsDisabled, |
2968 | void* workspace, size_t wkspSize) |
2969 | { |
2970 | BYTE* const wkspStart = (BYTE*)workspace; |
2971 | BYTE* const wkspEnd = wkspStart + wkspSize; |
2972 | BYTE* const countWkspStart = wkspStart; |
2973 | unsigned* const countWksp = (unsigned*)workspace; |
2974 | const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); |
2975 | BYTE* const nodeWksp = countWkspStart + countWkspSize; |
2976 | const size_t nodeWkspSize = wkspEnd-nodeWksp; |
2977 | unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
2978 | unsigned huffLog = HUF_TABLELOG_DEFAULT; |
2979 | HUF_repeat repeat = prevHuf->repeatMode; |
2980 | DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)" , srcSize); |
2981 | |
2982 | /* Prepare nextEntropy assuming reusing the existing table */ |
2983 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
2984 | |
2985 | if (literalsCompressionIsDisabled) { |
2986 | DEBUGLOG(5, "set_basic - disabled" ); |
2987 | hufMetadata->hType = set_basic; |
2988 | return 0; |
2989 | } |
2990 | |
2991 | /* small ? don't even attempt compression (speed opt) */ |
2992 | #ifndef COMPRESS_LITERALS_SIZE_MIN |
2993 | #define COMPRESS_LITERALS_SIZE_MIN 63 |
2994 | #endif |
2995 | { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; |
2996 | if (srcSize <= minLitSize) { |
2997 | DEBUGLOG(5, "set_basic - too small" ); |
2998 | hufMetadata->hType = set_basic; |
2999 | return 0; |
3000 | } |
3001 | } |
3002 | |
3003 | /* Scan input and build symbol stats */ |
3004 | { size_t const largest = HIST_count_wksp (count: countWksp, maxSymbolValuePtr: &maxSymbolValue, src: (const BYTE*)src, srcSize, workSpace: workspace, workSpaceSize: wkspSize); |
3005 | FORWARD_IF_ERROR(largest, "HIST_count_wksp failed" ); |
3006 | if (largest == srcSize) { |
3007 | DEBUGLOG(5, "set_rle" ); |
3008 | hufMetadata->hType = set_rle; |
3009 | return 0; |
3010 | } |
3011 | if (largest <= (srcSize >> 7)+4) { |
3012 | DEBUGLOG(5, "set_basic - no gain" ); |
3013 | hufMetadata->hType = set_basic; |
3014 | return 0; |
3015 | } |
3016 | } |
3017 | |
3018 | /* Validate the previous Huffman table */ |
3019 | if (repeat == HUF_repeat_check && !HUF_validateCTable(CTable: (HUF_CElt const*)prevHuf->CTable, count: countWksp, maxSymbolValue)) { |
3020 | repeat = HUF_repeat_none; |
3021 | } |
3022 | |
3023 | /* Build Huffman Tree */ |
3024 | ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); |
3025 | huffLog = HUF_optimalTableLog(maxTableLog: huffLog, srcSize, maxSymbolValue); |
3026 | { size_t const maxBits = HUF_buildCTable_wksp(tree: (HUF_CElt*)nextHuf->CTable, count: countWksp, |
3027 | maxSymbolValue, maxNbBits: huffLog, |
3028 | workSpace: nodeWksp, wkspSize: nodeWkspSize); |
3029 | FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp" ); |
3030 | huffLog = (U32)maxBits; |
3031 | { /* Build and write the CTable */ |
3032 | size_t const newCSize = HUF_estimateCompressedSize( |
3033 | CTable: (HUF_CElt*)nextHuf->CTable, count: countWksp, maxSymbolValue); |
3034 | size_t const hSize = HUF_writeCTable_wksp( |
3035 | dst: hufMetadata->hufDesBuffer, maxDstSize: sizeof(hufMetadata->hufDesBuffer), |
3036 | CTable: (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, |
3037 | workspace: nodeWksp, workspaceSize: nodeWkspSize); |
3038 | /* Check against repeating the previous CTable */ |
3039 | if (repeat != HUF_repeat_none) { |
3040 | size_t const oldCSize = HUF_estimateCompressedSize( |
3041 | CTable: (HUF_CElt const*)prevHuf->CTable, count: countWksp, maxSymbolValue); |
3042 | if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { |
3043 | DEBUGLOG(5, "set_repeat - smaller" ); |
3044 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
3045 | hufMetadata->hType = set_repeat; |
3046 | return 0; |
3047 | } |
3048 | } |
3049 | if (newCSize + hSize >= srcSize) { |
3050 | DEBUGLOG(5, "set_basic - no gains" ); |
3051 | ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); |
3052 | hufMetadata->hType = set_basic; |
3053 | return 0; |
3054 | } |
3055 | DEBUGLOG(5, "set_compressed (hSize=%u)" , (U32)hSize); |
3056 | hufMetadata->hType = set_compressed; |
3057 | nextHuf->repeatMode = HUF_repeat_check; |
3058 | return hSize; |
3059 | } |
3060 | } |
3061 | } |
3062 | |
3063 | |
3064 | /* ZSTD_buildDummySequencesStatistics(): |
3065 | * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, |
3066 | * and updates nextEntropy to the appropriate repeatMode. |
3067 | */ |
3068 | static ZSTD_symbolEncodingTypeStats_t |
3069 | ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { |
3070 | ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; |
3071 | nextEntropy->litlength_repeatMode = FSE_repeat_none; |
3072 | nextEntropy->offcode_repeatMode = FSE_repeat_none; |
3073 | nextEntropy->matchlength_repeatMode = FSE_repeat_none; |
3074 | return stats; |
3075 | } |
3076 | |
3077 | /* ZSTD_buildBlockEntropyStats_sequences() : |
3078 | * Builds entropy for the sequences. |
3079 | * Stores symbol compression modes and fse table to fseMetadata. |
3080 | * Requires ENTROPY_WORKSPACE_SIZE wksp. |
3081 | * @return : size of fse tables or error code */ |
3082 | static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, |
3083 | const ZSTD_fseCTables_t* prevEntropy, |
3084 | ZSTD_fseCTables_t* nextEntropy, |
3085 | const ZSTD_CCtx_params* cctxParams, |
3086 | ZSTD_fseCTablesMetadata_t* fseMetadata, |
3087 | void* workspace, size_t wkspSize) |
3088 | { |
3089 | ZSTD_strategy const strategy = cctxParams->cParams.strategy; |
3090 | size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; |
3091 | BYTE* const ostart = fseMetadata->fseTablesBuffer; |
3092 | BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); |
3093 | BYTE* op = ostart; |
3094 | unsigned* countWorkspace = (unsigned*)workspace; |
3095 | unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); |
3096 | size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); |
3097 | ZSTD_symbolEncodingTypeStats_t stats; |
3098 | |
3099 | DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)" , nbSeq); |
3100 | stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, |
3101 | prevEntropy, nextEntropy, dst: op, dstEnd: oend, |
3102 | strategy, countWorkspace, |
3103 | entropyWorkspace, entropyWkspSize: entropyWorkspaceSize) |
3104 | : ZSTD_buildDummySequencesStatistics(nextEntropy); |
3105 | FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!" ); |
3106 | fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; |
3107 | fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; |
3108 | fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; |
3109 | fseMetadata->lastCountSize = stats.lastCountSize; |
3110 | return stats.size; |
3111 | } |
3112 | |
3113 | |
3114 | /* ZSTD_buildBlockEntropyStats() : |
3115 | * Builds entropy for the block. |
3116 | * Requires workspace size ENTROPY_WORKSPACE_SIZE |
3117 | * |
3118 | * @return : 0 on success or error code |
3119 | */ |
3120 | size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, |
3121 | const ZSTD_entropyCTables_t* prevEntropy, |
3122 | ZSTD_entropyCTables_t* nextEntropy, |
3123 | const ZSTD_CCtx_params* cctxParams, |
3124 | ZSTD_entropyCTablesMetadata_t* entropyMetadata, |
3125 | void* workspace, size_t wkspSize) |
3126 | { |
3127 | size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; |
3128 | entropyMetadata->hufMetadata.hufDesSize = |
3129 | ZSTD_buildBlockEntropyStats_literals(src: seqStorePtr->litStart, srcSize: litSize, |
3130 | prevHuf: &prevEntropy->huf, nextHuf: &nextEntropy->huf, |
3131 | hufMetadata: &entropyMetadata->hufMetadata, |
3132 | literalsCompressionIsDisabled: ZSTD_literalsCompressionIsDisabled(cctxParams), |
3133 | workspace, wkspSize); |
3134 | FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed" ); |
3135 | entropyMetadata->fseMetadata.fseTablesSize = |
3136 | ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, |
3137 | prevEntropy: &prevEntropy->fse, nextEntropy: &nextEntropy->fse, |
3138 | cctxParams, |
3139 | fseMetadata: &entropyMetadata->fseMetadata, |
3140 | workspace, wkspSize); |
3141 | FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed" ); |
3142 | return 0; |
3143 | } |
3144 | |
3145 | /* Returns the size estimate for the literals section (header + content) of a block */ |
3146 | static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, |
3147 | const ZSTD_hufCTables_t* huf, |
3148 | const ZSTD_hufCTablesMetadata_t* hufMetadata, |
3149 | void* workspace, size_t wkspSize, |
3150 | int writeEntropy) |
3151 | { |
3152 | unsigned* const countWksp = (unsigned*)workspace; |
3153 | unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
3154 | size_t = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); |
3155 | U32 singleStream = litSize < 256; |
3156 | |
3157 | if (hufMetadata->hType == set_basic) return litSize; |
3158 | else if (hufMetadata->hType == set_rle) return 1; |
3159 | else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { |
3160 | size_t const largest = HIST_count_wksp (count: countWksp, maxSymbolValuePtr: &maxSymbolValue, src: (const BYTE*)literals, srcSize: litSize, workSpace: workspace, workSpaceSize: wkspSize); |
3161 | if (ZSTD_isError(code: largest)) return litSize; |
3162 | { size_t cLitSizeEstimate = HUF_estimateCompressedSize(CTable: (const HUF_CElt*)huf->CTable, count: countWksp, maxSymbolValue); |
3163 | if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; |
3164 | if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ |
3165 | return cLitSizeEstimate + literalSectionHeaderSize; |
3166 | } } |
3167 | assert(0); /* impossible */ |
3168 | return 0; |
3169 | } |
3170 | |
3171 | /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ |
3172 | static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, |
3173 | const BYTE* codeTable, size_t nbSeq, unsigned maxCode, |
3174 | const FSE_CTable* fseCTable, |
3175 | const U8* additionalBits, |
3176 | short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, |
3177 | void* workspace, size_t wkspSize) |
3178 | { |
3179 | unsigned* const countWksp = (unsigned*)workspace; |
3180 | const BYTE* ctp = codeTable; |
3181 | const BYTE* const ctStart = ctp; |
3182 | const BYTE* const ctEnd = ctStart + nbSeq; |
3183 | size_t cSymbolTypeSizeEstimateInBits = 0; |
3184 | unsigned max = maxCode; |
3185 | |
3186 | HIST_countFast_wksp(count: countWksp, maxSymbolValuePtr: &max, src: codeTable, srcSize: nbSeq, workSpace: workspace, workSpaceSize: wkspSize); /* can't fail */ |
3187 | if (type == set_basic) { |
3188 | /* We selected this encoding type, so it must be valid. */ |
3189 | assert(max <= defaultMax); |
3190 | (void)defaultMax; |
3191 | cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(norm: defaultNorm, accuracyLog: defaultNormLog, count: countWksp, max); |
3192 | } else if (type == set_rle) { |
3193 | cSymbolTypeSizeEstimateInBits = 0; |
3194 | } else if (type == set_compressed || type == set_repeat) { |
3195 | cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(ctable: fseCTable, count: countWksp, max); |
3196 | } |
3197 | if (ZSTD_isError(code: cSymbolTypeSizeEstimateInBits)) { |
3198 | return nbSeq * 10; |
3199 | } |
3200 | while (ctp < ctEnd) { |
3201 | if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; |
3202 | else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ |
3203 | ctp++; |
3204 | } |
3205 | return cSymbolTypeSizeEstimateInBits >> 3; |
3206 | } |
3207 | |
3208 | /* Returns the size estimate for the sequences section (header + content) of a block */ |
3209 | static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, |
3210 | const BYTE* llCodeTable, |
3211 | const BYTE* mlCodeTable, |
3212 | size_t nbSeq, |
3213 | const ZSTD_fseCTables_t* fseTables, |
3214 | const ZSTD_fseCTablesMetadata_t* fseMetadata, |
3215 | void* workspace, size_t wkspSize, |
3216 | int writeEntropy) |
3217 | { |
3218 | size_t = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); |
3219 | size_t cSeqSizeEstimate = 0; |
3220 | cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(type: fseMetadata->ofType, codeTable: ofCodeTable, nbSeq, MaxOff, |
3221 | fseCTable: fseTables->offcodeCTable, NULL, |
3222 | defaultNorm: OF_defaultNorm, defaultNormLog: OF_defaultNormLog, DefaultMaxOff, |
3223 | workspace, wkspSize); |
3224 | cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(type: fseMetadata->llType, codeTable: llCodeTable, nbSeq, MaxLL, |
3225 | fseCTable: fseTables->litlengthCTable, additionalBits: LL_bits, |
3226 | defaultNorm: LL_defaultNorm, defaultNormLog: LL_defaultNormLog, MaxLL, |
3227 | workspace, wkspSize); |
3228 | cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(type: fseMetadata->mlType, codeTable: mlCodeTable, nbSeq, MaxML, |
3229 | fseCTable: fseTables->matchlengthCTable, additionalBits: ML_bits, |
3230 | defaultNorm: ML_defaultNorm, defaultNormLog: ML_defaultNormLog, MaxML, |
3231 | workspace, wkspSize); |
3232 | if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; |
3233 | return cSeqSizeEstimate + sequencesSectionHeaderSize; |
3234 | } |
3235 | |
3236 | /* Returns the size estimate for a given stream of literals, of, ll, ml */ |
3237 | static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, |
3238 | const BYTE* ofCodeTable, |
3239 | const BYTE* llCodeTable, |
3240 | const BYTE* mlCodeTable, |
3241 | size_t nbSeq, |
3242 | const ZSTD_entropyCTables_t* entropy, |
3243 | const ZSTD_entropyCTablesMetadata_t* entropyMetadata, |
3244 | void* workspace, size_t wkspSize, |
3245 | int writeLitEntropy, int writeSeqEntropy) { |
3246 | size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, |
3247 | huf: &entropy->huf, hufMetadata: &entropyMetadata->hufMetadata, |
3248 | workspace, wkspSize, writeEntropy: writeLitEntropy); |
3249 | size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, |
3250 | nbSeq, fseTables: &entropy->fse, fseMetadata: &entropyMetadata->fseMetadata, |
3251 | workspace, wkspSize, writeEntropy: writeSeqEntropy); |
3252 | return seqSize + literalsSize + ZSTD_blockHeaderSize; |
3253 | } |
3254 | |
3255 | /* Builds entropy statistics and uses them for blocksize estimation. |
3256 | * |
3257 | * Returns the estimated compressed size of the seqStore, or a zstd error. |
3258 | */ |
3259 | static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) { |
3260 | ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata; |
3261 | DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()" ); |
3262 | FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, |
3263 | &zc->blockState.prevCBlock->entropy, |
3264 | &zc->blockState.nextCBlock->entropy, |
3265 | &zc->appliedParams, |
3266 | entropyMetadata, |
3267 | zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "" ); |
3268 | return ZSTD_estimateBlockSize(literals: seqStore->litStart, litSize: (size_t)(seqStore->lit - seqStore->litStart), |
3269 | ofCodeTable: seqStore->ofCode, llCodeTable: seqStore->llCode, mlCodeTable: seqStore->mlCode, |
3270 | nbSeq: (size_t)(seqStore->sequences - seqStore->sequencesStart), |
3271 | entropy: &zc->blockState.nextCBlock->entropy, entropyMetadata, workspace: zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, |
3272 | writeLitEntropy: (int)(entropyMetadata->hufMetadata.hType == set_compressed), writeSeqEntropy: 1); |
3273 | } |
3274 | |
3275 | /* Returns literals bytes represented in a seqStore */ |
3276 | static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { |
3277 | size_t literalsBytes = 0; |
3278 | size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; |
3279 | size_t i; |
3280 | for (i = 0; i < nbSeqs; ++i) { |
3281 | seqDef seq = seqStore->sequencesStart[i]; |
3282 | literalsBytes += seq.litLength; |
3283 | if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { |
3284 | literalsBytes += 0x10000; |
3285 | } |
3286 | } |
3287 | return literalsBytes; |
3288 | } |
3289 | |
3290 | /* Returns match bytes represented in a seqStore */ |
3291 | static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { |
3292 | size_t matchBytes = 0; |
3293 | size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; |
3294 | size_t i; |
3295 | for (i = 0; i < nbSeqs; ++i) { |
3296 | seqDef seq = seqStore->sequencesStart[i]; |
3297 | matchBytes += seq.mlBase + MINMATCH; |
3298 | if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { |
3299 | matchBytes += 0x10000; |
3300 | } |
3301 | } |
3302 | return matchBytes; |
3303 | } |
3304 | |
3305 | /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). |
3306 | * Stores the result in resultSeqStore. |
3307 | */ |
3308 | static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, |
3309 | const seqStore_t* originalSeqStore, |
3310 | size_t startIdx, size_t endIdx) { |
3311 | BYTE* const litEnd = originalSeqStore->lit; |
3312 | size_t literalsBytes; |
3313 | size_t literalsBytesPreceding = 0; |
3314 | |
3315 | *resultSeqStore = *originalSeqStore; |
3316 | if (startIdx > 0) { |
3317 | resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; |
3318 | literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(seqStore: resultSeqStore); |
3319 | } |
3320 | |
3321 | /* Move longLengthPos into the correct position if necessary */ |
3322 | if (originalSeqStore->longLengthType != ZSTD_llt_none) { |
3323 | if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { |
3324 | resultSeqStore->longLengthType = ZSTD_llt_none; |
3325 | } else { |
3326 | resultSeqStore->longLengthPos -= (U32)startIdx; |
3327 | } |
3328 | } |
3329 | resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; |
3330 | resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; |
3331 | literalsBytes = ZSTD_countSeqStoreLiteralsBytes(seqStore: resultSeqStore); |
3332 | resultSeqStore->litStart += literalsBytesPreceding; |
3333 | if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { |
3334 | /* This accounts for possible last literals if the derived chunk reaches the end of the block */ |
3335 | resultSeqStore->lit = litEnd; |
3336 | } else { |
3337 | resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; |
3338 | } |
3339 | resultSeqStore->llCode += startIdx; |
3340 | resultSeqStore->mlCode += startIdx; |
3341 | resultSeqStore->ofCode += startIdx; |
3342 | } |
3343 | |
3344 | /* |
3345 | * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. |
3346 | * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq(). |
3347 | */ |
3348 | static U32 |
3349 | ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) |
3350 | { |
3351 | U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0; /* [ 0 - 3 ] */ |
3352 | assert(STORED_IS_REPCODE(offCode)); |
3353 | if (adjustedOffCode == ZSTD_REP_NUM) { |
3354 | /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ |
3355 | assert(rep[0] > 0); |
3356 | return rep[0] - 1; |
3357 | } |
3358 | return rep[adjustedOffCode]; |
3359 | } |
3360 | |
3361 | /* |
3362 | * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise |
3363 | * due to emission of RLE/raw blocks that disturb the offset history, |
3364 | * and replaces any repcodes within the seqStore that may be invalid. |
3365 | * |
3366 | * dRepcodes are updated as would be on the decompression side. |
3367 | * cRepcodes are updated exactly in accordance with the seqStore. |
3368 | * |
3369 | * Note : this function assumes seq->offBase respects the following numbering scheme : |
3370 | * 0 : invalid |
3371 | * 1-3 : repcode 1-3 |
3372 | * 4+ : real_offset+3 |
3373 | */ |
3374 | static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, |
3375 | seqStore_t* const seqStore, U32 const nbSeq) { |
3376 | U32 idx = 0; |
3377 | for (; idx < nbSeq; ++idx) { |
3378 | seqDef* const seq = seqStore->sequencesStart + idx; |
3379 | U32 const ll0 = (seq->litLength == 0); |
3380 | U32 const offCode = OFFBASE_TO_STORED(seq->offBase); |
3381 | assert(seq->offBase > 0); |
3382 | if (STORED_IS_REPCODE(offCode)) { |
3383 | U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(rep: dRepcodes->rep, offCode, ll0); |
3384 | U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(rep: cRepcodes->rep, offCode, ll0); |
3385 | /* Adjust simulated decompression repcode history if we come across a mismatch. Replace |
3386 | * the repcode with the offset it actually references, determined by the compression |
3387 | * repcode history. |
3388 | */ |
3389 | if (dRawOffset != cRawOffset) { |
3390 | seq->offBase = cRawOffset + ZSTD_REP_NUM; |
3391 | } |
3392 | } |
3393 | /* Compression repcode history is always updated with values directly from the unmodified seqStore. |
3394 | * Decompression repcode history may use modified seq->offset value taken from compression repcode history. |
3395 | */ |
3396 | ZSTD_updateRep(rep: dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0); |
3397 | ZSTD_updateRep(rep: cRepcodes->rep, offBase_minus1: offCode, ll0); |
3398 | } |
3399 | } |
3400 | |
3401 | /* ZSTD_compressSeqStore_singleBlock(): |
3402 | * Compresses a seqStore into a block with a block header, into the buffer dst. |
3403 | * |
3404 | * Returns the total size of that block (including header) or a ZSTD error code. |
3405 | */ |
3406 | static size_t |
3407 | ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, |
3408 | repcodes_t* const dRep, repcodes_t* const cRep, |
3409 | void* dst, size_t dstCapacity, |
3410 | const void* src, size_t srcSize, |
3411 | U32 lastBlock, U32 isPartition) |
3412 | { |
3413 | const U32 rleMaxLength = 25; |
3414 | BYTE* op = (BYTE*)dst; |
3415 | const BYTE* ip = (const BYTE*)src; |
3416 | size_t cSize; |
3417 | size_t cSeqsSize; |
3418 | |
3419 | /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ |
3420 | repcodes_t const dRepOriginal = *dRep; |
3421 | DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock" ); |
3422 | if (isPartition) |
3423 | ZSTD_seqStore_resolveOffCodes(dRepcodes: dRep, cRepcodes: cRep, seqStore, nbSeq: (U32)(seqStore->sequences - seqStore->sequencesStart)); |
3424 | |
3425 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit" ); |
3426 | cSeqsSize = ZSTD_entropyCompressSeqStore(seqStorePtr: seqStore, |
3427 | prevEntropy: &zc->blockState.prevCBlock->entropy, nextEntropy: &zc->blockState.nextCBlock->entropy, |
3428 | cctxParams: &zc->appliedParams, |
3429 | dst: op + ZSTD_blockHeaderSize, dstCapacity: dstCapacity - ZSTD_blockHeaderSize, |
3430 | srcSize, |
3431 | entropyWorkspace: zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, |
3432 | bmi2: zc->bmi2); |
3433 | FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!" ); |
3434 | |
3435 | if (!zc->isFirstBlock && |
3436 | cSeqsSize < rleMaxLength && |
3437 | ZSTD_isRLE(src: (BYTE const*)src, length: srcSize)) { |
3438 | /* We don't want to emit our first block as a RLE even if it qualifies because |
3439 | * doing so will cause the decoder (cli only) to throw a "should consume all input error." |
3440 | * This is only an issue for zstd <= v1.4.3 |
3441 | */ |
3442 | cSeqsSize = 1; |
3443 | } |
3444 | |
3445 | if (zc->seqCollector.collectSequences) { |
3446 | ZSTD_copyBlockSequences(zc); |
3447 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &zc->blockState); |
3448 | return 0; |
3449 | } |
3450 | |
3451 | if (cSeqsSize == 0) { |
3452 | cSize = ZSTD_noCompressBlock(dst: op, dstCapacity, src: ip, srcSize, lastBlock); |
3453 | FORWARD_IF_ERROR(cSize, "Nocompress block failed" ); |
3454 | DEBUGLOG(4, "Writing out nocompress block, size: %zu" , cSize); |
3455 | *dRep = dRepOriginal; /* reset simulated decompression repcode history */ |
3456 | } else if (cSeqsSize == 1) { |
3457 | cSize = ZSTD_rleCompressBlock(dst: op, dstCapacity, src: *ip, srcSize, lastBlock); |
3458 | FORWARD_IF_ERROR(cSize, "RLE compress block failed" ); |
3459 | DEBUGLOG(4, "Writing out RLE block, size: %zu" , cSize); |
3460 | *dRep = dRepOriginal; /* reset simulated decompression repcode history */ |
3461 | } else { |
3462 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &zc->blockState); |
3463 | writeBlockHeader(op, cSize: cSeqsSize, blockSize: srcSize, lastBlock); |
3464 | cSize = ZSTD_blockHeaderSize + cSeqsSize; |
3465 | DEBUGLOG(4, "Writing out compressed block, size: %zu" , cSize); |
3466 | } |
3467 | |
3468 | if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) |
3469 | zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; |
3470 | |
3471 | return cSize; |
3472 | } |
3473 | |
/* Struct to keep track of where we are in our recursive calls.
 * The recursive split search stops adding splits once idx reaches ZSTD_MAX_NB_BLOCK_SPLITS.
 */
typedef struct {
    U32* splitLocations;    /* Array of split indices */
    size_t idx;             /* The current index within splitLocations being worked on */
} seqStoreSplits;

/* Minimum number of sequences in a range for the split search to consider it :
 * ranges with fewer sequences are not split any further. */
#define MIN_SEQUENCES_BLOCK_SPLITTING 300
3481 | |
3482 | /* Helper function to perform the recursive search for block splits. |
3483 | * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. |
3484 | * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then |
3485 | * we do not recurse. |
3486 | * |
3487 | * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. |
3488 | * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). |
3489 | * In practice, recursion depth usually doesn't go beyond 4. |
3490 | * |
3491 | * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize |
3492 | * maximum of 128 KB, this value is actually impossible to reach. |
3493 | */ |
3494 | static void |
3495 | ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, |
3496 | ZSTD_CCtx* zc, const seqStore_t* origSeqStore) |
3497 | { |
3498 | seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; |
3499 | seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; |
3500 | seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; |
3501 | size_t estimatedOriginalSize; |
3502 | size_t estimatedFirstHalfSize; |
3503 | size_t estimatedSecondHalfSize; |
3504 | size_t midIdx = (startIdx + endIdx)/2; |
3505 | |
3506 | if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { |
3507 | DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences" ); |
3508 | return; |
3509 | } |
3510 | DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu" , startIdx, endIdx); |
3511 | ZSTD_deriveSeqStoreChunk(resultSeqStore: fullSeqStoreChunk, originalSeqStore: origSeqStore, startIdx, endIdx); |
3512 | ZSTD_deriveSeqStoreChunk(resultSeqStore: firstHalfSeqStore, originalSeqStore: origSeqStore, startIdx, endIdx: midIdx); |
3513 | ZSTD_deriveSeqStoreChunk(resultSeqStore: secondHalfSeqStore, originalSeqStore: origSeqStore, startIdx: midIdx, endIdx); |
3514 | estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore: fullSeqStoreChunk, zc); |
3515 | estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore: firstHalfSeqStore, zc); |
3516 | estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore: secondHalfSeqStore, zc); |
3517 | DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu" , |
3518 | estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); |
3519 | if (ZSTD_isError(code: estimatedOriginalSize) || ZSTD_isError(code: estimatedFirstHalfSize) || ZSTD_isError(code: estimatedSecondHalfSize)) { |
3520 | return; |
3521 | } |
3522 | if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { |
3523 | ZSTD_deriveBlockSplitsHelper(splits, startIdx, endIdx: midIdx, zc, origSeqStore); |
3524 | splits->splitLocations[splits->idx] = (U32)midIdx; |
3525 | splits->idx++; |
3526 | ZSTD_deriveBlockSplitsHelper(splits, startIdx: midIdx, endIdx, zc, origSeqStore); |
3527 | } |
3528 | } |
3529 | |
3530 | /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. |
3531 | * |
3532 | * Returns the number of splits made (which equals the size of the partition table - 1). |
3533 | */ |
3534 | static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { |
3535 | seqStoreSplits splits = {partitions, 0}; |
3536 | if (nbSeq <= 4) { |
3537 | DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split" ); |
3538 | /* Refuse to try and split anything with less than 4 sequences */ |
3539 | return 0; |
3540 | } |
3541 | ZSTD_deriveBlockSplitsHelper(splits: &splits, startIdx: 0, endIdx: nbSeq, zc, origSeqStore: &zc->seqStore); |
3542 | splits.splitLocations[splits.idx] = nbSeq; |
3543 | DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu" , splits.idx+1); |
3544 | return splits.idx; |
3545 | } |
3546 | |
3547 | /* ZSTD_compressBlock_splitBlock(): |
3548 | * Attempts to split a given block into multiple blocks to improve compression ratio. |
3549 | * |
3550 | * Returns combined size of all blocks (which includes headers), or a ZSTD error code. |
3551 | */ |
3552 | static size_t |
3553 | ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, |
3554 | const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) |
3555 | { |
3556 | size_t cSize = 0; |
3557 | const BYTE* ip = (const BYTE*)src; |
3558 | BYTE* op = (BYTE*)dst; |
3559 | size_t i = 0; |
3560 | size_t srcBytesTotal = 0; |
3561 | U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ |
3562 | seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore; |
3563 | seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore; |
3564 | size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); |
3565 | |
3566 | /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history |
3567 | * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two |
3568 | * separate repcode histories that simulate repcode history on compression and decompression side, |
3569 | * and use the histories to determine whether we must replace a particular repcode with its raw offset. |
3570 | * |
3571 | * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed |
3572 | * or RLE. This allows us to retrieve the offset value that an invalid repcode references within |
3573 | * a nocompress/RLE block. |
3574 | * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use |
3575 | * the replacement offset value rather than the original repcode to update the repcode history. |
3576 | * dRep also will be the final repcode history sent to the next block. |
3577 | * |
3578 | * See ZSTD_seqStore_resolveOffCodes() for more details. |
3579 | */ |
3580 | repcodes_t dRep; |
3581 | repcodes_t cRep; |
3582 | ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); |
3583 | ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); |
3584 | ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); |
3585 | |
3586 | DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)" , |
3587 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, |
3588 | (unsigned)zc->blockState.matchState.nextToUpdate); |
3589 | |
3590 | if (numSplits == 0) { |
3591 | size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, seqStore: &zc->seqStore, |
3592 | dRep: &dRep, cRep: &cRep, |
3593 | dst: op, dstCapacity, |
3594 | src: ip, srcSize: blockSize, |
3595 | lastBlock, isPartition: 0 /* isPartition */); |
3596 | FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!" ); |
3597 | DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits" ); |
3598 | assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); |
3599 | return cSizeSingleBlock; |
3600 | } |
3601 | |
3602 | ZSTD_deriveSeqStoreChunk(resultSeqStore: currSeqStore, originalSeqStore: &zc->seqStore, startIdx: 0, endIdx: partitions[0]); |
3603 | for (i = 0; i <= numSplits; ++i) { |
3604 | size_t srcBytes; |
3605 | size_t cSizeChunk; |
3606 | U32 const lastPartition = (i == numSplits); |
3607 | U32 lastBlockEntireSrc = 0; |
3608 | |
3609 | srcBytes = ZSTD_countSeqStoreLiteralsBytes(seqStore: currSeqStore) + ZSTD_countSeqStoreMatchBytes(seqStore: currSeqStore); |
3610 | srcBytesTotal += srcBytes; |
3611 | if (lastPartition) { |
3612 | /* This is the final partition, need to account for possible last literals */ |
3613 | srcBytes += blockSize - srcBytesTotal; |
3614 | lastBlockEntireSrc = lastBlock; |
3615 | } else { |
3616 | ZSTD_deriveSeqStoreChunk(resultSeqStore: nextSeqStore, originalSeqStore: &zc->seqStore, startIdx: partitions[i], endIdx: partitions[i+1]); |
3617 | } |
3618 | |
3619 | cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, seqStore: currSeqStore, |
3620 | dRep: &dRep, cRep: &cRep, |
3621 | dst: op, dstCapacity, |
3622 | src: ip, srcSize: srcBytes, |
3623 | lastBlock: lastBlockEntireSrc, isPartition: 1 /* isPartition */); |
3624 | DEBUGLOG(5, "Estimated size: %zu actual size: %zu" , ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); |
3625 | FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!" ); |
3626 | |
3627 | ip += srcBytes; |
3628 | op += cSizeChunk; |
3629 | dstCapacity -= cSizeChunk; |
3630 | cSize += cSizeChunk; |
3631 | *currSeqStore = *nextSeqStore; |
3632 | assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); |
3633 | } |
3634 | /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes |
3635 | * for the next block. |
3636 | */ |
3637 | ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); |
3638 | return cSize; |
3639 | } |
3640 | |
3641 | static size_t |
3642 | ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, |
3643 | void* dst, size_t dstCapacity, |
3644 | const void* src, size_t srcSize, U32 lastBlock) |
3645 | { |
3646 | const BYTE* ip = (const BYTE*)src; |
3647 | BYTE* op = (BYTE*)dst; |
3648 | U32 nbSeq; |
3649 | size_t cSize; |
3650 | DEBUGLOG(4, "ZSTD_compressBlock_splitBlock" ); |
3651 | assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); |
3652 | |
3653 | { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); |
3654 | FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed" ); |
3655 | if (bss == ZSTDbss_noCompress) { |
3656 | if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) |
3657 | zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; |
3658 | cSize = ZSTD_noCompressBlock(dst: op, dstCapacity, src: ip, srcSize, lastBlock); |
3659 | FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed" ); |
3660 | DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block" ); |
3661 | return cSize; |
3662 | } |
3663 | nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); |
3664 | } |
3665 | |
3666 | cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, blockSize: srcSize, lastBlock, nbSeq); |
3667 | FORWARD_IF_ERROR(cSize, "Splitting blocks failed!" ); |
3668 | return cSize; |
3669 | } |
3670 | |
3671 | static size_t |
3672 | ZSTD_compressBlock_internal(ZSTD_CCtx* zc, |
3673 | void* dst, size_t dstCapacity, |
3674 | const void* src, size_t srcSize, U32 frame) |
3675 | { |
3676 | /* This the upper bound for the length of an rle block. |
3677 | * This isn't the actual upper bound. Finding the real threshold |
3678 | * needs further investigation. |
3679 | */ |
3680 | const U32 rleMaxLength = 25; |
3681 | size_t cSize; |
3682 | const BYTE* ip = (const BYTE*)src; |
3683 | BYTE* op = (BYTE*)dst; |
3684 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)" , |
3685 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, |
3686 | (unsigned)zc->blockState.matchState.nextToUpdate); |
3687 | |
3688 | { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); |
3689 | FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed" ); |
3690 | if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } |
3691 | } |
3692 | |
3693 | if (zc->seqCollector.collectSequences) { |
3694 | ZSTD_copyBlockSequences(zc); |
3695 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &zc->blockState); |
3696 | return 0; |
3697 | } |
3698 | |
3699 | /* encode sequences and literals */ |
3700 | cSize = ZSTD_entropyCompressSeqStore(seqStorePtr: &zc->seqStore, |
3701 | prevEntropy: &zc->blockState.prevCBlock->entropy, nextEntropy: &zc->blockState.nextCBlock->entropy, |
3702 | cctxParams: &zc->appliedParams, |
3703 | dst, dstCapacity, |
3704 | srcSize, |
3705 | entropyWorkspace: zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, |
3706 | bmi2: zc->bmi2); |
3707 | |
3708 | if (frame && |
3709 | /* We don't want to emit our first block as a RLE even if it qualifies because |
3710 | * doing so will cause the decoder (cli only) to throw a "should consume all input error." |
3711 | * This is only an issue for zstd <= v1.4.3 |
3712 | */ |
3713 | !zc->isFirstBlock && |
3714 | cSize < rleMaxLength && |
3715 | ZSTD_isRLE(src: ip, length: srcSize)) |
3716 | { |
3717 | cSize = 1; |
3718 | op[0] = ip[0]; |
3719 | } |
3720 | |
3721 | out: |
3722 | if (!ZSTD_isError(code: cSize) && cSize > 1) { |
3723 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &zc->blockState); |
3724 | } |
3725 | /* We check that dictionaries have offset codes available for the first |
3726 | * block. After the first block, the offcode table might not have large |
3727 | * enough codes to represent the offsets in the data. |
3728 | */ |
3729 | if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) |
3730 | zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; |
3731 | |
3732 | return cSize; |
3733 | } |
3734 | |
3735 | static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, |
3736 | void* dst, size_t dstCapacity, |
3737 | const void* src, size_t srcSize, |
3738 | const size_t bss, U32 lastBlock) |
3739 | { |
3740 | DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()" ); |
3741 | if (bss == ZSTDbss_compress) { |
3742 | if (/* We don't want to emit our first block as a RLE even if it qualifies because |
3743 | * doing so will cause the decoder (cli only) to throw a "should consume all input error." |
3744 | * This is only an issue for zstd <= v1.4.3 |
3745 | */ |
3746 | !zc->isFirstBlock && |
3747 | ZSTD_maybeRLE(seqStore: &zc->seqStore) && |
3748 | ZSTD_isRLE(src: (BYTE const*)src, length: srcSize)) |
3749 | { |
3750 | return ZSTD_rleCompressBlock(dst, dstCapacity, src: *(BYTE const*)src, srcSize, lastBlock); |
3751 | } |
3752 | /* Attempt superblock compression. |
3753 | * |
3754 | * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the |
3755 | * standard ZSTD_compressBound(). This is a problem, because even if we have |
3756 | * space now, taking an extra byte now could cause us to run out of space later |
3757 | * and violate ZSTD_compressBound(). |
3758 | * |
3759 | * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. |
3760 | * |
3761 | * In order to respect ZSTD_compressBound() we must attempt to emit a raw |
3762 | * uncompressed block in these cases: |
3763 | * * cSize == 0: Return code for an uncompressed block. |
3764 | * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). |
3765 | * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of |
3766 | * output space. |
3767 | * * cSize >= blockBound(srcSize): We have expanded the block too much so |
3768 | * emit an uncompressed block. |
3769 | */ |
3770 | { |
3771 | size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); |
3772 | if (cSize != ERROR(dstSize_tooSmall)) { |
3773 | size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, strat: zc->appliedParams.cParams.strategy); |
3774 | FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed" ); |
3775 | if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { |
3776 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &zc->blockState); |
3777 | return cSize; |
3778 | } |
3779 | } |
3780 | } |
3781 | } |
3782 | |
3783 | DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()" ); |
3784 | /* Superblock compression failed, attempt to emit a single no compress block. |
3785 | * The decoder will be able to stream this block since it is uncompressed. |
3786 | */ |
3787 | return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); |
3788 | } |
3789 | |
3790 | static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, |
3791 | void* dst, size_t dstCapacity, |
3792 | const void* src, size_t srcSize, |
3793 | U32 lastBlock) |
3794 | { |
3795 | size_t cSize = 0; |
3796 | const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); |
3797 | DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)" , |
3798 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); |
3799 | FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed" ); |
3800 | |
3801 | cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); |
3802 | FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed" ); |
3803 | |
3804 | if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) |
3805 | zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; |
3806 | |
3807 | return cSize; |
3808 | } |
3809 | |
3810 | static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, |
3811 | ZSTD_cwksp* ws, |
3812 | ZSTD_CCtx_params const* params, |
3813 | void const* ip, |
3814 | void const* iend) |
3815 | { |
3816 | U32 const cycleLog = ZSTD_cycleLog(hashLog: params->cParams.chainLog, strat: params->cParams.strategy); |
3817 | U32 const maxDist = (U32)1 << params->cParams.windowLog; |
3818 | if (ZSTD_window_needOverflowCorrection(window: ms->window, cycleLog, maxDist, loadedDictEnd: ms->loadedDictEnd, src: ip, srcEnd: iend)) { |
3819 | U32 const correction = ZSTD_window_correctOverflow(window: &ms->window, cycleLog, maxDist, src: ip); |
3820 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); |
3821 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); |
3822 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); |
3823 | ZSTD_cwksp_mark_tables_dirty(ws); |
3824 | ZSTD_reduceIndex(ms, params, reducerValue: correction); |
3825 | ZSTD_cwksp_mark_tables_clean(ws); |
3826 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; |
3827 | else ms->nextToUpdate -= correction; |
3828 | /* invalidate dictionaries on overflow correction */ |
3829 | ms->loadedDictEnd = 0; |
3830 | ms->dictMatchState = NULL; |
3831 | } |
3832 | } |
3833 | |
3834 | /*! ZSTD_compress_frameChunk() : |
3835 | * Compress a chunk of data into one or multiple blocks. |
3836 | * All blocks will be terminated, all input will be consumed. |
3837 | * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. |
3838 | * Frame is supposed already started (header already produced) |
3839 | * @return : compressed size, or an error code |
3840 | */ |
3841 | static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, |
3842 | void* dst, size_t dstCapacity, |
3843 | const void* src, size_t srcSize, |
3844 | U32 lastFrameChunk) |
3845 | { |
3846 | size_t blockSize = cctx->blockSize; |
3847 | size_t remaining = srcSize; |
3848 | const BYTE* ip = (const BYTE*)src; |
3849 | BYTE* const ostart = (BYTE*)dst; |
3850 | BYTE* op = ostart; |
3851 | U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; |
3852 | |
3853 | assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); |
3854 | |
3855 | DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)" , (unsigned)blockSize); |
3856 | if (cctx->appliedParams.fParams.checksumFlag && srcSize) |
3857 | xxh64_update(state: &cctx->xxhState, input: src, length: srcSize); |
3858 | |
3859 | while (remaining) { |
3860 | ZSTD_matchState_t* const ms = &cctx->blockState.matchState; |
3861 | U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); |
3862 | |
3863 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, |
3864 | dstSize_tooSmall, |
3865 | "not enough space to store compressed block" ); |
3866 | if (remaining < blockSize) blockSize = remaining; |
3867 | |
3868 | ZSTD_overflowCorrectIfNeeded( |
3869 | ms, ws: &cctx->workspace, params: &cctx->appliedParams, ip, iend: ip + blockSize); |
3870 | ZSTD_checkDictValidity(window: &ms->window, blockEnd: ip + blockSize, maxDist, loadedDictEndPtr: &ms->loadedDictEnd, dictMatchStatePtr: &ms->dictMatchState); |
3871 | ZSTD_window_enforceMaxDist(window: &ms->window, blockEnd: ip, maxDist, loadedDictEndPtr: &ms->loadedDictEnd, dictMatchStatePtr: &ms->dictMatchState); |
3872 | |
3873 | /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ |
3874 | if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; |
3875 | |
3876 | { size_t cSize; |
3877 | if (ZSTD_useTargetCBlockSize(cctxParams: &cctx->appliedParams)) { |
3878 | cSize = ZSTD_compressBlock_targetCBlockSize(zc: cctx, dst: op, dstCapacity, src: ip, srcSize: blockSize, lastBlock); |
3879 | FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed" ); |
3880 | assert(cSize > 0); |
3881 | assert(cSize <= blockSize + ZSTD_blockHeaderSize); |
3882 | } else if (ZSTD_blockSplitterEnabled(cctxParams: &cctx->appliedParams)) { |
3883 | cSize = ZSTD_compressBlock_splitBlock(zc: cctx, dst: op, dstCapacity, src: ip, srcSize: blockSize, lastBlock); |
3884 | FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed" ); |
3885 | assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); |
3886 | } else { |
3887 | cSize = ZSTD_compressBlock_internal(zc: cctx, |
3888 | dst: op+ZSTD_blockHeaderSize, dstCapacity: dstCapacity-ZSTD_blockHeaderSize, |
3889 | src: ip, srcSize: blockSize, frame: 1 /* frame */); |
3890 | FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed" ); |
3891 | |
3892 | if (cSize == 0) { /* block is not compressible */ |
3893 | cSize = ZSTD_noCompressBlock(dst: op, dstCapacity, src: ip, srcSize: blockSize, lastBlock); |
3894 | FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed" ); |
3895 | } else { |
3896 | U32 const = cSize == 1 ? |
3897 | lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : |
3898 | lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); |
3899 | MEM_writeLE24(memPtr: op, val: cBlockHeader); |
3900 | cSize += ZSTD_blockHeaderSize; |
3901 | } |
3902 | } |
3903 | |
3904 | |
3905 | ip += blockSize; |
3906 | assert(remaining >= blockSize); |
3907 | remaining -= blockSize; |
3908 | op += cSize; |
3909 | assert(dstCapacity >= cSize); |
3910 | dstCapacity -= cSize; |
3911 | cctx->isFirstBlock = 0; |
3912 | DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u" , |
3913 | (unsigned)cSize); |
3914 | } } |
3915 | |
3916 | if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; |
3917 | return (size_t)(op-ostart); |
3918 | } |
3919 | |
3920 | |
3921 | static size_t (void* dst, size_t dstCapacity, |
3922 | const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) |
3923 | { BYTE* const op = (BYTE*)dst; |
3924 | U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ |
3925 | U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ |
3926 | U32 const checksumFlag = params->fParams.checksumFlag>0; |
3927 | U32 const windowSize = (U32)1 << params->cParams.windowLog; |
3928 | U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); |
3929 | BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); |
3930 | U32 const fcsCode = params->fParams.contentSizeFlag ? |
3931 | (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ |
3932 | BYTE const = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); |
3933 | size_t pos=0; |
3934 | |
3935 | assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); |
3936 | RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, |
3937 | "dst buf is too small to fit worst-case frame header size." ); |
3938 | DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u" , |
3939 | !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); |
3940 | if (params->format == ZSTD_f_zstd1) { |
3941 | MEM_writeLE32(memPtr: dst, ZSTD_MAGICNUMBER); |
3942 | pos = 4; |
3943 | } |
3944 | op[pos++] = frameHeaderDescriptionByte; |
3945 | if (!singleSegment) op[pos++] = windowLogByte; |
3946 | switch(dictIDSizeCode) |
3947 | { |
3948 | default: |
3949 | assert(0); /* impossible */ |
3950 | ZSTD_FALLTHROUGH; |
3951 | case 0 : break; |
3952 | case 1 : op[pos] = (BYTE)(dictID); pos++; break; |
3953 | case 2 : MEM_writeLE16(memPtr: op+pos, val: (U16)dictID); pos+=2; break; |
3954 | case 3 : MEM_writeLE32(memPtr: op+pos, val32: dictID); pos+=4; break; |
3955 | } |
3956 | switch(fcsCode) |
3957 | { |
3958 | default: |
3959 | assert(0); /* impossible */ |
3960 | ZSTD_FALLTHROUGH; |
3961 | case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; |
3962 | case 1 : MEM_writeLE16(memPtr: op+pos, val: (U16)(pledgedSrcSize-256)); pos+=2; break; |
3963 | case 2 : MEM_writeLE32(memPtr: op+pos, val32: (U32)(pledgedSrcSize)); pos+=4; break; |
3964 | case 3 : MEM_writeLE64(memPtr: op+pos, val64: (U64)(pledgedSrcSize)); pos+=8; break; |
3965 | } |
3966 | return pos; |
3967 | } |
3968 | |
3969 | /* ZSTD_writeSkippableFrame_advanced() : |
3970 | * Writes out a skippable frame with the specified magic number variant (16 are supported), |
3971 | * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. |
3972 | * |
3973 | * Returns the total number of bytes written, or a ZSTD error code. |
3974 | */ |
3975 | size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, |
3976 | const void* src, size_t srcSize, unsigned magicVariant) { |
3977 | BYTE* op = (BYTE*)dst; |
3978 | RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, |
3979 | dstSize_tooSmall, "Not enough room for skippable frame" ); |
3980 | RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame" ); |
3981 | RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported" ); |
3982 | |
3983 | MEM_writeLE32(memPtr: op, val32: (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); |
3984 | MEM_writeLE32(memPtr: op+4, val32: (U32)srcSize); |
3985 | ZSTD_memcpy(op+8, src, srcSize); |
3986 | return srcSize + ZSTD_SKIPPABLEHEADERSIZE; |
3987 | } |
3988 | |
3989 | /* ZSTD_writeLastEmptyBlock() : |
3990 | * output an empty Block with end-of-frame mark to complete a frame |
3991 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
3992 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) |
3993 | */ |
3994 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) |
3995 | { |
3996 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, |
3997 | "dst buf is too small to write frame trailer empty block." ); |
3998 | { U32 const = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ |
3999 | MEM_writeLE24(memPtr: dst, val: cBlockHeader24); |
4000 | return ZSTD_blockHeaderSize; |
4001 | } |
4002 | } |
4003 | |
4004 | size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) |
4005 | { |
4006 | RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong, |
4007 | "wrong cctx stage" ); |
4008 | RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable, |
4009 | parameter_unsupported, |
4010 | "incompatible with ldm" ); |
4011 | cctx->externSeqStore.seq = seq; |
4012 | cctx->externSeqStore.size = nbSeq; |
4013 | cctx->externSeqStore.capacity = nbSeq; |
4014 | cctx->externSeqStore.pos = 0; |
4015 | cctx->externSeqStore.posInSequence = 0; |
4016 | return 0; |
4017 | } |
4018 | |
4019 | |
4020 | static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, |
4021 | void* dst, size_t dstCapacity, |
4022 | const void* src, size_t srcSize, |
4023 | U32 frame, U32 lastFrameChunk) |
4024 | { |
4025 | ZSTD_matchState_t* const ms = &cctx->blockState.matchState; |
4026 | size_t fhSize = 0; |
4027 | |
4028 | DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u" , |
4029 | cctx->stage, (unsigned)srcSize); |
4030 | RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, |
4031 | "missing init (ZSTD_compressBegin)" ); |
4032 | |
4033 | if (frame && (cctx->stage==ZSTDcs_init)) { |
4034 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, params: &cctx->appliedParams, |
4035 | pledgedSrcSize: cctx->pledgedSrcSizePlusOne-1, dictID: cctx->dictID); |
4036 | FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed" ); |
4037 | assert(fhSize <= dstCapacity); |
4038 | dstCapacity -= fhSize; |
4039 | dst = (char*)dst + fhSize; |
4040 | cctx->stage = ZSTDcs_ongoing; |
4041 | } |
4042 | |
4043 | if (!srcSize) return fhSize; /* do not generate an empty block if no input */ |
4044 | |
4045 | if (!ZSTD_window_update(window: &ms->window, src, srcSize, forceNonContiguous: ms->forceNonContiguous)) { |
4046 | ms->forceNonContiguous = 0; |
4047 | ms->nextToUpdate = ms->window.dictLimit; |
4048 | } |
4049 | if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { |
4050 | ZSTD_window_update(window: &cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0); |
4051 | } |
4052 | |
4053 | if (!frame) { |
4054 | /* overflow check and correction for block mode */ |
4055 | ZSTD_overflowCorrectIfNeeded( |
4056 | ms, ws: &cctx->workspace, params: &cctx->appliedParams, |
4057 | ip: src, iend: (BYTE const*)src + srcSize); |
4058 | } |
4059 | |
4060 | DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)" , (unsigned)cctx->blockSize); |
4061 | { size_t const cSize = frame ? |
4062 | ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : |
4063 | ZSTD_compressBlock_internal (zc: cctx, dst, dstCapacity, src, srcSize, frame: 0 /* frame */); |
4064 | FORWARD_IF_ERROR(cSize, "%s" , frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed" ); |
4065 | cctx->consumedSrcSize += srcSize; |
4066 | cctx->producedCSize += (cSize + fhSize); |
4067 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
4068 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
4069 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
4070 | RETURN_ERROR_IF( |
4071 | cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, |
4072 | srcSize_wrong, |
4073 | "error : pledgedSrcSize = %u, while realSrcSize >= %u" , |
4074 | (unsigned)cctx->pledgedSrcSizePlusOne-1, |
4075 | (unsigned)cctx->consumedSrcSize); |
4076 | } |
4077 | return cSize + fhSize; |
4078 | } |
4079 | } |
4080 | |
4081 | size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, |
4082 | void* dst, size_t dstCapacity, |
4083 | const void* src, size_t srcSize) |
4084 | { |
4085 | DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)" , (unsigned)srcSize); |
4086 | return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frame: 1 /* frame mode */, lastFrameChunk: 0 /* last chunk */); |
4087 | } |
4088 | |
4089 | |
4090 | size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) |
4091 | { |
4092 | ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; |
4093 | assert(!ZSTD_checkCParams(cParams)); |
4094 | return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); |
4095 | } |
4096 | |
4097 | size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
4098 | { |
4099 | DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u" , (unsigned)srcSize); |
4100 | { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); |
4101 | RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block" ); } |
4102 | |
4103 | return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, frame: 0 /* frame mode */, lastFrameChunk: 0 /* last chunk */); |
4104 | } |
4105 | |
4106 | /*! ZSTD_loadDictionaryContent() : |
4107 | * @return : 0, or an error code |
4108 | */ |
4109 | static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, |
4110 | ldmState_t* ls, |
4111 | ZSTD_cwksp* ws, |
4112 | ZSTD_CCtx_params const* params, |
4113 | const void* src, size_t srcSize, |
4114 | ZSTD_dictTableLoadMethod_e dtlm) |
4115 | { |
4116 | const BYTE* ip = (const BYTE*) src; |
4117 | const BYTE* const iend = ip + srcSize; |
4118 | int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; |
4119 | |
4120 | /* Assert that we the ms params match the params we're being given */ |
4121 | ZSTD_assertEqualCParams(cParams1: params->cParams, cParams2: ms->cParams); |
4122 | |
4123 | if (srcSize > ZSTD_CHUNKSIZE_MAX) { |
4124 | /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. |
4125 | * Dictionaries right at the edge will immediately trigger overflow |
4126 | * correction, but I don't want to insert extra constraints here. |
4127 | */ |
4128 | U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; |
4129 | /* We must have cleared our windows when our source is this large. */ |
4130 | assert(ZSTD_window_isEmpty(ms->window)); |
4131 | if (loadLdmDict) |
4132 | assert(ZSTD_window_isEmpty(ls->window)); |
4133 | /* If the dictionary is too large, only load the suffix of the dictionary. */ |
4134 | if (srcSize > maxDictSize) { |
4135 | ip = iend - maxDictSize; |
4136 | src = ip; |
4137 | srcSize = maxDictSize; |
4138 | } |
4139 | } |
4140 | |
4141 | DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d" , (int)params->useRowMatchFinder); |
4142 | ZSTD_window_update(window: &ms->window, src, srcSize, /* forceNonContiguous */ 0); |
4143 | ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); |
4144 | ms->forceNonContiguous = params->deterministicRefPrefix; |
4145 | |
4146 | if (loadLdmDict) { |
4147 | ZSTD_window_update(window: &ls->window, src, srcSize, /* forceNonContiguous */ 0); |
4148 | ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); |
4149 | } |
4150 | |
4151 | if (srcSize <= HASH_READ_SIZE) return 0; |
4152 | |
4153 | ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); |
4154 | |
4155 | if (loadLdmDict) |
4156 | ZSTD_ldm_fillHashTable(state: ls, ip, iend, params: ¶ms->ldmParams); |
4157 | |
4158 | switch(params->cParams.strategy) |
4159 | { |
4160 | case ZSTD_fast: |
4161 | ZSTD_fillHashTable(ms, end: iend, dtlm); |
4162 | break; |
4163 | case ZSTD_dfast: |
4164 | ZSTD_fillDoubleHashTable(ms, end: iend, dtlm); |
4165 | break; |
4166 | |
4167 | case ZSTD_greedy: |
4168 | case ZSTD_lazy: |
4169 | case ZSTD_lazy2: |
4170 | assert(srcSize >= HASH_READ_SIZE); |
4171 | if (ms->dedicatedDictSearch) { |
4172 | assert(ms->chainTable != NULL); |
4173 | ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ip: iend-HASH_READ_SIZE); |
4174 | } else { |
4175 | assert(params->useRowMatchFinder != ZSTD_ps_auto); |
4176 | if (params->useRowMatchFinder == ZSTD_ps_enable) { |
4177 | size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16); |
4178 | ZSTD_memset(ms->tagTable, 0, tagTableSize); |
4179 | ZSTD_row_update(ms, ip: iend-HASH_READ_SIZE); |
4180 | DEBUGLOG(4, "Using row-based hash table for lazy dict" ); |
4181 | } else { |
4182 | ZSTD_insertAndFindFirstIndex(ms, ip: iend-HASH_READ_SIZE); |
4183 | DEBUGLOG(4, "Using chain-based hash table for lazy dict" ); |
4184 | } |
4185 | } |
4186 | break; |
4187 | |
4188 | case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ |
4189 | case ZSTD_btopt: |
4190 | case ZSTD_btultra: |
4191 | case ZSTD_btultra2: |
4192 | assert(srcSize >= HASH_READ_SIZE); |
4193 | ZSTD_updateTree(ms, ip: iend-HASH_READ_SIZE, iend); |
4194 | break; |
4195 | |
4196 | default: |
4197 | assert(0); /* not possible : not a valid strategy id */ |
4198 | } |
4199 | |
4200 | ms->nextToUpdate = (U32)(iend - ms->window.base); |
4201 | return 0; |
4202 | } |
4203 | |
4204 | |
4205 | /* Dictionaries that assign zero probability to symbols that show up causes problems |
4206 | * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check |
4207 | * and only dictionaries with 100% valid symbols can be assumed valid. |
4208 | */ |
4209 | static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) |
4210 | { |
4211 | U32 s; |
4212 | if (dictMaxSymbolValue < maxSymbolValue) { |
4213 | return FSE_repeat_check; |
4214 | } |
4215 | for (s = 0; s <= maxSymbolValue; ++s) { |
4216 | if (normalizedCounter[s] == 0) { |
4217 | return FSE_repeat_check; |
4218 | } |
4219 | } |
4220 | return FSE_repeat_valid; |
4221 | } |
4222 | |
4223 | size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, |
4224 | const void* const dict, size_t dictSize) |
4225 | { |
4226 | short offcodeNCount[MaxOff+1]; |
4227 | unsigned offcodeMaxValue = MaxOff; |
4228 | const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ |
4229 | const BYTE* const dictEnd = dictPtr + dictSize; |
4230 | dictPtr += 8; |
4231 | bs->entropy.huf.repeatMode = HUF_repeat_check; |
4232 | |
4233 | { unsigned maxSymbolValue = 255; |
4234 | unsigned hasZeroWeights = 1; |
4235 | size_t const = HUF_readCTable(CTable: (HUF_CElt*)bs->entropy.huf.CTable, maxSymbolValuePtr: &maxSymbolValue, src: dictPtr, |
4236 | srcSize: dictEnd-dictPtr, hasZeroWeights: &hasZeroWeights); |
4237 | |
4238 | /* We only set the loaded table as valid if it contains all non-zero |
4239 | * weights. Otherwise, we set it to check */ |
4240 | if (!hasZeroWeights) |
4241 | bs->entropy.huf.repeatMode = HUF_repeat_valid; |
4242 | |
4243 | RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "" ); |
4244 | RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "" ); |
4245 | dictPtr += hufHeaderSize; |
4246 | } |
4247 | |
4248 | { unsigned offcodeLog; |
4249 | size_t const = FSE_readNCount(normalizedCounter: offcodeNCount, maxSymbolValuePtr: &offcodeMaxValue, tableLogPtr: &offcodeLog, rBuffer: dictPtr, rBuffSize: dictEnd-dictPtr); |
4250 | RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "" ); |
4251 | RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "" ); |
4252 | /* fill all offset symbols to avoid garbage at end of table */ |
4253 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( |
4254 | bs->entropy.fse.offcodeCTable, |
4255 | offcodeNCount, MaxOff, offcodeLog, |
4256 | workspace, HUF_WORKSPACE_SIZE)), |
4257 | dictionary_corrupted, "" ); |
4258 | /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ |
4259 | dictPtr += offcodeHeaderSize; |
4260 | } |
4261 | |
4262 | { short matchlengthNCount[MaxML+1]; |
4263 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; |
4264 | size_t const = FSE_readNCount(normalizedCounter: matchlengthNCount, maxSymbolValuePtr: &matchlengthMaxValue, tableLogPtr: &matchlengthLog, rBuffer: dictPtr, rBuffSize: dictEnd-dictPtr); |
4265 | RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "" ); |
4266 | RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "" ); |
4267 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( |
4268 | bs->entropy.fse.matchlengthCTable, |
4269 | matchlengthNCount, matchlengthMaxValue, matchlengthLog, |
4270 | workspace, HUF_WORKSPACE_SIZE)), |
4271 | dictionary_corrupted, "" ); |
4272 | bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(normalizedCounter: matchlengthNCount, dictMaxSymbolValue: matchlengthMaxValue, MaxML); |
4273 | dictPtr += matchlengthHeaderSize; |
4274 | } |
4275 | |
4276 | { short litlengthNCount[MaxLL+1]; |
4277 | unsigned litlengthMaxValue = MaxLL, litlengthLog; |
4278 | size_t const = FSE_readNCount(normalizedCounter: litlengthNCount, maxSymbolValuePtr: &litlengthMaxValue, tableLogPtr: &litlengthLog, rBuffer: dictPtr, rBuffSize: dictEnd-dictPtr); |
4279 | RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "" ); |
4280 | RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "" ); |
4281 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( |
4282 | bs->entropy.fse.litlengthCTable, |
4283 | litlengthNCount, litlengthMaxValue, litlengthLog, |
4284 | workspace, HUF_WORKSPACE_SIZE)), |
4285 | dictionary_corrupted, "" ); |
4286 | bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(normalizedCounter: litlengthNCount, dictMaxSymbolValue: litlengthMaxValue, MaxLL); |
4287 | dictPtr += litlengthHeaderSize; |
4288 | } |
4289 | |
4290 | RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "" ); |
4291 | bs->rep[0] = MEM_readLE32(memPtr: dictPtr+0); |
4292 | bs->rep[1] = MEM_readLE32(memPtr: dictPtr+4); |
4293 | bs->rep[2] = MEM_readLE32(memPtr: dictPtr+8); |
4294 | dictPtr += 12; |
4295 | |
4296 | { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); |
4297 | U32 offcodeMax = MaxOff; |
4298 | if (dictContentSize <= ((U32)-1) - 128 KB) { |
4299 | U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ |
4300 | offcodeMax = ZSTD_highbit32(val: maxOffset); /* Calculate minimum offset code required to represent maxOffset */ |
4301 | } |
4302 | /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ |
4303 | bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(normalizedCounter: offcodeNCount, dictMaxSymbolValue: offcodeMaxValue, MIN(offcodeMax, MaxOff)); |
4304 | |
4305 | /* All repCodes must be <= dictContentSize and != 0 */ |
4306 | { U32 u; |
4307 | for (u=0; u<3; u++) { |
4308 | RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "" ); |
4309 | RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "" ); |
4310 | } } } |
4311 | |
4312 | return dictPtr - (const BYTE*)dict; |
4313 | } |
4314 | |
4315 | /* Dictionary format : |
4316 | * See : |
4317 | * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format |
4318 | */ |
4319 | /*! ZSTD_loadZstdDictionary() : |
4320 | * @return : dictID, or an error code |
4321 | * assumptions : magic number supposed already checked |
4322 | * dictSize supposed >= 8 |
4323 | */ |
4324 | static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, |
4325 | ZSTD_matchState_t* ms, |
4326 | ZSTD_cwksp* ws, |
4327 | ZSTD_CCtx_params const* params, |
4328 | const void* dict, size_t dictSize, |
4329 | ZSTD_dictTableLoadMethod_e dtlm, |
4330 | void* workspace) |
4331 | { |
4332 | const BYTE* dictPtr = (const BYTE*)dict; |
4333 | const BYTE* const dictEnd = dictPtr + dictSize; |
4334 | size_t dictID; |
4335 | size_t eSize; |
4336 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); |
4337 | assert(dictSize >= 8); |
4338 | assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); |
4339 | |
4340 | dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(memPtr: dictPtr + 4 /* skip magic number */ ); |
4341 | eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); |
4342 | FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed" ); |
4343 | dictPtr += eSize; |
4344 | |
4345 | { |
4346 | size_t const dictContentSize = (size_t)(dictEnd - dictPtr); |
4347 | FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( |
4348 | ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "" ); |
4349 | } |
4350 | return dictID; |
4351 | } |
4352 | |
4353 | /* ZSTD_compress_insertDictionary() : |
4354 | * @return : dictID, or an error code */ |
4355 | static size_t |
4356 | ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, |
4357 | ZSTD_matchState_t* ms, |
4358 | ldmState_t* ls, |
4359 | ZSTD_cwksp* ws, |
4360 | const ZSTD_CCtx_params* params, |
4361 | const void* dict, size_t dictSize, |
4362 | ZSTD_dictContentType_e dictContentType, |
4363 | ZSTD_dictTableLoadMethod_e dtlm, |
4364 | void* workspace) |
4365 | { |
4366 | DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)" , (U32)dictSize); |
4367 | if ((dict==NULL) || (dictSize<8)) { |
4368 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "" ); |
4369 | return 0; |
4370 | } |
4371 | |
4372 | ZSTD_reset_compressedBlockState(bs); |
4373 | |
4374 | /* dict restricted modes */ |
4375 | if (dictContentType == ZSTD_dct_rawContent) |
4376 | return ZSTD_loadDictionaryContent(ms, ls, ws, params, src: dict, srcSize: dictSize, dtlm); |
4377 | |
4378 | if (MEM_readLE32(memPtr: dict) != ZSTD_MAGIC_DICTIONARY) { |
4379 | if (dictContentType == ZSTD_dct_auto) { |
4380 | DEBUGLOG(4, "raw content dictionary detected" ); |
4381 | return ZSTD_loadDictionaryContent( |
4382 | ms, ls, ws, params, src: dict, srcSize: dictSize, dtlm); |
4383 | } |
4384 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "" ); |
4385 | assert(0); /* impossible */ |
4386 | } |
4387 | |
4388 | /* dict as full zstd dictionary */ |
4389 | return ZSTD_loadZstdDictionary( |
4390 | bs, ms, ws, params, dict, dictSize, dtlm, workspace); |
4391 | } |
4392 | |
4393 | #define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) |
4394 | #define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) |
4395 | |
4396 | /*! ZSTD_compressBegin_internal() : |
4397 | * @return : 0, or an error code */ |
4398 | static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, |
4399 | const void* dict, size_t dictSize, |
4400 | ZSTD_dictContentType_e dictContentType, |
4401 | ZSTD_dictTableLoadMethod_e dtlm, |
4402 | const ZSTD_CDict* cdict, |
4403 | const ZSTD_CCtx_params* params, U64 pledgedSrcSize, |
4404 | ZSTD_buffered_policy_e zbuff) |
4405 | { |
4406 | size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; |
4407 | DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u" , params->cParams.windowLog); |
4408 | /* params are supposed to be fully validated at this point */ |
4409 | assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); |
4410 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
4411 | if ( (cdict) |
4412 | && (cdict->dictContentSize > 0) |
4413 | && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF |
4414 | || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER |
4415 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN |
4416 | || cdict->compressionLevel == 0) |
4417 | && (params->attachDictPref != ZSTD_dictForceLoad) ) { |
4418 | return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); |
4419 | } |
4420 | |
4421 | FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, |
4422 | dictContentSize, |
4423 | ZSTDcrp_makeClean, zbuff) , "" ); |
4424 | { size_t const dictID = cdict ? |
4425 | ZSTD_compress_insertDictionary( |
4426 | bs: cctx->blockState.prevCBlock, ms: &cctx->blockState.matchState, |
4427 | ls: &cctx->ldmState, ws: &cctx->workspace, params: &cctx->appliedParams, dict: cdict->dictContent, |
4428 | dictSize: cdict->dictContentSize, dictContentType: cdict->dictContentType, dtlm, |
4429 | workspace: cctx->entropyWorkspace) |
4430 | : ZSTD_compress_insertDictionary( |
4431 | bs: cctx->blockState.prevCBlock, ms: &cctx->blockState.matchState, |
4432 | ls: &cctx->ldmState, ws: &cctx->workspace, params: &cctx->appliedParams, dict, dictSize, |
4433 | dictContentType, dtlm, workspace: cctx->entropyWorkspace); |
4434 | FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed" ); |
4435 | assert(dictID <= UINT_MAX); |
4436 | cctx->dictID = (U32)dictID; |
4437 | cctx->dictContentSize = dictContentSize; |
4438 | } |
4439 | return 0; |
4440 | } |
4441 | |
4442 | size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, |
4443 | const void* dict, size_t dictSize, |
4444 | ZSTD_dictContentType_e dictContentType, |
4445 | ZSTD_dictTableLoadMethod_e dtlm, |
4446 | const ZSTD_CDict* cdict, |
4447 | const ZSTD_CCtx_params* params, |
4448 | unsigned long long pledgedSrcSize) |
4449 | { |
4450 | DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u" , params->cParams.windowLog); |
4451 | /* compression parameters verification and optimization */ |
4452 | FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "" ); |
4453 | return ZSTD_compressBegin_internal(cctx, |
4454 | dict, dictSize, dictContentType, dtlm, |
4455 | cdict, |
4456 | params, pledgedSrcSize, |
4457 | zbuff: ZSTDb_not_buffered); |
4458 | } |
4459 | |
4460 | /*! ZSTD_compressBegin_advanced() : |
4461 | * @return : 0, or an error code */ |
4462 | size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, |
4463 | const void* dict, size_t dictSize, |
4464 | ZSTD_parameters params, unsigned long long pledgedSrcSize) |
4465 | { |
4466 | ZSTD_CCtx_params cctxParams; |
4467 | ZSTD_CCtxParams_init_internal(cctxParams: &cctxParams, params: ¶ms, ZSTD_NO_CLEVEL); |
4468 | return ZSTD_compressBegin_advanced_internal(cctx, |
4469 | dict, dictSize, dictContentType: ZSTD_dct_auto, dtlm: ZSTD_dtlm_fast, |
4470 | NULL /*cdict*/, |
4471 | params: &cctxParams, pledgedSrcSize); |
4472 | } |
4473 | |
4474 | size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) |
4475 | { |
4476 | ZSTD_CCtx_params cctxParams; |
4477 | { |
4478 | ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_noAttachDict); |
4479 | ZSTD_CCtxParams_init_internal(cctxParams: &cctxParams, params: ¶ms, compressionLevel: (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); |
4480 | } |
4481 | DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)" , (unsigned)dictSize); |
4482 | return ZSTD_compressBegin_internal(cctx, dict, dictSize, dictContentType: ZSTD_dct_auto, dtlm: ZSTD_dtlm_fast, NULL, |
4483 | params: &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, zbuff: ZSTDb_not_buffered); |
4484 | } |
4485 | |
4486 | size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) |
4487 | { |
4488 | return ZSTD_compressBegin_usingDict(cctx, NULL, dictSize: 0, compressionLevel); |
4489 | } |
4490 | |
4491 | |
4492 | /*! ZSTD_writeEpilogue() : |
4493 | * Ends a frame. |
4494 | * @return : nb of bytes written into dst (or an error code) */ |
4495 | static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) |
4496 | { |
4497 | BYTE* const ostart = (BYTE*)dst; |
4498 | BYTE* op = ostart; |
4499 | size_t fhSize = 0; |
4500 | |
4501 | DEBUGLOG(4, "ZSTD_writeEpilogue" ); |
4502 | RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing" ); |
4503 | |
4504 | /* special case : empty frame */ |
4505 | if (cctx->stage == ZSTDcs_init) { |
4506 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, params: &cctx->appliedParams, pledgedSrcSize: 0, dictID: 0); |
4507 | FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed" ); |
4508 | dstCapacity -= fhSize; |
4509 | op += fhSize; |
4510 | cctx->stage = ZSTDcs_ongoing; |
4511 | } |
4512 | |
4513 | if (cctx->stage != ZSTDcs_ending) { |
4514 | /* write one last empty block, make it the "last" block */ |
4515 | U32 const = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; |
4516 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue" ); |
4517 | MEM_writeLE32(memPtr: op, val32: cBlockHeader24); |
4518 | op += ZSTD_blockHeaderSize; |
4519 | dstCapacity -= ZSTD_blockHeaderSize; |
4520 | } |
4521 | |
4522 | if (cctx->appliedParams.fParams.checksumFlag) { |
4523 | U32 const checksum = (U32) xxh64_digest(state: &cctx->xxhState); |
4524 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum" ); |
4525 | DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X" , (unsigned)checksum); |
4526 | MEM_writeLE32(memPtr: op, val32: checksum); |
4527 | op += 4; |
4528 | } |
4529 | |
4530 | cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ |
4531 | return op-ostart; |
4532 | } |
4533 | |
4534 | void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t ) |
4535 | { |
4536 | (void)cctx; |
4537 | (void)extraCSize; |
4538 | } |
4539 | |
4540 | size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, |
4541 | void* dst, size_t dstCapacity, |
4542 | const void* src, size_t srcSize) |
4543 | { |
4544 | size_t endResult; |
4545 | size_t const cSize = ZSTD_compressContinue_internal(cctx, |
4546 | dst, dstCapacity, src, srcSize, |
4547 | frame: 1 /* frame mode */, lastFrameChunk: 1 /* last chunk */); |
4548 | FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed" ); |
4549 | endResult = ZSTD_writeEpilogue(cctx, dst: (char*)dst + cSize, dstCapacity: dstCapacity-cSize); |
4550 | FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed" ); |
4551 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
4552 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
4553 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
4554 | DEBUGLOG(4, "end of frame : controlling src size" ); |
4555 | RETURN_ERROR_IF( |
4556 | cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, |
4557 | srcSize_wrong, |
4558 | "error : pledgedSrcSize = %u, while realSrcSize = %u" , |
4559 | (unsigned)cctx->pledgedSrcSizePlusOne-1, |
4560 | (unsigned)cctx->consumedSrcSize); |
4561 | } |
4562 | ZSTD_CCtx_trace(cctx, extraCSize: endResult); |
4563 | return cSize + endResult; |
4564 | } |
4565 | |
4566 | size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, |
4567 | void* dst, size_t dstCapacity, |
4568 | const void* src, size_t srcSize, |
4569 | const void* dict,size_t dictSize, |
4570 | ZSTD_parameters params) |
4571 | { |
4572 | DEBUGLOG(4, "ZSTD_compress_advanced" ); |
4573 | FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "" ); |
4574 | ZSTD_CCtxParams_init_internal(cctxParams: &cctx->simpleApiParams, params: ¶ms, ZSTD_NO_CLEVEL); |
4575 | return ZSTD_compress_advanced_internal(cctx, |
4576 | dst, dstCapacity, |
4577 | src, srcSize, |
4578 | dict, dictSize, |
4579 | params: &cctx->simpleApiParams); |
4580 | } |
4581 | |
4582 | /* Internal */ |
4583 | size_t ZSTD_compress_advanced_internal( |
4584 | ZSTD_CCtx* cctx, |
4585 | void* dst, size_t dstCapacity, |
4586 | const void* src, size_t srcSize, |
4587 | const void* dict,size_t dictSize, |
4588 | const ZSTD_CCtx_params* params) |
4589 | { |
4590 | DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)" , (unsigned)srcSize); |
4591 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, |
4592 | dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, |
4593 | params, srcSize, ZSTDb_not_buffered) , "" ); |
4594 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
4595 | } |
4596 | |
4597 | size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, |
4598 | void* dst, size_t dstCapacity, |
4599 | const void* src, size_t srcSize, |
4600 | const void* dict, size_t dictSize, |
4601 | int compressionLevel) |
4602 | { |
4603 | { |
4604 | ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSizeHint: srcSize, dictSize: dict ? dictSize : 0, mode: ZSTD_cpm_noAttachDict); |
4605 | assert(params.fParams.contentSizeFlag == 1); |
4606 | ZSTD_CCtxParams_init_internal(cctxParams: &cctx->simpleApiParams, params: ¶ms, compressionLevel: (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); |
4607 | } |
4608 | DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)" , (unsigned)srcSize); |
4609 | return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, params: &cctx->simpleApiParams); |
4610 | } |
4611 | |
4612 | size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, |
4613 | void* dst, size_t dstCapacity, |
4614 | const void* src, size_t srcSize, |
4615 | int compressionLevel) |
4616 | { |
4617 | DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)" , (unsigned)srcSize); |
4618 | assert(cctx != NULL); |
4619 | return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, dictSize: 0, compressionLevel); |
4620 | } |
4621 | |
4622 | size_t ZSTD_compress(void* dst, size_t dstCapacity, |
4623 | const void* src, size_t srcSize, |
4624 | int compressionLevel) |
4625 | { |
4626 | size_t result; |
4627 | ZSTD_CCtx* cctx = ZSTD_createCCtx(); |
4628 | RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed" ); |
4629 | result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); |
4630 | ZSTD_freeCCtx(cctx); |
4631 | return result; |
4632 | } |
4633 | |
4634 | |
4635 | /* ===== Dictionary API ===== */ |
4636 | |
4637 | /*! ZSTD_estimateCDictSize_advanced() : |
4638 | * Estimate amount of memory that will be needed to create a dictionary with following arguments */ |
4639 | size_t ZSTD_estimateCDictSize_advanced( |
4640 | size_t dictSize, ZSTD_compressionParameters cParams, |
4641 | ZSTD_dictLoadMethod_e dictLoadMethod) |
4642 | { |
4643 | DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u" , (unsigned)sizeof(ZSTD_CDict)); |
4644 | return ZSTD_cwksp_alloc_size(size: sizeof(ZSTD_CDict)) |
4645 | + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) |
4646 | /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small |
4647 | * in case we are using DDS with row-hash. */ |
4648 | + ZSTD_sizeof_matchState(cParams: &cParams, useRowMatchFinder: ZSTD_resolveRowMatchFinderMode(mode: ZSTD_ps_auto, cParams: &cParams), |
4649 | /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) |
4650 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 |
4651 | : ZSTD_cwksp_alloc_size(size: ZSTD_cwksp_align(size: dictSize, align: sizeof(void *)))); |
4652 | } |
4653 | |
4654 | size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) |
4655 | { |
4656 | ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_createCDict); |
4657 | return ZSTD_estimateCDictSize_advanced(dictSize, cParams, dictLoadMethod: ZSTD_dlm_byCopy); |
4658 | } |
4659 | |
4660 | size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) |
4661 | { |
4662 | if (cdict==NULL) return 0; /* support sizeof on NULL */ |
4663 | DEBUGLOG(5, "sizeof(*cdict) : %u" , (unsigned)sizeof(*cdict)); |
4664 | /* cdict may be in the workspace */ |
4665 | return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) |
4666 | + ZSTD_cwksp_sizeof(ws: &cdict->workspace); |
4667 | } |
4668 | |
4669 | static size_t ZSTD_initCDict_internal( |
4670 | ZSTD_CDict* cdict, |
4671 | const void* dictBuffer, size_t dictSize, |
4672 | ZSTD_dictLoadMethod_e dictLoadMethod, |
4673 | ZSTD_dictContentType_e dictContentType, |
4674 | ZSTD_CCtx_params params) |
4675 | { |
4676 | DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)" , (unsigned)dictContentType); |
4677 | assert(!ZSTD_checkCParams(params.cParams)); |
4678 | cdict->matchState.cParams = params.cParams; |
4679 | cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; |
4680 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { |
4681 | cdict->dictContent = dictBuffer; |
4682 | } else { |
4683 | void *internalBuffer = ZSTD_cwksp_reserve_object(ws: &cdict->workspace, bytes: ZSTD_cwksp_align(size: dictSize, align: sizeof(void*))); |
4684 | RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!" ); |
4685 | cdict->dictContent = internalBuffer; |
4686 | ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); |
4687 | } |
4688 | cdict->dictContentSize = dictSize; |
4689 | cdict->dictContentType = dictContentType; |
4690 | |
4691 | cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(ws: &cdict->workspace, HUF_WORKSPACE_SIZE); |
4692 | |
4693 | |
4694 | /* Reset the state to no dictionary */ |
4695 | ZSTD_reset_compressedBlockState(bs: &cdict->cBlockState); |
4696 | FORWARD_IF_ERROR(ZSTD_reset_matchState( |
4697 | &cdict->matchState, |
4698 | &cdict->workspace, |
4699 | ¶ms.cParams, |
4700 | params.useRowMatchFinder, |
4701 | ZSTDcrp_makeClean, |
4702 | ZSTDirp_reset, |
4703 | ZSTD_resetTarget_CDict), "" ); |
4704 | /* (Maybe) load the dictionary |
4705 | * Skips loading the dictionary if it is < 8 bytes. |
4706 | */ |
4707 | { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; |
4708 | params.fParams.contentSizeFlag = 1; |
4709 | { size_t const dictID = ZSTD_compress_insertDictionary( |
4710 | bs: &cdict->cBlockState, ms: &cdict->matchState, NULL, ws: &cdict->workspace, |
4711 | params: ¶ms, dict: cdict->dictContent, dictSize: cdict->dictContentSize, |
4712 | dictContentType, dtlm: ZSTD_dtlm_full, workspace: cdict->entropyWorkspace); |
4713 | FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed" ); |
4714 | assert(dictID <= (size_t)(U32)-1); |
4715 | cdict->dictID = (U32)dictID; |
4716 | } |
4717 | } |
4718 | |
4719 | return 0; |
4720 | } |
4721 | |
4722 | static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, |
4723 | ZSTD_dictLoadMethod_e dictLoadMethod, |
4724 | ZSTD_compressionParameters cParams, |
4725 | ZSTD_paramSwitch_e useRowMatchFinder, |
4726 | U32 enableDedicatedDictSearch, |
4727 | ZSTD_customMem customMem) |
4728 | { |
4729 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
4730 | |
4731 | { size_t const workspaceSize = |
4732 | ZSTD_cwksp_alloc_size(size: sizeof(ZSTD_CDict)) + |
4733 | ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + |
4734 | ZSTD_sizeof_matchState(cParams: &cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + |
4735 | (dictLoadMethod == ZSTD_dlm_byRef ? 0 |
4736 | : ZSTD_cwksp_alloc_size(size: ZSTD_cwksp_align(size: dictSize, align: sizeof(void*)))); |
4737 | void* const workspace = ZSTD_customMalloc(size: workspaceSize, customMem); |
4738 | ZSTD_cwksp ws; |
4739 | ZSTD_CDict* cdict; |
4740 | |
4741 | if (!workspace) { |
4742 | ZSTD_customFree(ptr: workspace, customMem); |
4743 | return NULL; |
4744 | } |
4745 | |
4746 | ZSTD_cwksp_init(ws: &ws, start: workspace, size: workspaceSize, isStatic: ZSTD_cwksp_dynamic_alloc); |
4747 | |
4748 | cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(ws: &ws, bytes: sizeof(ZSTD_CDict)); |
4749 | assert(cdict != NULL); |
4750 | ZSTD_cwksp_move(dst: &cdict->workspace, src: &ws); |
4751 | cdict->customMem = customMem; |
4752 | cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ |
4753 | cdict->useRowMatchFinder = useRowMatchFinder; |
4754 | return cdict; |
4755 | } |
4756 | } |
4757 | |
4758 | ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, |
4759 | ZSTD_dictLoadMethod_e dictLoadMethod, |
4760 | ZSTD_dictContentType_e dictContentType, |
4761 | ZSTD_compressionParameters cParams, |
4762 | ZSTD_customMem customMem) |
4763 | { |
4764 | ZSTD_CCtx_params cctxParams; |
4765 | ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); |
4766 | ZSTD_CCtxParams_init(cctxParams: &cctxParams, compressionLevel: 0); |
4767 | cctxParams.cParams = cParams; |
4768 | cctxParams.customMem = customMem; |
4769 | return ZSTD_createCDict_advanced2( |
4770 | dict: dictBuffer, dictSize, |
4771 | dictLoadMethod, dictContentType, |
4772 | cctxParams: &cctxParams, customMem); |
4773 | } |
4774 | |
4775 | ZSTD_CDict* ZSTD_createCDict_advanced2( |
4776 | const void* dict, size_t dictSize, |
4777 | ZSTD_dictLoadMethod_e dictLoadMethod, |
4778 | ZSTD_dictContentType_e dictContentType, |
4779 | const ZSTD_CCtx_params* originalCctxParams, |
4780 | ZSTD_customMem customMem) |
4781 | { |
4782 | ZSTD_CCtx_params cctxParams = *originalCctxParams; |
4783 | ZSTD_compressionParameters cParams; |
4784 | ZSTD_CDict* cdict; |
4785 | |
4786 | DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u" , (unsigned)dictContentType); |
4787 | if (!customMem.customAlloc ^ !customMem.customFree) return NULL; |
4788 | |
4789 | if (cctxParams.enableDedicatedDictSearch) { |
4790 | cParams = ZSTD_dedicatedDictSearch_getCParams( |
4791 | compressionLevel: cctxParams.compressionLevel, dictSize); |
4792 | ZSTD_overrideCParams(cParams: &cParams, overrides: &cctxParams.cParams); |
4793 | } else { |
4794 | cParams = ZSTD_getCParamsFromCCtxParams( |
4795 | CCtxParams: &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_createCDict); |
4796 | } |
4797 | |
4798 | if (!ZSTD_dedicatedDictSearch_isSupported(cParams: &cParams)) { |
4799 | /* Fall back to non-DDSS params */ |
4800 | cctxParams.enableDedicatedDictSearch = 0; |
4801 | cParams = ZSTD_getCParamsFromCCtxParams( |
4802 | CCtxParams: &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_createCDict); |
4803 | } |
4804 | |
4805 | DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u" , cctxParams.enableDedicatedDictSearch); |
4806 | cctxParams.cParams = cParams; |
4807 | cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: cctxParams.useRowMatchFinder, cParams: &cParams); |
4808 | |
4809 | cdict = ZSTD_createCDict_advanced_internal(dictSize, |
4810 | dictLoadMethod, cParams: cctxParams.cParams, |
4811 | useRowMatchFinder: cctxParams.useRowMatchFinder, enableDedicatedDictSearch: cctxParams.enableDedicatedDictSearch, |
4812 | customMem); |
4813 | |
4814 | if (ZSTD_isError( code: ZSTD_initCDict_internal(cdict, |
4815 | dictBuffer: dict, dictSize, |
4816 | dictLoadMethod, dictContentType, |
4817 | params: cctxParams) )) { |
4818 | ZSTD_freeCDict(CDict: cdict); |
4819 | return NULL; |
4820 | } |
4821 | |
4822 | return cdict; |
4823 | } |
4824 | |
4825 | ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) |
4826 | { |
4827 | ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_createCDict); |
4828 | ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer: dict, dictSize, |
4829 | dictLoadMethod: ZSTD_dlm_byCopy, dictContentType: ZSTD_dct_auto, |
4830 | cParams, customMem: ZSTD_defaultCMem); |
4831 | if (cdict) |
4832 | cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; |
4833 | return cdict; |
4834 | } |
4835 | |
4836 | ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) |
4837 | { |
4838 | ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, mode: ZSTD_cpm_createCDict); |
4839 | ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dictBuffer: dict, dictSize, |
4840 | dictLoadMethod: ZSTD_dlm_byRef, dictContentType: ZSTD_dct_auto, |
4841 | cParams, customMem: ZSTD_defaultCMem); |
4842 | if (cdict) |
4843 | cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; |
4844 | return cdict; |
4845 | } |
4846 | |
4847 | size_t ZSTD_freeCDict(ZSTD_CDict* cdict) |
4848 | { |
4849 | if (cdict==NULL) return 0; /* support free on NULL */ |
4850 | { ZSTD_customMem const cMem = cdict->customMem; |
4851 | int cdictInWorkspace = ZSTD_cwksp_owns_buffer(ws: &cdict->workspace, ptr: cdict); |
4852 | ZSTD_cwksp_free(ws: &cdict->workspace, customMem: cMem); |
4853 | if (!cdictInWorkspace) { |
4854 | ZSTD_customFree(ptr: cdict, customMem: cMem); |
4855 | } |
4856 | return 0; |
4857 | } |
4858 | } |
4859 | |
4860 | /*! ZSTD_initStaticCDict_advanced() : |
4861 | * Generate a digested dictionary in provided memory area. |
4862 | * workspace: The memory area to emplace the dictionary into. |
4863 | * Provided pointer must 8-bytes aligned. |
4864 | * It must outlive dictionary usage. |
4865 | * workspaceSize: Use ZSTD_estimateCDictSize() |
4866 | * to determine how large workspace must be. |
4867 | * cParams : use ZSTD_getCParams() to transform a compression level |
4868 | * into its relevants cParams. |
4869 | * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) |
4870 | * Note : there is no corresponding "free" function. |
4871 | * Since workspace was allocated externally, it must be freed externally. |
4872 | */ |
4873 | const ZSTD_CDict* ZSTD_initStaticCDict( |
4874 | void* workspace, size_t workspaceSize, |
4875 | const void* dict, size_t dictSize, |
4876 | ZSTD_dictLoadMethod_e dictLoadMethod, |
4877 | ZSTD_dictContentType_e dictContentType, |
4878 | ZSTD_compressionParameters cParams) |
4879 | { |
4880 | ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: ZSTD_ps_auto, cParams: &cParams); |
4881 | /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ |
4882 | size_t const matchStateSize = ZSTD_sizeof_matchState(cParams: &cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); |
4883 | size_t const neededSize = ZSTD_cwksp_alloc_size(size: sizeof(ZSTD_CDict)) |
4884 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 |
4885 | : ZSTD_cwksp_alloc_size(size: ZSTD_cwksp_align(size: dictSize, align: sizeof(void*)))) |
4886 | + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) |
4887 | + matchStateSize; |
4888 | ZSTD_CDict* cdict; |
4889 | ZSTD_CCtx_params params; |
4890 | |
4891 | if ((size_t)workspace & 7) return NULL; /* 8-aligned */ |
4892 | |
4893 | { |
4894 | ZSTD_cwksp ws; |
4895 | ZSTD_cwksp_init(ws: &ws, start: workspace, size: workspaceSize, isStatic: ZSTD_cwksp_static_alloc); |
4896 | cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(ws: &ws, bytes: sizeof(ZSTD_CDict)); |
4897 | if (cdict == NULL) return NULL; |
4898 | ZSTD_cwksp_move(dst: &cdict->workspace, src: &ws); |
4899 | } |
4900 | |
4901 | DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u" , |
4902 | (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); |
4903 | if (workspaceSize < neededSize) return NULL; |
4904 | |
4905 | ZSTD_CCtxParams_init(cctxParams: ¶ms, compressionLevel: 0); |
4906 | params.cParams = cParams; |
4907 | params.useRowMatchFinder = useRowMatchFinder; |
4908 | cdict->useRowMatchFinder = useRowMatchFinder; |
4909 | |
4910 | if (ZSTD_isError( code: ZSTD_initCDict_internal(cdict, |
4911 | dictBuffer: dict, dictSize, |
4912 | dictLoadMethod, dictContentType, |
4913 | params) )) |
4914 | return NULL; |
4915 | |
4916 | return cdict; |
4917 | } |
4918 | |
4919 | ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) |
4920 | { |
4921 | assert(cdict != NULL); |
4922 | return cdict->matchState.cParams; |
4923 | } |
4924 | |
4925 | /*! ZSTD_getDictID_fromCDict() : |
4926 | * Provides the dictID of the dictionary loaded into `cdict`. |
4927 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
4928 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
4929 | unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) |
4930 | { |
4931 | if (cdict==NULL) return 0; |
4932 | return cdict->dictID; |
4933 | } |
4934 | |
4935 | /* ZSTD_compressBegin_usingCDict_internal() : |
4936 | * Implementation of various ZSTD_compressBegin_usingCDict* functions. |
4937 | */ |
4938 | static size_t ZSTD_compressBegin_usingCDict_internal( |
4939 | ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, |
4940 | ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) |
4941 | { |
4942 | ZSTD_CCtx_params cctxParams; |
4943 | DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal" ); |
4944 | RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!" ); |
4945 | /* Initialize the cctxParams from the cdict */ |
4946 | { |
4947 | ZSTD_parameters params; |
4948 | params.fParams = fParams; |
4949 | params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF |
4950 | || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER |
4951 | || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN |
4952 | || cdict->compressionLevel == 0 ) ? |
4953 | ZSTD_getCParamsFromCDict(cdict) |
4954 | : ZSTD_getCParams(compressionLevel: cdict->compressionLevel, |
4955 | estimatedSrcSize: pledgedSrcSize, |
4956 | dictSize: cdict->dictContentSize); |
4957 | ZSTD_CCtxParams_init_internal(cctxParams: &cctxParams, params: ¶ms, compressionLevel: cdict->compressionLevel); |
4958 | } |
4959 | /* Increase window log to fit the entire dictionary and source if the |
4960 | * source size is known. Limit the increase to 19, which is the |
4961 | * window log for compression level 1 with the largest source size. |
4962 | */ |
4963 | if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { |
4964 | U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); |
4965 | U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(val: limitedSrcSize - 1) + 1 : 1; |
4966 | cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); |
4967 | } |
4968 | return ZSTD_compressBegin_internal(cctx, |
4969 | NULL, dictSize: 0, dictContentType: ZSTD_dct_auto, dtlm: ZSTD_dtlm_fast, |
4970 | cdict, |
4971 | params: &cctxParams, pledgedSrcSize, |
4972 | zbuff: ZSTDb_not_buffered); |
4973 | } |
4974 | |
4975 | |
4976 | /* ZSTD_compressBegin_usingCDict_advanced() : |
4977 | * This function is DEPRECATED. |
4978 | * cdict must be != NULL */ |
4979 | size_t ZSTD_compressBegin_usingCDict_advanced( |
4980 | ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, |
4981 | ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) |
4982 | { |
4983 | return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); |
4984 | } |
4985 | |
4986 | /* ZSTD_compressBegin_usingCDict() : |
4987 | * cdict must be != NULL */ |
4988 | size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) |
4989 | { |
4990 | ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; |
4991 | return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); |
4992 | } |
4993 | |
4994 | /*! ZSTD_compress_usingCDict_internal(): |
4995 | * Implementation of various ZSTD_compress_usingCDict* functions. |
4996 | */ |
4997 | static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, |
4998 | void* dst, size_t dstCapacity, |
4999 | const void* src, size_t srcSize, |
5000 | const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) |
5001 | { |
5002 | FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), "" ); /* will check if cdict != NULL */ |
5003 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
5004 | } |
5005 | |
5006 | /*! ZSTD_compress_usingCDict_advanced(): |
5007 | * This function is DEPRECATED. |
5008 | */ |
5009 | size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, |
5010 | void* dst, size_t dstCapacity, |
5011 | const void* src, size_t srcSize, |
5012 | const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) |
5013 | { |
5014 | return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); |
5015 | } |
5016 | |
5017 | /*! ZSTD_compress_usingCDict() : |
5018 | * Compression using a digested Dictionary. |
5019 | * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. |
5020 | * Note that compression parameters are decided at CDict creation time |
5021 | * while frame parameters are hardcoded */ |
5022 | size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, |
5023 | void* dst, size_t dstCapacity, |
5024 | const void* src, size_t srcSize, |
5025 | const ZSTD_CDict* cdict) |
5026 | { |
5027 | ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; |
5028 | return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); |
5029 | } |
5030 | |
5031 | |
5032 | |
5033 | /* ****************************************************************** |
5034 | * Streaming |
5035 | ********************************************************************/ |
5036 | |
5037 | ZSTD_CStream* ZSTD_createCStream(void) |
5038 | { |
5039 | DEBUGLOG(3, "ZSTD_createCStream" ); |
5040 | return ZSTD_createCStream_advanced(customMem: ZSTD_defaultCMem); |
5041 | } |
5042 | |
5043 | ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) |
5044 | { |
5045 | return ZSTD_initStaticCCtx(workspace, workspaceSize); |
5046 | } |
5047 | |
5048 | ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) |
5049 | { /* CStream and CCtx are now same object */ |
5050 | return ZSTD_createCCtx_advanced(customMem); |
5051 | } |
5052 | |
5053 | size_t ZSTD_freeCStream(ZSTD_CStream* zcs) |
5054 | { |
5055 | return ZSTD_freeCCtx(cctx: zcs); /* same object */ |
5056 | } |
5057 | |
5058 | |
5059 | |
5060 | /*====== Initialization ======*/ |
5061 | |
5062 | size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } |
5063 | |
5064 | size_t ZSTD_CStreamOutSize(void) |
5065 | { |
5066 | return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; |
5067 | } |
5068 | |
5069 | static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) |
5070 | { |
5071 | if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) |
5072 | return ZSTD_cpm_attachDict; |
5073 | else |
5074 | return ZSTD_cpm_noAttachDict; |
5075 | } |
5076 | |
5077 | /* ZSTD_resetCStream(): |
5078 | * pledgedSrcSize == 0 means "unknown" */ |
5079 | size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) |
5080 | { |
5081 | /* temporary : 0 interpreted as "unknown" during transition period. |
5082 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. |
5083 | * 0 will be interpreted as "empty" in the future. |
5084 | */ |
5085 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; |
5086 | DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u" , (unsigned)pledgedSrcSize); |
5087 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5088 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "" ); |
5089 | return 0; |
5090 | } |
5091 | |
5092 | /*! ZSTD_initCStream_internal() : |
5093 | * Note : for lib/compress only. Used by zstdmt_compress.c. |
5094 | * Assumption 1 : params are valid |
5095 | * Assumption 2 : either dict, or cdict, is defined, not both */ |
5096 | size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, |
5097 | const void* dict, size_t dictSize, const ZSTD_CDict* cdict, |
5098 | const ZSTD_CCtx_params* params, |
5099 | unsigned long long pledgedSrcSize) |
5100 | { |
5101 | DEBUGLOG(4, "ZSTD_initCStream_internal" ); |
5102 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5103 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "" ); |
5104 | assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); |
5105 | zcs->requestedParams = *params; |
5106 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
5107 | if (dict) { |
5108 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "" ); |
5109 | } else { |
5110 | /* Dictionary is cleared if !cdict */ |
5111 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "" ); |
5112 | } |
5113 | return 0; |
5114 | } |
5115 | |
5116 | /* ZSTD_initCStream_usingCDict_advanced() : |
5117 | * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ |
5118 | size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, |
5119 | const ZSTD_CDict* cdict, |
5120 | ZSTD_frameParameters fParams, |
5121 | unsigned long long pledgedSrcSize) |
5122 | { |
5123 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced" ); |
5124 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5125 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "" ); |
5126 | zcs->requestedParams.fParams = fParams; |
5127 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "" ); |
5128 | return 0; |
5129 | } |
5130 | |
5131 | /* note : cdict must outlive compression session */ |
5132 | size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) |
5133 | { |
5134 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict" ); |
5135 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5136 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "" ); |
5137 | return 0; |
5138 | } |
5139 | |
5140 | |
5141 | /* ZSTD_initCStream_advanced() : |
5142 | * pledgedSrcSize must be exact. |
5143 | * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. |
5144 | * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ |
5145 | size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, |
5146 | const void* dict, size_t dictSize, |
5147 | ZSTD_parameters params, unsigned long long pss) |
5148 | { |
5149 | /* for compatibility with older programs relying on this behavior. |
5150 | * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. |
5151 | * This line will be removed in the future. |
5152 | */ |
5153 | U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; |
5154 | DEBUGLOG(4, "ZSTD_initCStream_advanced" ); |
5155 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5156 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "" ); |
5157 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "" ); |
5158 | ZSTD_CCtxParams_setZstdParams(cctxParams: &zcs->requestedParams, params: ¶ms); |
5159 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "" ); |
5160 | return 0; |
5161 | } |
5162 | |
5163 | size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) |
5164 | { |
5165 | DEBUGLOG(4, "ZSTD_initCStream_usingDict" ); |
5166 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5167 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "" ); |
5168 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "" ); |
5169 | return 0; |
5170 | } |
5171 | |
5172 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) |
5173 | { |
5174 | /* temporary : 0 interpreted as "unknown" during transition period. |
5175 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. |
5176 | * 0 will be interpreted as "empty" in the future. |
5177 | */ |
5178 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; |
5179 | DEBUGLOG(4, "ZSTD_initCStream_srcSize" ); |
5180 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5181 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "" ); |
5182 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "" ); |
5183 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "" ); |
5184 | return 0; |
5185 | } |
5186 | |
5187 | size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) |
5188 | { |
5189 | DEBUGLOG(4, "ZSTD_initCStream" ); |
5190 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "" ); |
5191 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "" ); |
5192 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "" ); |
5193 | return 0; |
5194 | } |
5195 | |
5196 | /*====== Compression ======*/ |
5197 | |
5198 | static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) |
5199 | { |
5200 | size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; |
5201 | if (hintInSize==0) hintInSize = cctx->blockSize; |
5202 | return hintInSize; |
5203 | } |
5204 | |
5205 | /* ZSTD_compressStream_generic(): |
5206 | * internal function for all *compressStream*() variants |
5207 | * non-static, because can be called from zstdmt_compress.c |
5208 | * @return : hint size for next input */ |
5209 | static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, |
5210 | ZSTD_outBuffer* output, |
5211 | ZSTD_inBuffer* input, |
5212 | ZSTD_EndDirective const flushMode) |
5213 | { |
5214 | const char* const istart = (const char*)input->src; |
5215 | const char* const iend = input->size != 0 ? istart + input->size : istart; |
5216 | const char* ip = input->pos != 0 ? istart + input->pos : istart; |
5217 | char* const ostart = (char*)output->dst; |
5218 | char* const oend = output->size != 0 ? ostart + output->size : ostart; |
5219 | char* op = output->pos != 0 ? ostart + output->pos : ostart; |
5220 | U32 someMoreWork = 1; |
5221 | |
5222 | /* check expectations */ |
5223 | DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u" , (unsigned)flushMode); |
5224 | if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { |
5225 | assert(zcs->inBuff != NULL); |
5226 | assert(zcs->inBuffSize > 0); |
5227 | } |
5228 | if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { |
5229 | assert(zcs->outBuff != NULL); |
5230 | assert(zcs->outBuffSize > 0); |
5231 | } |
5232 | assert(output->pos <= output->size); |
5233 | assert(input->pos <= input->size); |
5234 | assert((U32)flushMode <= (U32)ZSTD_e_end); |
5235 | |
5236 | while (someMoreWork) { |
5237 | switch(zcs->streamStage) |
5238 | { |
5239 | case zcss_init: |
5240 | RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!" ); |
5241 | |
5242 | case zcss_load: |
5243 | if ( (flushMode == ZSTD_e_end) |
5244 | && ( (size_t)(oend-op) >= ZSTD_compressBound(srcSize: iend-ip) /* Enough output space */ |
5245 | || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ |
5246 | && (zcs->inBuffPos == 0) ) { |
5247 | /* shortcut to compression pass directly into output buffer */ |
5248 | size_t const cSize = ZSTD_compressEnd(cctx: zcs, |
5249 | dst: op, dstCapacity: oend-op, src: ip, srcSize: iend-ip); |
5250 | DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u" , (unsigned)cSize); |
5251 | FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed" ); |
5252 | ip = iend; |
5253 | op += cSize; |
5254 | zcs->frameEnded = 1; |
5255 | ZSTD_CCtx_reset(cctx: zcs, reset: ZSTD_reset_session_only); |
5256 | someMoreWork = 0; break; |
5257 | } |
5258 | /* complete loading into inBuffer in buffered mode */ |
5259 | if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { |
5260 | size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; |
5261 | size_t const loaded = ZSTD_limitCopy( |
5262 | dst: zcs->inBuff + zcs->inBuffPos, dstCapacity: toLoad, |
5263 | src: ip, srcSize: iend-ip); |
5264 | zcs->inBuffPos += loaded; |
5265 | if (loaded != 0) |
5266 | ip += loaded; |
5267 | if ( (flushMode == ZSTD_e_continue) |
5268 | && (zcs->inBuffPos < zcs->inBuffTarget) ) { |
5269 | /* not enough input to fill full block : stop here */ |
5270 | someMoreWork = 0; break; |
5271 | } |
5272 | if ( (flushMode == ZSTD_e_flush) |
5273 | && (zcs->inBuffPos == zcs->inToCompress) ) { |
5274 | /* empty */ |
5275 | someMoreWork = 0; break; |
5276 | } |
5277 | } |
5278 | /* compress current block (note : this stage cannot be stopped in the middle) */ |
5279 | DEBUGLOG(5, "stream compression stage (flushMode==%u)" , flushMode); |
5280 | { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); |
5281 | void* cDst; |
5282 | size_t cSize; |
5283 | size_t oSize = oend-op; |
5284 | size_t const iSize = inputBuffered |
5285 | ? zcs->inBuffPos - zcs->inToCompress |
5286 | : MIN((size_t)(iend - ip), zcs->blockSize); |
5287 | if (oSize >= ZSTD_compressBound(srcSize: iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) |
5288 | cDst = op; /* compress into output buffer, to skip flush stage */ |
5289 | else |
5290 | cDst = zcs->outBuff, oSize = zcs->outBuffSize; |
5291 | if (inputBuffered) { |
5292 | unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); |
5293 | cSize = lastBlock ? |
5294 | ZSTD_compressEnd(cctx: zcs, dst: cDst, dstCapacity: oSize, |
5295 | src: zcs->inBuff + zcs->inToCompress, srcSize: iSize) : |
5296 | ZSTD_compressContinue(cctx: zcs, dst: cDst, dstCapacity: oSize, |
5297 | src: zcs->inBuff + zcs->inToCompress, srcSize: iSize); |
5298 | FORWARD_IF_ERROR(cSize, "%s" , lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed" ); |
5299 | zcs->frameEnded = lastBlock; |
5300 | /* prepare next block */ |
5301 | zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; |
5302 | if (zcs->inBuffTarget > zcs->inBuffSize) |
5303 | zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; |
5304 | DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u" , |
5305 | (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); |
5306 | if (!lastBlock) |
5307 | assert(zcs->inBuffTarget <= zcs->inBuffSize); |
5308 | zcs->inToCompress = zcs->inBuffPos; |
5309 | } else { |
5310 | unsigned const lastBlock = (ip + iSize == iend); |
5311 | assert(flushMode == ZSTD_e_end /* Already validated */); |
5312 | cSize = lastBlock ? |
5313 | ZSTD_compressEnd(cctx: zcs, dst: cDst, dstCapacity: oSize, src: ip, srcSize: iSize) : |
5314 | ZSTD_compressContinue(cctx: zcs, dst: cDst, dstCapacity: oSize, src: ip, srcSize: iSize); |
5315 | /* Consume the input prior to error checking to mirror buffered mode. */ |
5316 | if (iSize > 0) |
5317 | ip += iSize; |
5318 | FORWARD_IF_ERROR(cSize, "%s" , lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed" ); |
5319 | zcs->frameEnded = lastBlock; |
5320 | if (lastBlock) |
5321 | assert(ip == iend); |
5322 | } |
5323 | if (cDst == op) { /* no need to flush */ |
5324 | op += cSize; |
5325 | if (zcs->frameEnded) { |
5326 | DEBUGLOG(5, "Frame completed directly in outBuffer" ); |
5327 | someMoreWork = 0; |
5328 | ZSTD_CCtx_reset(cctx: zcs, reset: ZSTD_reset_session_only); |
5329 | } |
5330 | break; |
5331 | } |
5332 | zcs->outBuffContentSize = cSize; |
5333 | zcs->outBuffFlushedSize = 0; |
5334 | zcs->streamStage = zcss_flush; /* pass-through to flush stage */ |
5335 | } |
5336 | ZSTD_FALLTHROUGH; |
5337 | case zcss_flush: |
5338 | DEBUGLOG(5, "flush stage" ); |
5339 | assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); |
5340 | { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; |
5341 | size_t const flushed = ZSTD_limitCopy(dst: op, dstCapacity: (size_t)(oend-op), |
5342 | src: zcs->outBuff + zcs->outBuffFlushedSize, srcSize: toFlush); |
5343 | DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u" , |
5344 | (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); |
5345 | if (flushed) |
5346 | op += flushed; |
5347 | zcs->outBuffFlushedSize += flushed; |
5348 | if (toFlush!=flushed) { |
5349 | /* flush not fully completed, presumably because dst is too small */ |
5350 | assert(op==oend); |
5351 | someMoreWork = 0; |
5352 | break; |
5353 | } |
5354 | zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; |
5355 | if (zcs->frameEnded) { |
5356 | DEBUGLOG(5, "Frame completed on flush" ); |
5357 | someMoreWork = 0; |
5358 | ZSTD_CCtx_reset(cctx: zcs, reset: ZSTD_reset_session_only); |
5359 | break; |
5360 | } |
5361 | zcs->streamStage = zcss_load; |
5362 | break; |
5363 | } |
5364 | |
5365 | default: /* impossible */ |
5366 | assert(0); |
5367 | } |
5368 | } |
5369 | |
5370 | input->pos = ip - istart; |
5371 | output->pos = op - ostart; |
5372 | if (zcs->frameEnded) return 0; |
5373 | return ZSTD_nextInputSizeHint(cctx: zcs); |
5374 | } |
5375 | |
5376 | static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) |
5377 | { |
5378 | return ZSTD_nextInputSizeHint(cctx); |
5379 | |
5380 | } |
5381 | |
5382 | size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) |
5383 | { |
5384 | FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "" ); |
5385 | return ZSTD_nextInputSizeHint_MTorST(cctx: zcs); |
5386 | } |
5387 | |
5388 | /* After a compression call set the expected input/output buffer. |
5389 | * This is validated at the start of the next compression call. |
5390 | */ |
5391 | static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) |
5392 | { |
5393 | if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { |
5394 | cctx->expectedInBuffer = *input; |
5395 | } |
5396 | if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { |
5397 | cctx->expectedOutBufferSize = output->size - output->pos; |
5398 | } |
5399 | } |
5400 | |
5401 | /* Validate that the input/output buffers match the expectations set by |
5402 | * ZSTD_setBufferExpectations. |
5403 | */ |
5404 | static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, |
5405 | ZSTD_outBuffer const* output, |
5406 | ZSTD_inBuffer const* input, |
5407 | ZSTD_EndDirective endOp) |
5408 | { |
5409 | if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { |
5410 | ZSTD_inBuffer const expect = cctx->expectedInBuffer; |
5411 | if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) |
5412 | RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!" ); |
5413 | if (endOp != ZSTD_e_end) |
5414 | RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!" ); |
5415 | } |
5416 | if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { |
5417 | size_t const outBufferSize = output->size - output->pos; |
5418 | if (cctx->expectedOutBufferSize != outBufferSize) |
5419 | RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!" ); |
5420 | } |
5421 | return 0; |
5422 | } |
5423 | |
5424 | static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, |
5425 | ZSTD_EndDirective endOp, |
5426 | size_t inSize) { |
5427 | ZSTD_CCtx_params params = cctx->requestedParams; |
5428 | ZSTD_prefixDict const prefixDict = cctx->prefixDict; |
5429 | FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , "" ); /* Init the local dict if present. */ |
5430 | ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ |
5431 | assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ |
5432 | if (cctx->cdict && !cctx->localDict.cdict) { |
5433 | /* Let the cdict's compression level take priority over the requested params. |
5434 | * But do not take the cdict's compression level if the "cdict" is actually a localDict |
5435 | * generated from ZSTD_initLocalDict(). |
5436 | */ |
5437 | params.compressionLevel = cctx->cdict->compressionLevel; |
5438 | } |
5439 | DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage" ); |
5440 | if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ |
5441 | { |
5442 | size_t const dictSize = prefixDict.dict |
5443 | ? prefixDict.dictSize |
5444 | : (cctx->cdict ? cctx->cdict->dictContentSize : 0); |
5445 | ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cdict: cctx->cdict, params: ¶ms, pledgedSrcSize: cctx->pledgedSrcSizePlusOne - 1); |
5446 | params.cParams = ZSTD_getCParamsFromCCtxParams( |
5447 | CCtxParams: ¶ms, srcSizeHint: cctx->pledgedSrcSizePlusOne-1, |
5448 | dictSize, mode); |
5449 | } |
5450 | |
5451 | params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(mode: params.useBlockSplitter, cParams: ¶ms.cParams); |
5452 | params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(mode: params.ldmParams.enableLdm, cParams: ¶ms.cParams); |
5453 | params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(mode: params.useRowMatchFinder, cParams: ¶ms.cParams); |
5454 | |
5455 | { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; |
5456 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
5457 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, |
5458 | prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, |
5459 | cctx->cdict, |
5460 | ¶ms, pledgedSrcSize, |
5461 | ZSTDb_buffered) , "" ); |
5462 | assert(cctx->appliedParams.nbWorkers == 0); |
5463 | cctx->inToCompress = 0; |
5464 | cctx->inBuffPos = 0; |
5465 | if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { |
5466 | /* for small input: avoid automatic flush on reaching end of block, since |
5467 | * it would require to add a 3-bytes null block to end frame |
5468 | */ |
5469 | cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); |
5470 | } else { |
5471 | cctx->inBuffTarget = 0; |
5472 | } |
5473 | cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; |
5474 | cctx->streamStage = zcss_load; |
5475 | cctx->frameEnded = 0; |
5476 | } |
5477 | return 0; |
5478 | } |
5479 | |
5480 | size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, |
5481 | ZSTD_outBuffer* output, |
5482 | ZSTD_inBuffer* input, |
5483 | ZSTD_EndDirective endOp) |
5484 | { |
5485 | DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u " , (unsigned)endOp); |
5486 | /* check conditions */ |
5487 | RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer" ); |
5488 | RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer" ); |
5489 | RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective" ); |
5490 | assert(cctx != NULL); |
5491 | |
5492 | /* transparent initialization stage */ |
5493 | if (cctx->streamStage == zcss_init) { |
5494 | FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed" ); |
5495 | ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ |
5496 | } |
5497 | /* end of transparent initialization stage */ |
5498 | |
5499 | FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers" ); |
5500 | /* compression stage */ |
5501 | FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "" ); |
5502 | DEBUGLOG(5, "completed ZSTD_compressStream2" ); |
5503 | ZSTD_setBufferExpectations(cctx, output, input); |
5504 | return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ |
5505 | } |
5506 | |
5507 | size_t ZSTD_compressStream2_simpleArgs ( |
5508 | ZSTD_CCtx* cctx, |
5509 | void* dst, size_t dstCapacity, size_t* dstPos, |
5510 | const void* src, size_t srcSize, size_t* srcPos, |
5511 | ZSTD_EndDirective endOp) |
5512 | { |
5513 | ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; |
5514 | ZSTD_inBuffer input = { src, srcSize, *srcPos }; |
5515 | /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ |
5516 | size_t const cErr = ZSTD_compressStream2(cctx, output: &output, input: &input, endOp); |
5517 | *dstPos = output.pos; |
5518 | *srcPos = input.pos; |
5519 | return cErr; |
5520 | } |
5521 | |
5522 | size_t ZSTD_compress2(ZSTD_CCtx* cctx, |
5523 | void* dst, size_t dstCapacity, |
5524 | const void* src, size_t srcSize) |
5525 | { |
5526 | ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; |
5527 | ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; |
5528 | DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)" , (unsigned)srcSize); |
5529 | ZSTD_CCtx_reset(cctx, reset: ZSTD_reset_session_only); |
5530 | /* Enable stable input/output buffers. */ |
5531 | cctx->requestedParams.inBufferMode = ZSTD_bm_stable; |
5532 | cctx->requestedParams.outBufferMode = ZSTD_bm_stable; |
5533 | { size_t oPos = 0; |
5534 | size_t iPos = 0; |
5535 | size_t const result = ZSTD_compressStream2_simpleArgs(cctx, |
5536 | dst, dstCapacity, dstPos: &oPos, |
5537 | src, srcSize, srcPos: &iPos, |
5538 | endOp: ZSTD_e_end); |
5539 | /* Reset to the original values. */ |
5540 | cctx->requestedParams.inBufferMode = originalInBufferMode; |
5541 | cctx->requestedParams.outBufferMode = originalOutBufferMode; |
5542 | FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed" ); |
5543 | if (result != 0) { /* compression not completed, due to lack of output space */ |
5544 | assert(oPos == dstCapacity); |
5545 | RETURN_ERROR(dstSize_tooSmall, "" ); |
5546 | } |
5547 | assert(iPos == srcSize); /* all input is expected consumed */ |
5548 | return oPos; |
5549 | } |
5550 | } |
5551 | |
5552 | typedef struct { |
5553 | U32 idx; /* Index in array of ZSTD_Sequence */ |
5554 | U32 posInSequence; /* Position within sequence at idx */ |
5555 | size_t posInSrc; /* Number of bytes given by sequences provided so far */ |
5556 | } ZSTD_sequencePosition; |
5557 | |
5558 | /* ZSTD_validateSequence() : |
5559 | * @offCode : is presumed to follow format required by ZSTD_storeSeq() |
5560 | * @returns a ZSTD error code if sequence is not valid |
5561 | */ |
5562 | static size_t |
5563 | ZSTD_validateSequence(U32 offCode, U32 matchLength, |
5564 | size_t posInSrc, U32 windowLog, size_t dictSize) |
5565 | { |
5566 | U32 const windowSize = 1 << windowLog; |
5567 | /* posInSrc represents the amount of data the decoder would decode up to this point. |
5568 | * As long as the amount of data decoded is less than or equal to window size, offsets may be |
5569 | * larger than the total length of output decoded in order to reference the dict, even larger than |
5570 | * window size. After output surpasses windowSize, we're limited to windowSize offsets again. |
5571 | */ |
5572 | size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; |
5573 | RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!" ); |
5574 | RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small" ); |
5575 | return 0; |
5576 | } |
5577 | |
5578 | /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ |
5579 | static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) |
5580 | { |
5581 | U32 offCode = STORE_OFFSET(rawOffset); |
5582 | |
5583 | if (!ll0 && rawOffset == rep[0]) { |
5584 | offCode = STORE_REPCODE_1; |
5585 | } else if (rawOffset == rep[1]) { |
5586 | offCode = STORE_REPCODE(2 - ll0); |
5587 | } else if (rawOffset == rep[2]) { |
5588 | offCode = STORE_REPCODE(3 - ll0); |
5589 | } else if (ll0 && rawOffset == rep[0] - 1) { |
5590 | offCode = STORE_REPCODE_3; |
5591 | } |
5592 | return offCode; |
5593 | } |
5594 | |
5595 | /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of |
5596 | * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. |
5597 | */ |
5598 | static size_t |
5599 | ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, |
5600 | ZSTD_sequencePosition* seqPos, |
5601 | const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, |
5602 | const void* src, size_t blockSize) |
5603 | { |
5604 | U32 idx = seqPos->idx; |
5605 | BYTE const* ip = (BYTE const*)(src); |
5606 | const BYTE* const iend = ip + blockSize; |
5607 | repcodes_t updatedRepcodes; |
5608 | U32 dictSize; |
5609 | |
5610 | if (cctx->cdict) { |
5611 | dictSize = (U32)cctx->cdict->dictContentSize; |
5612 | } else if (cctx->prefixDict.dict) { |
5613 | dictSize = (U32)cctx->prefixDict.dictSize; |
5614 | } else { |
5615 | dictSize = 0; |
5616 | } |
5617 | ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); |
5618 | for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { |
5619 | U32 const litLength = inSeqs[idx].litLength; |
5620 | U32 const ll0 = (litLength == 0); |
5621 | U32 const matchLength = inSeqs[idx].matchLength; |
5622 | U32 const offCode = ZSTD_finalizeOffCode(rawOffset: inSeqs[idx].offset, rep: updatedRepcodes.rep, ll0); |
5623 | ZSTD_updateRep(rep: updatedRepcodes.rep, offBase_minus1: offCode, ll0); |
5624 | |
5625 | DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)" , offCode, matchLength, litLength); |
5626 | if (cctx->appliedParams.validateSequences) { |
5627 | seqPos->posInSrc += litLength + matchLength; |
5628 | FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, |
5629 | cctx->appliedParams.cParams.windowLog, dictSize), |
5630 | "Sequence validation failed" ); |
5631 | } |
5632 | RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, |
5633 | "Not enough memory allocated. Try adjusting ZSTD_c_minMatch." ); |
5634 | ZSTD_storeSeq(seqStorePtr: &cctx->seqStore, litLength, literals: ip, litLimit: iend, offBase_minus1: offCode, matchLength); |
5635 | ip += matchLength + litLength; |
5636 | } |
5637 | ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); |
5638 | |
5639 | if (inSeqs[idx].litLength) { |
5640 | DEBUGLOG(6, "Storing last literals of size: %u" , inSeqs[idx].litLength); |
5641 | ZSTD_storeLastLiterals(seqStorePtr: &cctx->seqStore, anchor: ip, lastLLSize: inSeqs[idx].litLength); |
5642 | ip += inSeqs[idx].litLength; |
5643 | seqPos->posInSrc += inSeqs[idx].litLength; |
5644 | } |
5645 | RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!" ); |
5646 | seqPos->idx = idx+1; |
5647 | return 0; |
5648 | } |
5649 | |
5650 | /* Returns the number of bytes to move the current read position back by. Only non-zero |
5651 | * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something |
5652 | * went wrong. |
5653 | * |
5654 | * This function will attempt to scan through blockSize bytes represented by the sequences |
5655 | * in inSeqs, storing any (partial) sequences. |
5656 | * |
5657 | * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to |
5658 | * avoid splitting a match, or to avoid splitting a match such that it would produce a match |
5659 | * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. |
5660 | */ |
5661 | static size_t |
5662 | ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, |
5663 | const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, |
5664 | const void* src, size_t blockSize) |
5665 | { |
5666 | U32 idx = seqPos->idx; |
5667 | U32 startPosInSequence = seqPos->posInSequence; |
5668 | U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; |
5669 | size_t dictSize; |
5670 | BYTE const* ip = (BYTE const*)(src); |
5671 | BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ |
5672 | repcodes_t updatedRepcodes; |
5673 | U32 bytesAdjustment = 0; |
5674 | U32 finalMatchSplit = 0; |
5675 | |
5676 | if (cctx->cdict) { |
5677 | dictSize = cctx->cdict->dictContentSize; |
5678 | } else if (cctx->prefixDict.dict) { |
5679 | dictSize = cctx->prefixDict.dictSize; |
5680 | } else { |
5681 | dictSize = 0; |
5682 | } |
5683 | DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu" , idx, startPosInSequence, blockSize); |
5684 | DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)" , idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); |
5685 | ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); |
5686 | while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { |
5687 | const ZSTD_Sequence currSeq = inSeqs[idx]; |
5688 | U32 litLength = currSeq.litLength; |
5689 | U32 matchLength = currSeq.matchLength; |
5690 | U32 const rawOffset = currSeq.offset; |
5691 | U32 offCode; |
5692 | |
5693 | /* Modify the sequence depending on where endPosInSequence lies */ |
5694 | if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { |
5695 | if (startPosInSequence >= litLength) { |
5696 | startPosInSequence -= litLength; |
5697 | litLength = 0; |
5698 | matchLength -= startPosInSequence; |
5699 | } else { |
5700 | litLength -= startPosInSequence; |
5701 | } |
5702 | /* Move to the next sequence */ |
5703 | endPosInSequence -= currSeq.litLength + currSeq.matchLength; |
5704 | startPosInSequence = 0; |
5705 | idx++; |
5706 | } else { |
5707 | /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence |
5708 | does not reach the end of the match. So, we have to split the sequence */ |
5709 | DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u" , |
5710 | currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); |
5711 | if (endPosInSequence > litLength) { |
5712 | U32 firstHalfMatchLength; |
5713 | litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; |
5714 | firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; |
5715 | if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { |
5716 | /* Only ever split the match if it is larger than the block size */ |
5717 | U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; |
5718 | if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { |
5719 | /* Move the endPosInSequence backward so that it creates match of minMatch length */ |
5720 | endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; |
5721 | bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; |
5722 | firstHalfMatchLength -= bytesAdjustment; |
5723 | } |
5724 | matchLength = firstHalfMatchLength; |
5725 | /* Flag that we split the last match - after storing the sequence, exit the loop, |
5726 | but keep the value of endPosInSequence */ |
5727 | finalMatchSplit = 1; |
5728 | } else { |
5729 | /* Move the position in sequence backwards so that we don't split match, and break to store |
5730 | * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence |
5731 | * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so |
5732 | * would cause the first half of the match to be too small |
5733 | */ |
5734 | bytesAdjustment = endPosInSequence - currSeq.litLength; |
5735 | endPosInSequence = currSeq.litLength; |
5736 | break; |
5737 | } |
5738 | } else { |
5739 | /* This sequence ends inside the literals, break to store the last literals */ |
5740 | break; |
5741 | } |
5742 | } |
5743 | /* Check if this offset can be represented with a repcode */ |
5744 | { U32 const ll0 = (litLength == 0); |
5745 | offCode = ZSTD_finalizeOffCode(rawOffset, rep: updatedRepcodes.rep, ll0); |
5746 | ZSTD_updateRep(rep: updatedRepcodes.rep, offBase_minus1: offCode, ll0); |
5747 | } |
5748 | |
5749 | if (cctx->appliedParams.validateSequences) { |
5750 | seqPos->posInSrc += litLength + matchLength; |
5751 | FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, |
5752 | cctx->appliedParams.cParams.windowLog, dictSize), |
5753 | "Sequence validation failed" ); |
5754 | } |
5755 | DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)" , offCode, matchLength, litLength); |
5756 | RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, |
5757 | "Not enough memory allocated. Try adjusting ZSTD_c_minMatch." ); |
5758 | ZSTD_storeSeq(seqStorePtr: &cctx->seqStore, litLength, literals: ip, litLimit: iend, offBase_minus1: offCode, matchLength); |
5759 | ip += matchLength + litLength; |
5760 | } |
5761 | DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)" , idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); |
5762 | assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); |
5763 | seqPos->idx = idx; |
5764 | seqPos->posInSequence = endPosInSequence; |
5765 | ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); |
5766 | |
5767 | iend -= bytesAdjustment; |
5768 | if (ip != iend) { |
5769 | /* Store any last literals */ |
5770 | U32 lastLLSize = (U32)(iend - ip); |
5771 | assert(ip <= iend); |
5772 | DEBUGLOG(6, "Storing last literals of size: %u" , lastLLSize); |
5773 | ZSTD_storeLastLiterals(seqStorePtr: &cctx->seqStore, anchor: ip, lastLLSize); |
5774 | seqPos->posInSrc += lastLLSize; |
5775 | } |
5776 | |
5777 | return bytesAdjustment; |
5778 | } |
5779 | |
5780 | typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, |
5781 | const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, |
5782 | const void* src, size_t blockSize); |
5783 | static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) |
5784 | { |
5785 | ZSTD_sequenceCopier sequenceCopier = NULL; |
5786 | assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); |
5787 | if (mode == ZSTD_sf_explicitBlockDelimiters) { |
5788 | return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; |
5789 | } else if (mode == ZSTD_sf_noBlockDelimiters) { |
5790 | return ZSTD_copySequencesToSeqStoreNoBlockDelim; |
5791 | } |
5792 | assert(sequenceCopier != NULL); |
5793 | return sequenceCopier; |
5794 | } |
5795 | |
5796 | /* Compress, block-by-block, all of the sequences given. |
5797 | * |
5798 | * Returns the cumulative size of all compressed blocks (including their headers), |
5799 | * otherwise a ZSTD error. |
5800 | */ |
5801 | static size_t |
5802 | ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, |
5803 | void* dst, size_t dstCapacity, |
5804 | const ZSTD_Sequence* inSeqs, size_t inSeqsSize, |
5805 | const void* src, size_t srcSize) |
5806 | { |
5807 | size_t cSize = 0; |
5808 | U32 lastBlock; |
5809 | size_t blockSize; |
5810 | size_t compressedSeqsSize; |
5811 | size_t remaining = srcSize; |
5812 | ZSTD_sequencePosition seqPos = {0, 0, 0}; |
5813 | |
5814 | BYTE const* ip = (BYTE const*)src; |
5815 | BYTE* op = (BYTE*)dst; |
5816 | ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(mode: cctx->appliedParams.blockDelimiters); |
5817 | |
5818 | DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu" , srcSize, inSeqsSize); |
5819 | /* Special case: empty frame */ |
5820 | if (remaining == 0) { |
5821 | U32 const = 1 /* last block */ + (((U32)bt_raw)<<1); |
5822 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header" ); |
5823 | MEM_writeLE32(memPtr: op, val32: cBlockHeader24); |
5824 | op += ZSTD_blockHeaderSize; |
5825 | dstCapacity -= ZSTD_blockHeaderSize; |
5826 | cSize += ZSTD_blockHeaderSize; |
5827 | } |
5828 | |
5829 | while (remaining) { |
5830 | size_t cBlockSize; |
5831 | size_t additionalByteAdjustment; |
5832 | lastBlock = remaining <= cctx->blockSize; |
5833 | blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; |
5834 | ZSTD_resetSeqStore(ssPtr: &cctx->seqStore); |
5835 | DEBUGLOG(4, "Working on new block. Blocksize: %zu" , blockSize); |
5836 | |
5837 | additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); |
5838 | FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy" ); |
5839 | blockSize -= additionalByteAdjustment; |
5840 | |
5841 | /* If blocks are too small, emit as a nocompress block */ |
5842 | if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { |
5843 | cBlockSize = ZSTD_noCompressBlock(dst: op, dstCapacity, src: ip, srcSize: blockSize, lastBlock); |
5844 | FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed" ); |
5845 | DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu" , cBlockSize); |
5846 | cSize += cBlockSize; |
5847 | ip += blockSize; |
5848 | op += cBlockSize; |
5849 | remaining -= blockSize; |
5850 | dstCapacity -= cBlockSize; |
5851 | continue; |
5852 | } |
5853 | |
5854 | compressedSeqsSize = ZSTD_entropyCompressSeqStore(seqStorePtr: &cctx->seqStore, |
5855 | prevEntropy: &cctx->blockState.prevCBlock->entropy, nextEntropy: &cctx->blockState.nextCBlock->entropy, |
5856 | cctxParams: &cctx->appliedParams, |
5857 | dst: op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity: dstCapacity - ZSTD_blockHeaderSize, |
5858 | srcSize: blockSize, |
5859 | entropyWorkspace: cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, |
5860 | bmi2: cctx->bmi2); |
5861 | FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed" ); |
5862 | DEBUGLOG(4, "Compressed sequences size: %zu" , compressedSeqsSize); |
5863 | |
5864 | if (!cctx->isFirstBlock && |
5865 | ZSTD_maybeRLE(seqStore: &cctx->seqStore) && |
5866 | ZSTD_isRLE(src: (BYTE const*)src, length: srcSize)) { |
5867 | /* We don't want to emit our first block as a RLE even if it qualifies because |
5868 | * doing so will cause the decoder (cli only) to throw a "should consume all input error." |
5869 | * This is only an issue for zstd <= v1.4.3 |
5870 | */ |
5871 | compressedSeqsSize = 1; |
5872 | } |
5873 | |
5874 | if (compressedSeqsSize == 0) { |
5875 | /* ZSTD_noCompressBlock writes the block header as well */ |
5876 | cBlockSize = ZSTD_noCompressBlock(dst: op, dstCapacity, src: ip, srcSize: blockSize, lastBlock); |
5877 | FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed" ); |
5878 | DEBUGLOG(4, "Writing out nocompress block, size: %zu" , cBlockSize); |
5879 | } else if (compressedSeqsSize == 1) { |
5880 | cBlockSize = ZSTD_rleCompressBlock(dst: op, dstCapacity, src: *ip, srcSize: blockSize, lastBlock); |
5881 | FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed" ); |
5882 | DEBUGLOG(4, "Writing out RLE block, size: %zu" , cBlockSize); |
5883 | } else { |
5884 | U32 ; |
5885 | /* Error checking and repcodes update */ |
5886 | ZSTD_blockState_confirmRepcodesAndEntropyTables(bs: &cctx->blockState); |
5887 | if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) |
5888 | cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; |
5889 | |
5890 | /* Write block header into beginning of block*/ |
5891 | cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); |
5892 | MEM_writeLE24(memPtr: op, val: cBlockHeader); |
5893 | cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; |
5894 | DEBUGLOG(4, "Writing out compressed block, size: %zu" , cBlockSize); |
5895 | } |
5896 | |
5897 | cSize += cBlockSize; |
5898 | DEBUGLOG(4, "cSize running total: %zu" , cSize); |
5899 | |
5900 | if (lastBlock) { |
5901 | break; |
5902 | } else { |
5903 | ip += blockSize; |
5904 | op += cBlockSize; |
5905 | remaining -= blockSize; |
5906 | dstCapacity -= cBlockSize; |
5907 | cctx->isFirstBlock = 0; |
5908 | } |
5909 | } |
5910 | |
5911 | return cSize; |
5912 | } |
5913 | |
5914 | size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, |
5915 | const ZSTD_Sequence* inSeqs, size_t inSeqsSize, |
5916 | const void* src, size_t srcSize) |
5917 | { |
5918 | BYTE* op = (BYTE*)dst; |
5919 | size_t cSize = 0; |
5920 | size_t compressedBlocksSize = 0; |
5921 | size_t = 0; |
5922 | |
5923 | /* Transparent initialization stage, same as compressStream2() */ |
5924 | DEBUGLOG(3, "ZSTD_compressSequences()" ); |
5925 | assert(cctx != NULL); |
5926 | FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed" ); |
5927 | /* Begin writing output, starting with frame header */ |
5928 | frameHeaderSize = ZSTD_writeFrameHeader(dst: op, dstCapacity, params: &cctx->appliedParams, pledgedSrcSize: srcSize, dictID: cctx->dictID); |
5929 | op += frameHeaderSize; |
5930 | dstCapacity -= frameHeaderSize; |
5931 | cSize += frameHeaderSize; |
5932 | if (cctx->appliedParams.fParams.checksumFlag && srcSize) { |
5933 | xxh64_update(state: &cctx->xxhState, input: src, length: srcSize); |
5934 | } |
5935 | /* cSize includes block header size and compressed sequences size */ |
5936 | compressedBlocksSize = ZSTD_compressSequences_internal(cctx, |
5937 | dst: op, dstCapacity, |
5938 | inSeqs, inSeqsSize, |
5939 | src, srcSize); |
5940 | FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!" ); |
5941 | cSize += compressedBlocksSize; |
5942 | dstCapacity -= compressedBlocksSize; |
5943 | |
5944 | if (cctx->appliedParams.fParams.checksumFlag) { |
5945 | U32 const checksum = (U32) xxh64_digest(state: &cctx->xxhState); |
5946 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum" ); |
5947 | DEBUGLOG(4, "Write checksum : %08X" , (unsigned)checksum); |
5948 | MEM_writeLE32(memPtr: (char*)dst + cSize, val32: checksum); |
5949 | cSize += 4; |
5950 | } |
5951 | |
5952 | DEBUGLOG(3, "Final compressed size: %zu" , cSize); |
5953 | return cSize; |
5954 | } |
5955 | |
5956 | /*====== Finalize ======*/ |
5957 | |
5958 | /*! ZSTD_flushStream() : |
5959 | * @return : amount of data remaining to flush */ |
5960 | size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) |
5961 | { |
5962 | ZSTD_inBuffer input = { NULL, 0, 0 }; |
5963 | return ZSTD_compressStream2(cctx: zcs, output, input: &input, endOp: ZSTD_e_flush); |
5964 | } |
5965 | |
5966 | |
5967 | size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) |
5968 | { |
5969 | ZSTD_inBuffer input = { NULL, 0, 0 }; |
5970 | size_t const remainingToFlush = ZSTD_compressStream2(cctx: zcs, output, input: &input, endOp: ZSTD_e_end); |
5971 | FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed" ); |
5972 | if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ |
5973 | /* single thread mode : attempt to calculate remaining to flush more precisely */ |
5974 | { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; |
5975 | size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); |
5976 | size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; |
5977 | DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u" , (unsigned)toFlush); |
5978 | return toFlush; |
5979 | } |
5980 | } |
5981 | |
5982 | |
5983 | /*-===== Pre-defined compression levels =====-*/ |
5984 | #include "clevels.h" |
5985 | |
5986 | int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } |
5987 | int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } |
5988 | int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } |
5989 | |
5990 | static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) |
5991 | { |
5992 | ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint: 0, dictSize, mode: ZSTD_cpm_createCDict); |
5993 | switch (cParams.strategy) { |
5994 | case ZSTD_fast: |
5995 | case ZSTD_dfast: |
5996 | break; |
5997 | case ZSTD_greedy: |
5998 | case ZSTD_lazy: |
5999 | case ZSTD_lazy2: |
6000 | cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; |
6001 | break; |
6002 | case ZSTD_btlazy2: |
6003 | case ZSTD_btopt: |
6004 | case ZSTD_btultra: |
6005 | case ZSTD_btultra2: |
6006 | break; |
6007 | } |
6008 | return cParams; |
6009 | } |
6010 | |
6011 | static int ZSTD_dedicatedDictSearch_isSupported( |
6012 | ZSTD_compressionParameters const* cParams) |
6013 | { |
6014 | return (cParams->strategy >= ZSTD_greedy) |
6015 | && (cParams->strategy <= ZSTD_lazy2) |
6016 | && (cParams->hashLog > cParams->chainLog) |
6017 | && (cParams->chainLog <= 24); |
6018 | } |
6019 | |
6020 | /* |
6021 | * Reverses the adjustment applied to cparams when enabling dedicated dict |
6022 | * search. This is used to recover the params set to be used in the working |
6023 | * context. (Otherwise, those tables would also grow.) |
6024 | */ |
6025 | static void ZSTD_dedicatedDictSearch_revertCParams( |
6026 | ZSTD_compressionParameters* cParams) { |
6027 | switch (cParams->strategy) { |
6028 | case ZSTD_fast: |
6029 | case ZSTD_dfast: |
6030 | break; |
6031 | case ZSTD_greedy: |
6032 | case ZSTD_lazy: |
6033 | case ZSTD_lazy2: |
6034 | cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; |
6035 | if (cParams->hashLog < ZSTD_HASHLOG_MIN) { |
6036 | cParams->hashLog = ZSTD_HASHLOG_MIN; |
6037 | } |
6038 | break; |
6039 | case ZSTD_btlazy2: |
6040 | case ZSTD_btopt: |
6041 | case ZSTD_btultra: |
6042 | case ZSTD_btultra2: |
6043 | break; |
6044 | } |
6045 | } |
6046 | |
6047 | static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) |
6048 | { |
6049 | switch (mode) { |
6050 | case ZSTD_cpm_unknown: |
6051 | case ZSTD_cpm_noAttachDict: |
6052 | case ZSTD_cpm_createCDict: |
6053 | break; |
6054 | case ZSTD_cpm_attachDict: |
6055 | dictSize = 0; |
6056 | break; |
6057 | default: |
6058 | assert(0); |
6059 | break; |
6060 | } |
6061 | { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; |
6062 | size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; |
6063 | return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; |
6064 | } |
6065 | } |
6066 | |
6067 | /*! ZSTD_getCParams_internal() : |
6068 | * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. |
6069 | * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. |
6070 | * Use dictSize == 0 for unknown or unused. |
6071 | * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ |
6072 | static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) |
6073 | { |
6074 | U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); |
6075 | U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); |
6076 | int row; |
6077 | DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)" , compressionLevel); |
6078 | |
6079 | /* row */ |
6080 | if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ |
6081 | else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ |
6082 | else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; |
6083 | else row = compressionLevel; |
6084 | |
6085 | { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; |
6086 | DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u" , tableID, row, (U32)cp.strategy); |
6087 | /* acceleration factor */ |
6088 | if (compressionLevel < 0) { |
6089 | int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); |
6090 | cp.targetLength = (unsigned)(-clampedCompressionLevel); |
6091 | } |
6092 | /* refine parameters based on srcSize & dictSize */ |
6093 | return ZSTD_adjustCParams_internal(cPar: cp, srcSize: srcSizeHint, dictSize, mode); |
6094 | } |
6095 | } |
6096 | |
6097 | /*! ZSTD_getCParams() : |
6098 | * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. |
6099 | * Size values are optional, provide 0 if not known or unused */ |
6100 | ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) |
6101 | { |
6102 | if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; |
6103 | return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode: ZSTD_cpm_unknown); |
6104 | } |
6105 | |
6106 | /*! ZSTD_getParams() : |
6107 | * same idea as ZSTD_getCParams() |
6108 | * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). |
6109 | * Fields of `ZSTD_frameParameters` are set to default values */ |
6110 | static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { |
6111 | ZSTD_parameters params; |
6112 | ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); |
6113 | DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)" , compressionLevel); |
6114 | ZSTD_memset(¶ms, 0, sizeof(params)); |
6115 | params.cParams = cParams; |
6116 | params.fParams.contentSizeFlag = 1; |
6117 | return params; |
6118 | } |
6119 | |
6120 | /*! ZSTD_getParams() : |
6121 | * same idea as ZSTD_getCParams() |
6122 | * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). |
6123 | * Fields of `ZSTD_frameParameters` are set to default values */ |
6124 | ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { |
6125 | if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; |
6126 | return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, mode: ZSTD_cpm_unknown); |
6127 | } |
6128 | |