zstd_compress_internal.h source code [linux/lib/zstd/compress/zstd_compress_internal.h]

/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
2	/*
3	* Copyright (c) Meta Platforms, Inc. and affiliates.
4	* All rights reserved.
5	*
6	* This source code is licensed under both the BSD-style license (found in the
7	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
8	* in the COPYING file in the root directory of this source tree).
9	* You may select, at your option, one of the above-listed licenses.
10	*/
11
/* This header contains definitions
 * that shall **only** be used by modules within lib/compress.
 */
15
16	#ifndef ZSTD_COMPRESS_H
17	#define ZSTD_COMPRESS_H
18
/*-*************************************
*  Dependencies
***************************************/
22	#include "../common/zstd_internal.h"
23	#include "zstd_cwksp.h"
24	#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */
25	#include "zstd_preSplit.h" /* ZSTD_SLIPBLOCK_WORKSPACESIZE */
26
/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
39
40
/*-*************************************
*  Context memory management
***************************************/
/* Stage values stored in ZSTD_CCtx_s::stage. */
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
/* Stage values stored in ZSTD_CCtx_s::streamStage. */
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
46
47	typedef struct ZSTD_prefixDict_s {
48	const void* dict;
49	size_t dictSize;
50	ZSTD_dictContentType_e dictContentType;
51	} ZSTD_prefixDict;
52
53	typedef struct {
54	void* dictBuffer;
55	void const* dict;
56	size_t dictSize;
57	ZSTD_dictContentType_e dictContentType;
58	ZSTD_CDict* cdict;
59	} ZSTD_localDict;
60
61	typedef struct {
62	HUF_CElt CTable[HUF_CTABLE_SIZE_ST(`255`)];
63	HUF_repeat repeatMode;
64	} ZSTD_hufCTables_t;
65
66	typedef struct {
67	FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
68	FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
69	FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
70	FSE_repeat offcode_repeatMode;
71	FSE_repeat matchlength_repeatMode;
72	FSE_repeat litlength_repeatMode;
73	} ZSTD_fseCTables_t;
74
75	typedef struct {
76	ZSTD_hufCTables_t huf;
77	ZSTD_fseCTables_t fse;
78	} ZSTD_entropyCTables_t;
79
80	/* *********************************************
81	* Sequences *
82	***********************************************/
83	typedef struct SeqDef_s {
84	U32 offBase; / offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 /
85	U16 litLength;
86	U16 mlBase; / mlBase == matchLength - MINMATCH /
87	} SeqDef;
88
/* Controls whether seqStore has a single "long" litLength or matchLength. See SeqStore_t. */
typedef enum {
    ZSTD_llt_none = 0,             /* no longLengthType */
    ZSTD_llt_literalLength = 1,    /* represents a long literal */
    ZSTD_llt_matchLength = 2       /* represents a long match */
} ZSTD_longLengthType_e;
95
96	typedef struct {
97	SeqDef* sequencesStart;
98	SeqDef* sequences; / ptr to end of sequences /
99	BYTE* litStart;
100	BYTE* lit; / ptr to end of literals /
101	BYTE* llCode;
102	BYTE* mlCode;
103	BYTE* ofCode;
104	size_t maxNbSeq;
105	size_t maxNbLit;
106
107	/ longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength*
108	* in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
109	* the existing value of the litLength or matchLength by 0x10000.
110	*/
111	ZSTD_longLengthType_e longLengthType;
112	U32 longLengthPos; / Index of the sequence to apply long length modification to /
113	} SeqStore_t;
114
115	typedef struct {
116	U32 litLength;
117	U32 matchLength;
118	} ZSTD_SequenceLength;
119
120	/*
121	* Returns the ZSTD_SequenceLength for the given sequences. It handles the decoding of long sequences
122	* indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
123	*/
124	MEM_STATIC ZSTD_SequenceLength ZSTD_getSequenceLength(SeqStore_t const* seqStore, SeqDef const* seq)
125	{
126	ZSTD_SequenceLength seqLen;
127	seqLen.litLength = seq->litLength;
128	seqLen.matchLength = seq->mlBase + MINMATCH;
129	if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
130	if (seqStore->longLengthType == ZSTD_llt_literalLength) {
131	seqLen.litLength += `0x10000`;
132	}
133	if (seqStore->longLengthType == ZSTD_llt_matchLength) {
134	seqLen.matchLength += `0x10000`;
135	}
136	}
137	return seqLen;
138	}
139
140	const SeqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); / compress & dictBuilder /
141	int ZSTD_seqToCodes(const SeqStore_t* seqStorePtr); / compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) /
142
143
144	/* *********************************************
145	* Entropy buffer statistics structs and funcs *
146	***********************************************/
147	/ ZSTD_hufCTablesMetadata_t :*
148	* Stores Literals Block Type for a super-block in hType, and
149	* huffman tree description in hufDesBuffer.
150	* hufDesSize refers to the size of huffman tree description in bytes.
151	* This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
152	typedef struct {
153	SymbolEncodingType_e hType;
154	BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
155	size_t hufDesSize;
156	} ZSTD_hufCTablesMetadata_t;
157
158	/ ZSTD_fseCTablesMetadata_t :*
159	* Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
160	* fse tables in fseTablesBuffer.
161	* fseTablesSize refers to the size of fse tables in bytes.
162	* This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
163	typedef struct {
164	SymbolEncodingType_e llType;
165	SymbolEncodingType_e ofType;
166	SymbolEncodingType_e mlType;
167	BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
168	size_t fseTablesSize;
169	size_t lastCountSize; / This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() /
170	} ZSTD_fseCTablesMetadata_t;
171
172	typedef struct {
173	ZSTD_hufCTablesMetadata_t hufMetadata;
174	ZSTD_fseCTablesMetadata_t fseMetadata;
175	} ZSTD_entropyCTablesMetadata_t;
176
177	/ ZSTD_buildBlockEntropyStats() :*
178	* Builds entropy for the block.
179	* @return : 0 on success or error code */
180	size_t ZSTD_buildBlockEntropyStats(
181	const SeqStore_t* seqStorePtr,
182	const ZSTD_entropyCTables_t* prevEntropy,
183	ZSTD_entropyCTables_t* nextEntropy,
184	const ZSTD_CCtx_params* cctxParams,
185	ZSTD_entropyCTablesMetadata_t* entropyMetadata,
186	void* workspace, size_t wkspSize);
187
188	/* *******************************
189	* Compression internals structs *
190	*********************************/
191
192	typedef struct {
193	U32 off; / Offset sumtype code for the match, using ZSTD_storeSeq() format /
194	U32 len; / Raw length of match /
195	} ZSTD_match_t;
196
197	typedef struct {
198	U32 offset; / Offset of sequence /
199	U32 litLength; / Length of literals prior to match /
200	U32 matchLength; / Raw length of match /
201	} rawSeq;
202
203	typedef struct {
204	rawSeq* seq; / The start of the sequences /
205	size_t pos; / The index in seq where reading stopped. pos <= size. /
206	size_t posInSequence; / The position within the sequence at seq[pos] where reading*
207	stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength /*
208	size_t size; / The number of sequences. <= capacity. /
209	size_t capacity; / The capacity starting from `seq` pointer /
210	} RawSeqStore_t;
211
212	UNUSED_ATTR static const RawSeqStore_t kNullRawSeqStore = {NULL, `0`, `0`, `0`, `0`};
213
214	typedef struct {
215	int price; / price from beginning of segment to this position /
216	U32 off; / offset of previous match /
217	U32 mlen; / length of previous match /
218	U32 litlen; / nb of literals since previous match /
219	U32 rep[ZSTD_REP_NUM]; / offset history after previous match /
220	} ZSTD_optimal_t;
221
/* Price mode stored in optState_t::priceType. */
typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
223
224	#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
225	typedef struct {
226	/ All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() /
227	unsigned* litFreq; / table of literals statistics, of size 256 /
228	unsigned* litLengthFreq; / table of litLength statistics, of size (MaxLL+1) /
229	unsigned* matchLengthFreq; / table of matchLength statistics, of size (MaxML+1) /
230	unsigned* offCodeFreq; / table of offCode statistics, of size (MaxOff+1) /
231	ZSTD_match_t* matchTable; / list of found matches, of size ZSTD_OPT_SIZE /
232	ZSTD_optimal_t* priceTable; / All positions tracked by optimal parser, of size ZSTD_OPT_SIZE /
233
234	U32 litSum; / nb of literals /
235	U32 litLengthSum; / nb of litLength codes /
236	U32 matchLengthSum; / nb of matchLength codes /
237	U32 offCodeSum; / nb of offset codes /
238	U32 litSumBasePrice; / to compare to log2(litfreq) /
239	U32 litLengthSumBasePrice; / to compare to log2(llfreq) /
240	U32 matchLengthSumBasePrice;/ to compare to log2(mlfreq) /
241	U32 offCodeSumBasePrice; / to compare to log2(offreq) /
242	ZSTD_OptPrice_e priceType; / prices can be determined dynamically, or follow a pre-defined cost structure /
243	const ZSTD_entropyCTables_t* symbolCosts; / pre-calculated dictionary statistics /
244	ZSTD_ParamSwitch_e literalCompressionMode;
245	} optState_t;
246
247	typedef struct {
248	ZSTD_entropyCTables_t entropy;
249	U32 rep[ZSTD_REP_NUM];
250	} ZSTD_compressedBlockState_t;
251
252	typedef struct {
253	BYTE const* nextSrc; / next block here to continue on current prefix /
254	BYTE const* base; / All regular indexes relative to this position /
255	BYTE const* dictBase; / extDict indexes relative to this position /
256	U32 dictLimit; / below that point, need extDict /
257	U32 lowLimit; / below that point, no more valid data /
258	U32 nbOverflowCorrections; / Number of times overflow correction has run since*
259	* ZSTD_window_init(). Useful for debugging coredumps
260	* and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
261	*/
262	} ZSTD_window_t;
263
264	#define ZSTD_WINDOW_START_INDEX 2
265
266	typedef struct ZSTD_MatchState_t ZSTD_MatchState_t;
267
268	#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */
269
270	struct ZSTD_MatchState_t {
271	ZSTD_window_t window; / State for window round buffer management /
272	U32 loadedDictEnd; / index of end of dictionary, within context's referential.*
273	* When loadedDictEnd != 0, a dictionary is in use, and still valid.
274	* This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
275	* Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
276	* When dict referential is copied into active context (i.e. not attached),
277	* loadedDictEnd == dictSize, since referential starts from zero.
278	*/
279	U32 nextToUpdate; / index from which to continue table update /
280	U32 hashLog3; / dispatch table for matches of len==3 : larger == faster, more memory /
281
282	U32 rowHashLog; / For row-based matchfinder: Hashlog based on nb of rows in the hashTable./
283	BYTE* tagTable; / For row-based matchFinder: A row-based table containing the hashes and head index. /
284	U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; / For row-based matchFinder: a cache of hashes to improve speed /
285	U64 hashSalt; / For row-based matchFinder: salts the hash for reuse of tag table /
286	U32 hashSaltEntropy; / For row-based matchFinder: collects entropy for salt generation /
287
288	U32* hashTable;
289	U32* hashTable3;
290	U32* chainTable;
291
292	int forceNonContiguous; / Non-zero if we should force non-contiguous load for the next window update. /
293
294	int dedicatedDictSearch; / Indicates whether this matchState is using the*
295	* dedicated dictionary search structure.
296	*/
297	optState_t opt; / optimal parser state /
298	const ZSTD_MatchState_t* dictMatchState;
299	ZSTD_compressionParameters cParams;
300	const RawSeqStore_t* ldmSeqStore;
301
302	/ Controls prefetching in some dictMatchState matchfinders.*
303	* This behavior is controlled from the cctx ms.
304	* This parameter has no effect in the cdict ms. */
305	int prefetchCDictTables;
306
307	/ When == 0, lazy match finders insert every position.*
308	* When != 0, lazy match finders only insert positions they search.
309	* This allows them to skip much faster over incompressible data,
310	* at a small cost to compression ratio.
311	*/
312	int lazySkipping;
313	};
314
315	typedef struct {
316	ZSTD_compressedBlockState_t* prevCBlock;
317	ZSTD_compressedBlockState_t* nextCBlock;
318	ZSTD_MatchState_t matchState;
319	} ZSTD_blockState_t;
320
321	typedef struct {
322	U32 offset;
323	U32 checksum;
324	} ldmEntry_t;
325
326	typedef struct {
327	BYTE const* split;
328	U32 hash;
329	U32 checksum;
330	ldmEntry_t* bucket;
331	} ldmMatchCandidate_t;
332
333	#define LDM_BATCH_SIZE 64
334
335	typedef struct {
336	ZSTD_window_t window; / State for the window round buffer management /
337	ldmEntry_t* hashTable;
338	U32 loadedDictEnd;
339	BYTE* bucketOffsets; / Next position in bucket to insert entry /
340	size_t splitIndices[LDM_BATCH_SIZE];
341	ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
342	} ldmState_t;
343
344	typedef struct {
345	ZSTD_ParamSwitch_e enableLdm; / ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default /
346	U32 hashLog; / Log size of hashTable /
347	U32 bucketSizeLog; / Log bucket size for collision resolution, at most 8 /
348	U32 minMatchLength; / Minimum match length /
349	U32 hashRateLog; / Log number of entries to skip /
350	U32 windowLog; / Window log for the LDM /
351	} ldmParams_t;
352
353	typedef struct {
354	int collectSequences;
355	ZSTD_Sequence* seqStart;
356	size_t seqIndex;
357	size_t maxSequences;
358	} SeqCollector;
359
360	struct ZSTD_CCtx_params_s {
361	ZSTD_format_e format;
362	ZSTD_compressionParameters cParams;
363	ZSTD_frameParameters fParams;
364
365	int compressionLevel;
366	int forceWindow; / force back-references to respect limit of*
367	* 1<<wLog, even for dictionary */
368	size_t targetCBlockSize; / Tries to fit compressed block size to be around targetCBlockSize.*
369	* No target when targetCBlockSize == 0.
370	* There is no guarantee on compressed block size */
371	int srcSizeHint; / User's best guess of source size.*
372	* Hint is not valid when srcSizeHint == 0.
373	* There is no guarantee that hint is close to actual source size */
374
375	ZSTD_dictAttachPref_e attachDictPref;
376	ZSTD_ParamSwitch_e literalCompressionMode;
377
378	/ Multithreading: used to pass parameters to mtctx /
379	int nbWorkers;
380	size_t jobSize;
381	int overlapLog;
382	int rsyncable;
383
384	/ Long distance matching parameters /
385	ldmParams_t ldmParams;
386
387	/ Dedicated dict search algorithm trigger /
388	int enableDedicatedDictSearch;
389
390	/ Input/output buffer modes /
391	ZSTD_bufferMode_e inBufferMode;
392	ZSTD_bufferMode_e outBufferMode;
393
394	/ Sequence compression API /
395	ZSTD_SequenceFormat_e blockDelimiters;
396	int validateSequences;
397
398	/ Block splitting*
399	* @postBlockSplitter executes split analysis after sequences are produced,
400	* it's more accurate but consumes more resources.
401	* @preBlockSplitter_level splits before knowing sequences,
402	* it's more approximative but also cheaper.
403	* Valid @preBlockSplitter_level values range from 0 to 6 (included).
404	* 0 means auto, 1 means do not split,
405	* then levels are sorted in increasing cpu budget, from 2 (fastest) to 6 (slowest).
406	* Highest @preBlockSplitter_level combines well with @postBlockSplitter.
407	*/
408	ZSTD_ParamSwitch_e postBlockSplitter;
409	int preBlockSplitter_level;
410
411	/ Adjust the max block size/
412	size_t maxBlockSize;
413
414	/ Param for deciding whether to use row-based matchfinder /
415	ZSTD_ParamSwitch_e useRowMatchFinder;
416
417	/ Always load a dictionary in ext-dict mode (not prefix mode)? /
418	int deterministicRefPrefix;
419
420	/ Internal use, for createCCtxParams() and freeCCtxParams() only /
421	ZSTD_customMem customMem;
422
423	/ Controls prefetching in some dictMatchState matchfinders /
424	ZSTD_ParamSwitch_e prefetchCDictTables;
425
426	/ Controls whether zstd will fall back to an internal matchfinder*
427	* if the external matchfinder returns an error code. */
428	int enableMatchFinderFallback;
429
430	/ Parameters for the external sequence producer API.*
431	* Users set these parameters through ZSTD_registerSequenceProducer().
432	* It is not possible to set these parameters individually through the public API. */
433	void* extSeqProdState;
434	ZSTD_sequenceProducer_F extSeqProdFunc;
435
436	/ Controls repcode search in external sequence parsing /
437	ZSTD_ParamSwitch_e searchForExternalRepcodes;
438	}; / typedef'd to ZSTD_CCtx_params within "zstd.h" /
439
/* Workspace sizes: TMP_WORKSPACE_SIZE is the larger of the entropy workspace
 * and ZSTD_SLIPBLOCK_WORKSPACESIZE. */
#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
#define TMP_WORKSPACE_SIZE (MAX(ENTROPY_WORKSPACE_SIZE, ZSTD_SLIPBLOCK_WORKSPACESIZE))
443
/*
 * Indicates whether this compression proceeds directly from user-provided
 * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
 * whether the context needs to buffer the input/output (ZSTDb_buffered).
 */
typedef enum {
    ZSTDb_not_buffered,
    ZSTDb_buffered
} ZSTD_buffered_policy_e;
453
454	/*
455	* Struct that contains all elements of block splitter that should be allocated
456	* in a wksp.
457	*/
458	#define ZSTD_MAX_NB_BLOCK_SPLITS 196
459	typedef struct {
460	SeqStore_t fullSeqStoreChunk;
461	SeqStore_t firstHalfSeqStore;
462	SeqStore_t secondHalfSeqStore;
463	SeqStore_t currSeqStore;
464	SeqStore_t nextSeqStore;
465
466	U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
467	ZSTD_entropyCTablesMetadata_t entropyMetadata;
468	} ZSTD_blockSplitCtx;
469
470	struct ZSTD_CCtx_s {
471	ZSTD_compressionStage_e stage;
472	int cParamsChanged; / == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. /
473	int bmi2; / == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. /
474	ZSTD_CCtx_params requestedParams;
475	ZSTD_CCtx_params appliedParams;
476	ZSTD_CCtx_params simpleApiParams; / Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. /
477	U32 dictID;
478	size_t dictContentSize;
479
480	ZSTD_cwksp workspace; / manages buffer for dynamic allocations /
481	size_t blockSizeMax;
482	unsigned long long pledgedSrcSizePlusOne; / this way, 0 (default) == unknown /
483	unsigned long long consumedSrcSize;
484	unsigned long long producedCSize;
485	struct xxh64_state xxhState;
486	ZSTD_customMem customMem;
487	ZSTD_threadPool* pool;
488	size_t staticSize;
489	SeqCollector seqCollector;
490	int isFirstBlock;
491	int initialized;
492
493	SeqStore_t seqStore; / sequences storage ptrs /
494	ldmState_t ldmState; / long distance matching state /
495	rawSeq* ldmSequences; / Storage for the ldm output sequences /
496	size_t maxNbLdmSequences;
497	RawSeqStore_t externSeqStore; / Mutable reference to external sequences /
498	ZSTD_blockState_t blockState;
499	void* tmpWorkspace; / used as substitute of stack space - must be aligned for S64 type /
500	size_t tmpWkspSize;
501
502	/ Whether we are streaming or not /
503	ZSTD_buffered_policy_e bufferedPolicy;
504
505	/ streaming /
506	char* inBuff;
507	size_t inBuffSize;
508	size_t inToCompress;
509	size_t inBuffPos;
510	size_t inBuffTarget;
511	char* outBuff;
512	size_t outBuffSize;
513	size_t outBuffContentSize;
514	size_t outBuffFlushedSize;
515	ZSTD_cStreamStage streamStage;
516	U32 frameEnded;
517
518	/ Stable in/out buffer verification /
519	ZSTD_inBuffer expectedInBuffer;
520	size_t stableIn_notConsumed; / nb bytes within stable input buffer that are said to be consumed but are not /
521	size_t expectedOutBufferSize;
522
523	/ Dictionary /
524	ZSTD_localDict localDict;
525	const ZSTD_CDict* cdict;
526	ZSTD_prefixDict prefixDict; / single-usage dictionary /
527
528	/ Multi-threading /
529
530	/ Tracing /
531
532	/ Workspace for block splitter /
533	ZSTD_blockSplitCtx blockSplitCtx;
534
535	/ Buffer for output from external sequence producer /
536	ZSTD_Sequence* extSeqBuf;
537	size_t extSeqBufCapacity;
538	};
539
typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
typedef enum { ZSTD_tfp_forCCtx, ZSTD_tfp_forCDict } ZSTD_tableFillPurpose_e;
542
/* Dictionary mode; one of the inputs of ZSTD_selectBlockCompressor(). */
typedef enum {
    ZSTD_noDict = 0,
    ZSTD_extDict = 1,
    ZSTD_dictMatchState = 2,
    ZSTD_dedicatedDictSearch = 3
} ZSTD_dictMode_e;
549
typedef enum {
    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
                                 * In this mode we use both the srcSize and the dictSize
                                 * when selecting and adjusting parameters.
                                 */
    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
                                 * In this mode we only take the srcSize into account when selecting
                                 * and adjusting parameters.
                                 */
    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
                                 * In this mode we take both the source size and the dictionary size
                                 * into account when selecting and adjusting the parameters.
                                 */
    ZSTD_cpm_unknown = 3        /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
                                 * We don't know what these parameters are for. We default to the legacy
                                 * behavior of taking both the source size and the dict size into account
                                 * when selecting and adjusting parameters.
                                 */
} ZSTD_CParamMode_e;
569
570	typedef size_t (*ZSTD_BlockCompressor_f) (
571	ZSTD_MatchState_t* bs, SeqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
572	void const* src, size_t srcSize);
573	ZSTD_BlockCompressor_f ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_ParamSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
574
575
576	MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
577	{
578	static const BYTE LL_Code[`64`] = { `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`,
579	`8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
580	`16`, `16`, `17`, `17`, `18`, `18`, `19`, `19`,
581	`20`, `20`, `20`, `20`, `21`, `21`, `21`, `21`,
582	`22`, `22`, `22`, `22`, `22`, `22`, `22`, `22`,
583	`23`, `23`, `23`, `23`, `23`, `23`, `23`, `23`,
584	`24`, `24`, `24`, `24`, `24`, `24`, `24`, `24`,
585	`24`, `24`, `24`, `24`, `24`, `24`, `24`, `24` };
586	static const U32 LL_deltaCode = `19`;
587	return (litLength > `63`) ? ZSTD_highbit32(val: litLength) + LL_deltaCode : LL_Code[litLength];
588	}
589
590	/ ZSTD_MLcode() :*
591	* note : mlBase = matchLength - MINMATCH;
592	* because it's the format it's stored in seqStore->sequences */
593	MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
594	{
595	static const BYTE ML_Code[`128`] = { `0`, `1`, `2`, `3`, `4`, `5`, `6`, `7`, `8`, `9`, `10`, `11`, `12`, `13`, `14`, `15`,
596	`16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`,
597	`32`, `32`, `33`, `33`, `34`, `34`, `35`, `35`, `36`, `36`, `36`, `36`, `37`, `37`, `37`, `37`,
598	`38`, `38`, `38`, `38`, `38`, `38`, `38`, `38`, `39`, `39`, `39`, `39`, `39`, `39`, `39`, `39`,
599	`40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`, `40`,
600	`41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`, `41`,
601	`42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`,
602	`42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42`, `42` };
603	static const U32 ML_deltaCode = `36`;
604	return (mlBase > `127`) ? ZSTD_highbit32(val: mlBase) + ML_deltaCode : ML_Code[mlBase];
605	}
606
607	/ ZSTD_cParam_withinBounds:*
608	* @return 1 if value is within cParam bounds,
609	* 0 otherwise */
610	MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
611	{
612	ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
613	if (ZSTD_isError(code: bounds.error)) return `0`;
614	if (value < bounds.lowerBound) return `0`;
615	if (value > bounds.upperBound) return `0`;
616	return `1`;
617	}
618
619	/ ZSTD_selectAddr:*
620	* @return index >= lowLimit ? candidate : backup,
621	* tries to force branchless codegen. */
622	MEM_STATIC const BYTE*
623	ZSTD_selectAddr(U32 index, U32 lowLimit, const BYTE* candidate, const BYTE* backup)
624	{
625	#if defined(__x86_64__)
626	__asm__ (
627	"cmp %1, %2\n"
628	"cmova %3, %0\n"
629	: "+r"(candidate)
630	: "r"(index), "r"(lowLimit), "r"(backup)
631	);
632	return candidate;
633	#else
634	return index >= lowLimit ? candidate : backup;
635	#endif
636	}
637
638	/ ZSTD_noCompressBlock() :*
639	* Writes uncompressed block to dst buffer from given src.
640	* Returns the size of the block */
641	MEM_STATIC size_t
642	ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
643	{
644	U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<`1`) + (U32)(srcSize << `3`);
645	DEBUGLOG(`5`, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity);
646	RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
647	dstSize_tooSmall, "dst buf too small for uncompressed block");
648	MEM_writeLE24(memPtr: dst, val: cBlockHeader24);
649	ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
650	return ZSTD_blockHeaderSize + srcSize;
651	}
652
653	MEM_STATIC size_t
654	ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
655	{
656	BYTE* const op = (BYTE*)dst;
657	U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<`1`) + (U32)(srcSize << `3`);
658	RETURN_ERROR_IF(dstCapacity < `4`, dstSize_tooSmall, "");
659	MEM_writeLE24(memPtr: op, val: cBlockHeader);
660	op[`3`] = src;
661	return `4`;
662	}
663
664
665	/ ZSTD_minGain() :*
666	* minimum compression required
667	* to generate a compress block or a compressed literals section.
668	* note : use same formula for both situations */
669	MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
670	{
671	U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - `1` : `6`;
672	ZSTD_STATIC_ASSERT(ZSTD_btultra == `8`);
673	assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat));
674	return (srcSize >> minlog) + `2`;
675	}
676
677	MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
678	{
679	switch (cctxParams->literalCompressionMode) {
680	case ZSTD_ps_enable:
681	return `0`;
682	case ZSTD_ps_disable:
683	return `1`;
684	default:
685	assert(`0` / impossible: pre-validated /);
686	ZSTD_FALLTHROUGH;
687	case ZSTD_ps_auto:
688	return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > `0`);
689	}
690	}
691
692	/! ZSTD_safecopyLiterals() :*
693	* memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
694	* Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
695	* large copies.
696	*/
697	static void
698	ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
699	{
700	assert(iend > ilimit_w);
701	if (ip <= ilimit_w) {
702	ZSTD_wildcopy(dst: op, src: ip, length: ilimit_w - ip, ovtype: ZSTD_no_overlap);
703	op += ilimit_w - ip;
704	ip = ilimit_w;
705	}
706	while (ip < iend) op++ = ip++;
707	}
708
709
/* offBase sum-type helpers.
 * An offBase in 1..ZSTD_REP_NUM is a repcode ID; a value above ZSTD_REP_NUM
 * encodes a real offset as (offset + ZSTD_REP_NUM).
 * Every macro argument is parenthesized in the expansion, so expressions with
 * low-precedence operators (e.g. a ternary) can be passed safely. */
#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1)
#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2)
#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3)
#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */
#define OFFSET_TO_OFFBASE(o)  (assert((o)>0), (o) + ZSTD_REP_NUM)
#define OFFBASE_IS_OFFSET(o)  ((o) > ZSTD_REP_NUM)
#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM)
#define OFFBASE_TO_OFFSET(o)  (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM)
#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o))  /* returns ID 1,2,3 */
719
720	/! ZSTD_storeSeqOnly() :*
721	* Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
722	* Literals themselves are not copied, but @litPtr is updated.
723	* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
724	* @matchLength : must be >= MINMATCH
725	*/
726	HINT_INLINE UNUSED_ATTR void
727	ZSTD_storeSeqOnly(SeqStore_t* seqStorePtr,
728	size_t litLength,
729	U32 offBase,
730	size_t matchLength)
731	{
732	assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
733
734	/ literal Length /
735	assert(litLength <= ZSTD_BLOCKSIZE_MAX);
736	if (UNLIKELY(litLength>`0xFFFF`)) {
737	assert(seqStorePtr->longLengthType == ZSTD_llt_none); / there can only be a single long length /
738	seqStorePtr->longLengthType = ZSTD_llt_literalLength;
739	seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
740	}
741	seqStorePtr->sequences[`0`].litLength = (U16)litLength;
742
743	/ match offset /
744	seqStorePtr->sequences[`0`].offBase = offBase;
745
746	/ match Length /
747	assert(matchLength <= ZSTD_BLOCKSIZE_MAX);
748	assert(matchLength >= MINMATCH);
749	{ size_t const mlBase = matchLength - MINMATCH;
750	if (UNLIKELY(mlBase>`0xFFFF`)) {
751	assert(seqStorePtr->longLengthType == ZSTD_llt_none); / there can only be a single long length /
752	seqStorePtr->longLengthType = ZSTD_llt_matchLength;
753	seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
754	}
755	seqStorePtr->sequences[`0`].mlBase = (U16)mlBase;
756	}
757
758	seqStorePtr->sequences++;
759	}
760
761	/! ZSTD_storeSeq() :*
762	* Store a sequence (litlen, litPtr, offBase and matchLength) into SeqStore_t.
763	* @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE().
764	* @matchLength : must be >= MINMATCH
765	* Allowed to over-read literals up to litLimit.
766	*/
767	HINT_INLINE UNUSED_ATTR void
768	ZSTD_storeSeq(SeqStore_t* seqStorePtr,
769	size_t litLength, const BYTE* literals, const BYTE* litLimit,
770	U32 offBase,
771	size_t matchLength)
772	{
773	BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
774	BYTE const* const litEnd = literals + litLength;
775	#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
776	static const BYTE* g_start = NULL;
777	if (g_start==NULL) g_start = (const BYTE)literals; /* note : index only works for compression within a single segment /
778	{ U32 const pos = (U32)((const BYTE*)literals - g_start);
779	DEBUGLOG(`6`, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u",
780	pos, (U32)litLength, (U32)matchLength, (U32)offBase);
781	}
782	#endif
783	assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
784	/ copy Literals /
785	assert(seqStorePtr->maxNbLit <= `128` KB);
786	assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
787	assert(literals + litLength <= litLimit);
788	if (litEnd <= litLimit_w) {
789	/ Common case we can use wildcopy.*
790	* First copy 16 bytes, because literals are likely short.
791	*/
792	ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= `16`);
793	ZSTD_copy16(dst: seqStorePtr->lit, src: literals);
794	if (litLength > `16`) {
795	ZSTD_wildcopy(dst: seqStorePtr->lit+`16`, src: literals+`16`, length: (ptrdiff_t)litLength-`16`, ovtype: ZSTD_no_overlap);
796	}
797	} else {
798	ZSTD_safecopyLiterals(op: seqStorePtr->lit, ip: literals, iend: litEnd, ilimit_w: litLimit_w);
799	}
800	seqStorePtr->lit += litLength;
801
802	ZSTD_storeSeqOnly(seqStorePtr, litLength, offBase, matchLength);
803	}
804
805	/ ZSTD_updateRep() :*
806	* updates in-place @rep (array of repeat offsets)
807	* @offBase : sum-type, using numeric representation of ZSTD_storeSeq()
808	*/
809	MEM_STATIC void
810	ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
811	{
812	if (OFFBASE_IS_OFFSET(offBase)) { / full offset /
813	rep[`2`] = rep[`1`];
814	rep[`1`] = rep[`0`];
815	rep[`0`] = OFFBASE_TO_OFFSET(offBase);
816	} else { / repcode /
817	U32 const repCode = OFFBASE_TO_REPCODE(offBase) - `1` + ll0;
818	if (repCode > `0`) { / note : if repCode==0, no change /
819	U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[`0`] - `1`) : rep[repCode];
820	rep[`2`] = (repCode >= `2`) ? rep[`1`] : rep[`2`];
821	rep[`1`] = rep[`0`];
822	rep[`0`] = currentOffset;
823	} else { / repCode == 0 /
824	/ nothing to do /
825	}
826	}
827	}
828
/* Repcodes_t :
 * By-value container for a set of 3 repeat offsets,
 * it is the return type of ZSTD_newRep(). */
typedef struct repcodes_s {
    U32 rep[3];
} Repcodes_t;
832
833	MEM_STATIC Repcodes_t
834	ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
835	{
836	Repcodes_t newReps;
837	ZSTD_memcpy(&newReps, rep, sizeof(newReps));
838	ZSTD_updateRep(rep: newReps.rep, offBase, ll0);
839	return newReps;
840	}
841
842
/*-*************************************
*  Match length counter
***************************************/
846	MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
847	{
848	const BYTE* const pStart = pIn;
849	const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-`1`);
850
851	if (pIn < pInLoopLimit) {
852	{ size_t const diff = MEM_readST(memPtr: pMatch) ^ MEM_readST(memPtr: pIn);
853	if (diff) return ZSTD_NbCommonBytes(val: diff); }
854	pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
855	while (pIn < pInLoopLimit) {
856	size_t const diff = MEM_readST(memPtr: pMatch) ^ MEM_readST(memPtr: pIn);
857	if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
858	pIn += ZSTD_NbCommonBytes(val: diff);
859	return (size_t)(pIn - pStart);
860	} }
861	if (MEM_64bits() && (pIn<(pInLimit-`3`)) && (MEM_read32(memPtr: pMatch) == MEM_read32(memPtr: pIn))) { pIn+=`4`; pMatch+=`4`; }
862	if ((pIn<(pInLimit-`1`)) && (MEM_read16(memPtr: pMatch) == MEM_read16(memPtr: pIn))) { pIn+=`2`; pMatch+=`2`; }
863	if ((pIn<pInLimit) && (pMatch == pIn)) pIn++;
864	return (size_t)(pIn - pStart);
865	}
866
867	/ ZSTD_count_2segments() :*
868	* can count match length with `ip` & `match` in 2 different segments.
869	* convention : on reaching mEnd, match count continue starting from iStart
870	*/
871	MEM_STATIC size_t
872	ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
873	const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
874	{
875	const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
876	size_t const matchLength = ZSTD_count(pIn: ip, pMatch: match, pInLimit: vEnd);
877	if (match + matchLength != mEnd) return matchLength;
878	DEBUGLOG(`7`, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
879	DEBUGLOG(`7`, "distance from match beginning to end dictionary = %i", (int)(mEnd - match));
880	DEBUGLOG(`7`, "distance from current pos to end buffer = %i", (int)(iEnd - ip));
881	DEBUGLOG(`7`, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
882	DEBUGLOG(`7`, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
883	return matchLength + ZSTD_count(pIn: ip+matchLength, pMatch: iStart, pInLimit: iEnd);
884	}
885
886
/*-*************************************
 *  Hashes
 ***************************************/
890	static const U32 prime3bytes = `506832829U`;
891	static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= `32`); return (((u << (`32`-`24`)) * prime3bytes) ^ s) >> (`32`-h) ; }
892	MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(u: MEM_readLE32(memPtr: ptr), h, s: `0`); } / only in zstd_opt.h /
893	MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(u: MEM_readLE32(memPtr: ptr), h, s); }
894
895	static const U32 prime4bytes = `2654435761U`;
896	static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= `32`); return ((u * prime4bytes) ^ s) >> (`32`-h) ; }
897	static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(u: MEM_readLE32(memPtr: ptr), h, s: `0`); }
898	static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(u: MEM_readLE32(memPtr: ptr), h, s); }
899
900	static const U64 prime5bytes = `889523592379ULL`;
901	static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= `64`); return (size_t)((((u << (`64`-`40`)) * prime5bytes) ^ s) >> (`64`-h)) ; }
902	static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(u: MEM_readLE64(memPtr: p), h, s: `0`); }
903	static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(u: MEM_readLE64(memPtr: p), h, s); }
904
905	static const U64 prime6bytes = `227718039650203ULL`;
906	static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= `64`); return (size_t)((((u << (`64`-`48`)) * prime6bytes) ^ s) >> (`64`-h)) ; }
907	static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(u: MEM_readLE64(memPtr: p), h, s: `0`); }
908	static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(u: MEM_readLE64(memPtr: p), h, s); }
909
910	static const U64 prime7bytes = `58295818150454627ULL`;
911	static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= `64`); return (size_t)((((u << (`64`-`56`)) * prime7bytes) ^ s) >> (`64`-h)) ; }
912	static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(u: MEM_readLE64(memPtr: p), h, s: `0`); }
913	static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(u: MEM_readLE64(memPtr: p), h, s); }
914
915	static const U64 prime8bytes = `0xCF1BBCDCB7A56463ULL`;
916	static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= `64`); return (size_t)((((u) * prime8bytes) ^ s) >> (`64`-h)) ; }
917	static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(u: MEM_readLE64(memPtr: p), h, s: `0`); }
918	static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(u: MEM_readLE64(memPtr: p), h, s); }
919
920
921	MEM_STATIC FORCE_INLINE_ATTR
922	size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
923	{
924	/ Although some of these hashes do support hBits up to 64, some do not.*
925	* To be on the safe side, always avoid hBits > 32. */
926	assert(hBits <= `32`);
927
928	switch(mls)
929	{
930	default:
931	case `4`: return ZSTD_hash4Ptr(ptr: p, h: hBits);
932	case `5`: return ZSTD_hash5Ptr(p, h: hBits);
933	case `6`: return ZSTD_hash6Ptr(p, h: hBits);
934	case `7`: return ZSTD_hash7Ptr(p, h: hBits);
935	case `8`: return ZSTD_hash8Ptr(p, h: hBits);
936	}
937	}
938
939	MEM_STATIC FORCE_INLINE_ATTR
940	size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) {
941	/ Although some of these hashes do support hBits up to 64, some do not.*
942	* To be on the safe side, always avoid hBits > 32. */
943	assert(hBits <= `32`);
944
945	switch(mls)
946	{
947	default:
948	case `4`: return ZSTD_hash4PtrS(ptr: p, h: hBits, s: (U32)hashSalt);
949	case `5`: return ZSTD_hash5PtrS(p, h: hBits, s: hashSalt);
950	case `6`: return ZSTD_hash6PtrS(p, h: hBits, s: hashSalt);
951	case `7`: return ZSTD_hash7PtrS(p, h: hBits, s: hashSalt);
952	case `8`: return ZSTD_hash8PtrS(p, h: hBits, s: hashSalt);
953	}
954	}
955
956
957	/ ZSTD_ipow() :*
958	* Return base^exponent.
959	*/
960	static U64 ZSTD_ipow(U64 base, U64 exponent)
961	{
962	U64 power = `1`;
963	while (exponent) {
964	if (exponent & `1`) power *= base;
965	exponent >>= `1`;
966	base *= base;
967	}
968	return power;
969	}
970
/* Constant added to every byte value before it enters the rolling hash,
 * see ZSTD_rollingHash_append() and ZSTD_rollingHash_rotate(). */
#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
972
973	/ ZSTD_rollingHash_append() :*
974	* Add the buffer to the hash value.
975	*/
976	static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
977	{
978	BYTE const* istart = (BYTE const*)buf;
979	size_t pos;
980	for (pos = `0`; pos < size; ++pos) {
981	hash *= prime8bytes;
982	hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
983	}
984	return hash;
985	}
986
987	/ ZSTD_rollingHash_compute() :*
988	* Compute the rolling hash value of the buffer.
989	*/
990	MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
991	{
992	return ZSTD_rollingHash_append(hash: `0`, buf, size);
993	}
994
995	/ ZSTD_rollingHash_primePower() :*
996	* Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
997	* over a window of length bytes.
998	*/
999	MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
1000	{
1001	return ZSTD_ipow(base: prime8bytes, exponent: length - `1`);
1002	}
1003
1004	/ ZSTD_rollingHash_rotate() :*
1005	* Rotate the rolling hash by one byte.
1006	*/
1007	MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
1008	{
1009	hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
1010	hash *= prime8bytes;
1011	hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
1012	return hash;
1013	}
1014
/*-*************************************
*  Round buffer management
***************************************/
/* Max @current value allowed:
 * In 32-bit mode: we want to avoid crossing the 2 GB limit,
 *                 reducing risks of side effects in case of signed operations on indexes.
 * In 64-bit mode: we want to ensure that adding the maximum job size (512 MB)
 *                 doesn't overflow U32 index capacity (4 GB) */
#define ZSTD_CURRENT_MAX (MEM_64bits() ? 3500U MB : 2000U MB)
/* Maximum chunk size before overflow correction needs to be called again */
#define ZSTD_CHUNKSIZE_MAX                                                     \
    ( ((U32)-1)                  /* Maximum ending current index */            \
    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
1028
1029	/*
1030	* ZSTD_window_clear():
1031	* Clears the window containing the history by simply setting it to empty.
1032	*/
1033	MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
1034	{
1035	size_t const endT = (size_t)(window->nextSrc - window->base);
1036	U32 const end = (U32)endT;
1037
1038	window->lowLimit = end;
1039	window->dictLimit = end;
1040	}
1041
1042	MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
1043	{
1044	return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
1045	window.lowLimit == ZSTD_WINDOW_START_INDEX &&
1046	(window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
1047	}
1048
1049	/*
1050	* ZSTD_window_hasExtDict():
1051	* Returns non-zero if the window has a non-empty extDict.
1052	*/
1053	MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
1054	{
1055	return window.lowLimit < window.dictLimit;
1056	}
1057
1058	/*
1059	* ZSTD_matchState_dictMode():
1060	* Inspects the provided matchState and figures out what dictMode should be
1061	* passed to the compressor.
1062	*/
1063	MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_MatchState_t *ms)
1064	{
1065	return ZSTD_window_hasExtDict(window: ms->window) ?
1066	ZSTD_extDict :
1067	ms->dictMatchState != NULL ?
1068	(ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
1069	ZSTD_noDict;
1070	}
1071
/* Defining this macro to non-zero tells zstd to run the overflow correction
 * code much more frequently. This is very inefficient, and should only be
 * used for tests and fuzzers.
 * When left undefined, it defaults to 1 if FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 * is defined, and to 0 otherwise.
 */
#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
#  else
#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
#  endif
#endif
1083
1084	/*
1085	* ZSTD_window_canOverflowCorrect():
1086	* Returns non-zero if the indices are large enough for overflow correction
1087	* to work correctly without impacting compression ratio.
1088	*/
1089	MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
1090	U32 cycleLog,
1091	U32 maxDist,
1092	U32 loadedDictEnd,
1093	void const* src)
1094	{
1095	U32 const cycleSize = `1u` << cycleLog;
1096	U32 const curr = (U32)((BYTE const*)src - window.base);
1097	U32 const minIndexToOverflowCorrect = cycleSize
1098	+ MAX(maxDist, cycleSize)
1099	+ ZSTD_WINDOW_START_INDEX;
1100
1101	/ Adjust the min index to backoff the overflow correction frequency,*
1102	* so we don't waste too much CPU in overflow correction. If this
1103	* computation overflows we don't really care, we just need to make
1104	* sure it is at least minIndexToOverflowCorrect.
1105	*/
1106	U32 const adjustment = window.nbOverflowCorrections + `1`;
1107	U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
1108	minIndexToOverflowCorrect);
1109	U32 const indexLargeEnough = curr > adjustedIndex;
1110
1111	/ Only overflow correct early if the dictionary is invalidated already,*
1112	* so we don't hurt compression ratio.
1113	*/
1114	U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
1115
1116	return indexLargeEnough && dictionaryInvalidated;
1117	}
1118
1119	/*
1120	* ZSTD_window_needOverflowCorrection():
1121	* Returns non-zero if the indices are getting too large and need overflow
1122	* protection.
1123	*/
1124	MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
1125	U32 cycleLog,
1126	U32 maxDist,
1127	U32 loadedDictEnd,
1128	void const* src,
1129	void const* srcEnd)
1130	{
1131	U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
1132	if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1133	if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
1134	return `1`;
1135	}
1136	}
1137	return curr > ZSTD_CURRENT_MAX;
1138	}
1139
1140	/*
1141	* ZSTD_window_correctOverflow():
1142	* Reduces the indices to protect from index overflow.
1143	* Returns the correction made to the indices, which must be applied to every
1144	* stored index.
1145	*
1146	* The least significant cycleLog bits of the indices must remain the same,
1147	* which may be 0. Every index up to maxDist in the past must be valid.
1148	*/
1149	MEM_STATIC
1150	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1151	U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
1152	U32 maxDist, void const* src)
1153	{
1154	/ preemptive overflow correction:*
1155	* 1. correction is large enough:
1156	* lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
1157	* 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
1158	*
1159	* current - newCurrent
1160	* > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
1161	* > (3<<29) - (1<<chainLog)
1162	* > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
1163	* > 1<<29
1164	*
1165	* 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
1166	* After correction, current is less than (1<<chainLog + 1<<windowLog).
1167	* In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
1168	* In 32-bit mode we are safe, because (chainLog <= 29), so
1169	* ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
1170	* 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
1171	* windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
1172	*/
1173	U32 const cycleSize = `1u` << cycleLog;
1174	U32 const cycleMask = cycleSize - `1`;
1175	U32 const curr = (U32)((BYTE const*)src - window->base);
1176	U32 const currentCycle = curr & cycleMask;
1177	/ Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. /
1178	U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
1179	? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
1180	: `0`;
1181	U32 const newCurrent = currentCycle
1182	+ currentCycleCorrection
1183	+ MAX(maxDist, cycleSize);
1184	U32 const correction = curr - newCurrent;
1185	/ maxDist must be a power of two so that:*
1186	* (newCurrent & cycleMask) == (curr & cycleMask)
1187	* This is required to not corrupt the chains / binary tree.
1188	*/
1189	assert((maxDist & (maxDist - `1`)) == `0`);
1190	assert((curr & cycleMask) == (newCurrent & cycleMask));
1191	assert(curr > newCurrent);
1192	if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
1193	/ Loose bound, should be around 1<<29 (see above) /
1194	assert(correction > `1`<<`28`);
1195	}
1196
1197	window->base += correction;
1198	window->dictBase += correction;
1199	if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
1200	window->lowLimit = ZSTD_WINDOW_START_INDEX;
1201	} else {
1202	window->lowLimit -= correction;
1203	}
1204	if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
1205	window->dictLimit = ZSTD_WINDOW_START_INDEX;
1206	} else {
1207	window->dictLimit -= correction;
1208	}
1209
1210	/ Ensure we can still reference the full window. /
1211	assert(newCurrent >= maxDist);
1212	assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
1213	/ Ensure that lowLimit and dictLimit didn't underflow. /
1214	assert(window->lowLimit <= newCurrent);
1215	assert(window->dictLimit <= newCurrent);
1216
1217	++window->nbOverflowCorrections;
1218
1219	DEBUGLOG(`4`, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
1220	window->lowLimit);
1221	return correction;
1222	}
1223
1224	/*
1225	* ZSTD_window_enforceMaxDist():
1226	* Updates lowLimit so that:
1227	* (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
1228	*
1229	* It ensures index is valid as long as index >= lowLimit.
1230	* This must be called before a block compression call.
1231	*
1232	* loadedDictEnd is only defined if a dictionary is in use for current compression.
1233	* As the name implies, loadedDictEnd represents the index at end of dictionary.
1234	* The value lies within context's referential, it can be directly compared to blockEndIdx.
1235	*
1236	* If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
1237	* If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
1238	* This is because dictionaries are allowed to be referenced fully
1239	* as long as the last byte of the dictionary is in the window.
1240	* Once input has progressed beyond window size, dictionary cannot be referenced anymore.
1241	*
1242	* In normal dict mode, the dictionary lies between lowLimit and dictLimit.
1243	* In dictMatchState mode, lowLimit and dictLimit are the same,
1244	* and the dictionary is below them.
1245	* forceWindow and dictMatchState are therefore incompatible.
1246	*/
1247	MEM_STATIC void
1248	ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
1249	const void* blockEnd,
1250	U32 maxDist,
1251	U32* loadedDictEndPtr,
1252	const ZSTD_MatchState_t** dictMatchStatePtr)
1253	{
1254	U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1255	U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : `0`;
1256	DEBUGLOG(`5`, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1257	(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1258
1259	/ - When there is no dictionary : loadedDictEnd == 0.*
1260	In which case, the test (blockEndIdx > maxDist) is merely to avoid
1261	overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
1262	- When there is a standard dictionary :
1263	Index referential is copied from the dictionary,
1264	which means it starts from 0.
1265	In which case, loadedDictEnd == dictSize,
1266	and it makes sense to compare `blockEndIdx > maxDist + dictSize`
1267	since `blockEndIdx` also starts from zero.
1268	- When there is an attached dictionary :
1269	loadedDictEnd is expressed within the referential of the context,
1270	so it can be directly compared against blockEndIdx.
1271	*/
1272	if (blockEndIdx > maxDist + loadedDictEnd) {
1273	U32 const newLowLimit = blockEndIdx - maxDist;
1274	if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
1275	if (window->dictLimit < window->lowLimit) {
1276	DEBUGLOG(`5`, "Update dictLimit to match lowLimit, from %u to %u",
1277	(unsigned)window->dictLimit, (unsigned)window->lowLimit);
1278	window->dictLimit = window->lowLimit;
1279	}
1280	/ On reaching window size, dictionaries are invalidated /
1281	if (loadedDictEndPtr) *loadedDictEndPtr = `0`;
1282	if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
1283	}
1284	}
1285
1286	/ Similar to ZSTD_window_enforceMaxDist(),*
1287	* but only invalidates dictionary
1288	* when input progresses beyond window size.
1289	* assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
1290	* loadedDictEnd uses same referential as window->base
1291	* maxDist is the window size */
1292	MEM_STATIC void
1293	ZSTD_checkDictValidity(const ZSTD_window_t* window,
1294	const void* blockEnd,
1295	U32 maxDist,
1296	U32* loadedDictEndPtr,
1297	const ZSTD_MatchState_t** dictMatchStatePtr)
1298	{
1299	assert(loadedDictEndPtr != NULL);
1300	assert(dictMatchStatePtr != NULL);
1301	{ U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
1302	U32 const loadedDictEnd = *loadedDictEndPtr;
1303	DEBUGLOG(`5`, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
1304	(unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
1305	assert(blockEndIdx >= loadedDictEnd);
1306
1307	if (blockEndIdx > loadedDictEnd + maxDist \|\| loadedDictEnd != window->dictLimit) {
1308	/ On reaching window size, dictionaries are invalidated.*
1309	* For simplification, if window size is reached anywhere within next block,
1310	* the dictionary is invalidated for the full block.
1311	*
1312	* We also have to invalidate the dictionary if ZSTD_window_update() has detected
1313	* non-contiguous segments, which means that loadedDictEnd != window->dictLimit.
1314	* loadedDictEnd may be 0, if forceWindow is true, but in that case we never use
1315	* dictMatchState, so setting it to NULL is not a problem.
1316	*/
1317	DEBUGLOG(`6`, "invalidating dictionary for current block (distance > windowSize)");
1318	*loadedDictEndPtr = `0`;
1319	*dictMatchStatePtr = NULL;
1320	} else {
1321	if (*loadedDictEndPtr != `0`) {
1322	DEBUGLOG(`6`, "dictionary considered valid for current block");
1323	} } }
1324	}
1325
1326	MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
1327	ZSTD_memset(window, `0`, sizeof(*window));
1328	window->base = (BYTE const*)" ";
1329	window->dictBase = (BYTE const*)" ";
1330	ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); / Start above ZSTD_DUBT_UNSORTED_MARK /
1331	window->dictLimit = ZSTD_WINDOW_START_INDEX; / start from >0, so that 1st position is valid /
1332	window->lowLimit = ZSTD_WINDOW_START_INDEX; / it ensures first and later CCtx usages compress the same /
1333	window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; / see issue #1241 /
1334	window->nbOverflowCorrections = `0`;
1335	}
1336
1337	/*
1338	* ZSTD_window_update():
1339	* Updates the window by appending [src, src + srcSize) to the window.
1340	* If it is not contiguous, the current prefix becomes the extDict, and we
1341	* forget about the extDict. Handles overlap of the prefix and extDict.
1342	* Returns non-zero if the segment is contiguous.
1343	*/
1344	MEM_STATIC
1345	ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1346	U32 ZSTD_window_update(ZSTD_window_t* window,
1347	const void* src, size_t srcSize,
1348	int forceNonContiguous)
1349	{
1350	BYTE const* const ip = (BYTE const*)src;
1351	U32 contiguous = `1`;
1352	DEBUGLOG(`5`, "ZSTD_window_update");
1353	if (srcSize == `0`)
1354	return contiguous;
1355	assert(window->base != NULL);
1356	assert(window->dictBase != NULL);
1357	/ Check if blocks follow each other /
1358	if (src != window->nextSrc \|\| forceNonContiguous) {
1359	/ not contiguous /
1360	size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
1361	DEBUGLOG(`5`, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
1362	window->lowLimit = window->dictLimit;
1363	assert(distanceFromBase == (size_t)(U32)distanceFromBase); / should never overflow /
1364	window->dictLimit = (U32)distanceFromBase;
1365	window->dictBase = window->base;
1366	window->base = ip - distanceFromBase;
1367	/ ms->nextToUpdate = window->dictLimit; /
1368	if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; / too small extDict /
1369	contiguous = `0`;
1370	}
1371	window->nextSrc = ip + srcSize;
1372	/ if input and dictionary overlap : reduce dictionary (area presumed modified by input) /
1373	if ( (ip+srcSize > window->dictBase + window->lowLimit)
1374	& (ip < window->dictBase + window->dictLimit)) {
1375	size_t const highInputIdx = (size_t)((ip + srcSize) - window->dictBase);
1376	U32 const lowLimitMax = (highInputIdx > (size_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
1377	assert(highInputIdx < UINT_MAX);
1378	window->lowLimit = lowLimitMax;
1379	DEBUGLOG(`5`, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
1380	}
1381	return contiguous;
1382	}
1383
1384	/*
1385	* Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
1386	*/
1387	MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1388	{
1389	U32 const maxDistance = `1U` << windowLog;
1390	U32 const lowestValid = ms->window.lowLimit;
1391	U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1392	U32 const isDictionary = (ms->loadedDictEnd != `0`);
1393	/ When using a dictionary the entire dictionary is valid if a single byte of the dictionary*
1394	* is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
1395	* valid for the entire block. So this check is sufficient to find the lowest valid match index.
1396	*/
1397	U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1398	return matchLowest;
1399	}
1400
1401	/*
1402	* Returns the lowest allowed match index in the prefix.
1403	*/
1404	MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_MatchState_t* ms, U32 curr, unsigned windowLog)
1405	{
1406	U32 const maxDistance = `1U` << windowLog;
1407	U32 const lowestValid = ms->window.dictLimit;
1408	U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
1409	U32 const isDictionary = (ms->loadedDictEnd != `0`);
1410	/ When computing the lowest prefix index we need to take the dictionary into account to handle*
1411	* the edge case where the dictionary and the source are contiguous in memory.
1412	*/
1413	U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
1414	return matchLowest;
1415	}
1416
1417	/ index_safety_check:*
1418	* intentional underflow : ensure repIndex isn't overlapping dict + prefix
1419	* @return 1 if values are not overlapping,
1420	* 0 otherwise */
1421	MEM_STATIC int ZSTD_index_overlap_check(const U32 prefixLowestIndex, const U32 repIndex) {
1422	return ((U32)((prefixLowestIndex-`1`) - repIndex) >= `3`);
1423	}
1424
1425
/* debug functions */
1427	#if (DEBUGLEVEL>=2)
1428
1429	MEM_STATIC double ZSTD_fWeight(U32 rawStat)
1430	{
1431	U32 const fp_accuracy = `8`;
1432	U32 const fp_multiplier = (`1` << fp_accuracy);
1433	U32 const newStat = rawStat + `1`;
1434	U32 const hb = ZSTD_highbit32(newStat);
1435	U32 const BWeight = hb * fp_multiplier;
1436	U32 const FWeight = (newStat << fp_accuracy) >> hb;
1437	U32 const weight = BWeight + FWeight;
1438	assert(hb + fp_accuracy < `31`);
1439	return (double)weight / fp_multiplier;
1440	}
1441
1442	/ display a table content,*
1443	* listing each element, its frequency, and its predicted bit cost */
1444	MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
1445	{
1446	unsigned u, sum;
1447	for (u=`0`, sum=`0`; u<=max; u++) sum += table[u];
1448	DEBUGLOG(`2`, "total nb elts: %u", sum);
1449	for (u=`0`; u<=max; u++) {
1450	DEBUGLOG(`2`, "%2u: %5u (%.2f)",
1451	u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
1452	}
1453	}
1454
1455	#endif
1456
/* Short Cache */

/* Normally, zstd matchfinders follow this flow:
 *     1. Compute hash at ip
 *     2. Load index from hashTable[hash]
 *     3. Check if *ip == *(base + index)
 * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss.
 *
 * Short cache is an optimization which allows us to avoid step 3 most of the time
 * when the data doesn't actually match. With short cache, the flow becomes:
 *     1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip.
 *     2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works.
 *     3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue.
 *
 * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to
 * dictMatchState matchfinders.
 */
#define ZSTD_SHORT_CACHE_TAG_BITS 8   /* number of low bits reserved for the tag in a packed entry */
#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1)   /* selects the tag bits */
1476
1477	/ Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable.*
1478	* Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */
1479	MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) {
1480	size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS;
1481	U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK);
1482	assert(index >> (`32` - ZSTD_SHORT_CACHE_TAG_BITS) == `0`);
1483	hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) \| tag;
1484	}
1485
1486	/ Helper function for short cache matchfinders.*
1487	* Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. */
1488	MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
1489	U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
1490	U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
1491	return tag1 == tag2;
1492	}
1493
/* ===============================================================
 * Shared internal declarations
 * These prototypes may be called from sources not in lib/compress
 * =============================================================== */
1498
/* ZSTD_loadCEntropy() :
 * dict : must point at beginning of a valid zstd dictionary.
 * return : size of dictionary header (size of magic number + dict ID + entropy tables)
 * assumptions : magic number supposed already checked
 *               and dictSize >= 8
 * NOTE(review) : the roles of @bs and @workspace are not described in this header,
 *                check the definition before relying on them. */
size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
                         const void* const dict, size_t dictSize);
1506
/* ZSTD_reset_compressedBlockState() :
 * declared here, defined elsewhere.
 * NOTE(review) : presumably brings @bs back to its initial state - confirm against the definition. */
void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
1508
/* ZSTD_SequencePosition :
 * cursor into an array of ZSTD_Sequence */
typedef struct {
    U32 idx;            /* Index in array of ZSTD_Sequence */
    U32 posInSequence;  /* Position within sequence at idx */
    size_t posInSrc;    /* Number of bytes given by sequences provided so far */
} ZSTD_SequencePosition;
1514
/* ZSTD_convertBlockSequences() :
 * exposed for benchmark.
 * NOTE(review) : return convention (size or error code) is not stated here - confirm against the definition. */
size_t ZSTD_convertBlockSequences(ZSTD_CCtx* cctx,
                        const ZSTD_Sequence* const inSeqs, size_t nbSequences,
                        int const repcodeResolution);
1519
/* BlockSummary :
 * return type of ZSTD_get1BlockSummary().
 * NOTE(review) : field meanings below are inferred from their names - confirm against the definition. */
typedef struct {
    size_t nbSequences;   /* presumably : number of sequences in the block */
    size_t blockSize;     /* presumably : size of the block */
    size_t litSize;       /* presumably : total size of literals in the block */
} BlockSummary;
1525
/* ZSTD_get1BlockSummary() :
 * returns a BlockSummary computed from the @nbSeqs sequences at @seqs.
 * NOTE(review) : the name suggests only the first block is summarized - confirm against the definition. */
BlockSummary ZSTD_get1BlockSummary(const ZSTD_Sequence* seqs, size_t nbSeqs);
1527
/* ==============================================================
 * Private declarations
 * These prototypes shall only be called from within lib/compress
 * ============================================================== */
1532
/* ZSTD_getCParamsFromCCtxParams() :
 * cParams are built depending on compressionLevel, src size hints,
 * LDM and manually set compression parameters.
 * Note: srcSizeHint == 0 means 0!
 */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_CParamMode_e mode);
1540
1541	/! ZSTD_initCStream_internal() :*
1542	* Private use only. Init streaming operation.
1543	* expects params to be valid.
1544	* must receive dict, or cdict, or none, but not both.
1545	* @return : 0, or an error code */
1546	size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
1547	const void* dict, size_t dictSize,
1548	const ZSTD_CDict* cdict,
1549	const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
1550
1551	void ZSTD_resetSeqStore(SeqStore_t* ssPtr);
1552
1553	/! ZSTD_getCParamsFromCDict() :*
1554	* as the name implies */
1555	ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
1556
1557	/ ZSTD_compressBegin_advanced_internal() :*
1558	* Private use only. To be called from zstdmt_compress.c. */
1559	size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
1560	const void* dict, size_t dictSize,
1561	ZSTD_dictContentType_e dictContentType,
1562	ZSTD_dictTableLoadMethod_e dtlm,
1563	const ZSTD_CDict* cdict,
1564	const ZSTD_CCtx_params* params,
1565	unsigned long long pledgedSrcSize);
1566
1567	/ ZSTD_compress_advanced_internal() :*
1568	* Private use only. To be called from zstdmt_compress.c. */
1569	size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
1570	void* dst, size_t dstCapacity,
1571	const void* src, size_t srcSize,
1572	const void* dict,size_t dictSize,
1573	const ZSTD_CCtx_params* params);
1574
1575
/* ZSTD_writeLastEmptyBlock() :
 * output an empty Block with end-of-frame mark to complete a frame
 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
 *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
 */
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
1582
1583
1584	/ ZSTD_referenceExternalSequences() :*
1585	* Must be called before starting a compression operation.
1586	* seqs must parse a prefix of the source.
1587	* This cannot be used when long range matching is enabled.
1588	* Zstd will use these sequences, and pass the literals to a secondary block
1589	* compressor.
1590	* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
1591	* access and data corruption.
1592	*/
1593	void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
1594
1595	/ ZSTD_cycleLog() :*
1596	* condition for correct operation : hashLog > 1 */
1597	U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
1598
1599	/ ZSTD_CCtx_trace() :*
1600	* Trace the end of a compression call.
1601	*/
1602	void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
1603
1604	/ Returns 1 if an external sequence producer is registered, otherwise returns 0. /
1605	MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
1606	return params->extSeqProdFunc != NULL;
1607	}
1608
/* ===============================================================
 * Deprecated definitions that are still used internally to avoid
 * deprecation warnings. These functions are exactly equivalent to
 * their public variants, but avoid the deprecation warnings.
 * =============================================================== */
1614
1615	size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
1616
1617	size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx,
1618	void* dst, size_t dstCapacity,
1619	const void* src, size_t srcSize);
1620
1621	size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx,
1622	void* dst, size_t dstCapacity,
1623	const void* src, size_t srcSize);
1624
1625	size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
1626
1627
1628	#endif /* ZSTD_COMPRESS_H */
1629

source code of linux/lib/zstd/compress/zstd_compress_internal.h