1 | /* |
2 | * Copyright (c) Yann Collet, Facebook, Inc. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | #ifndef ZSTD_CCOMMON_H_MODULE |
12 | #define ZSTD_CCOMMON_H_MODULE |
13 | |
14 | /* this module contains definitions which must be identical |
15 | * across compression, decompression and dictBuilder. |
16 | * It also contains a few functions useful to at least 2 of them |
17 | * and which benefit from being inlined */ |
18 | |
19 | /*-************************************* |
20 | * Dependencies |
21 | ***************************************/ |
22 | #include "compiler.h" |
23 | #include "cpu.h" |
24 | #include "mem.h" |
25 | #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ |
26 | #include "error_private.h" |
27 | #define ZSTD_STATIC_LINKING_ONLY |
28 | #include <linux/zstd.h> |
29 | #define FSE_STATIC_LINKING_ONLY |
30 | #include "fse.h" |
31 | #define HUF_STATIC_LINKING_ONLY |
32 | #include "huf.h" |
33 | #include <linux/xxhash.h> /* XXH_reset, update, digest */ |
34 | #define ZSTD_TRACE 0 |
35 | |
36 | |
37 | /* ---- static assert (debug) --- */ |
38 | #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) |
39 | #define ZSTD_isError ERR_isError /* for inlining */ |
40 | #define FSE_isError ERR_isError |
41 | #define HUF_isError ERR_isError |
42 | |
43 | |
44 | /*-************************************* |
45 | * shared macros |
46 | ***************************************/ |
47 | #undef MIN |
48 | #undef MAX |
49 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) |
50 | #define MAX(a,b) ((a)>(b) ? (a) : (b)) |
51 | #define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) |
52 | |
53 | |
54 | /*-************************************* |
55 | * Common constants |
56 | ***************************************/ |
57 | #define ZSTD_OPT_NUM (1<<12) |
58 | |
59 | #define ZSTD_REP_NUM 3 /* number of repcodes */ |
60 | static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; |
61 | |
62 | #define KB *(1 <<10) |
63 | #define MB *(1 <<20) |
64 | #define GB *(1U<<30) |
65 | |
66 | #define BIT7 128 |
67 | #define BIT6 64 |
68 | #define BIT5 32 |
69 | #define BIT4 16 |
70 | #define BIT1 2 |
71 | #define BIT0 1 |
72 | |
73 | #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 |
74 | static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; |
75 | static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; |
76 | |
77 | #define ZSTD_FRAMEIDSIZE 4 /* magic number size */ |
78 | |
79 | #define 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ |
80 | static UNUSED_ATTR const size_t = ZSTD_BLOCKHEADERSIZE; |
81 | typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; |
82 | |
83 | #define ZSTD_FRAMECHECKSUMSIZE 4 |
84 | |
85 | #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ |
86 | #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ |
87 | |
88 | #define HufLog 12 |
89 | typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; |
90 | |
91 | #define LONGNBSEQ 0x7F00 |
92 | |
93 | #define MINMATCH 3 |
94 | |
95 | #define Litbits 8 |
96 | #define MaxLit ((1<<Litbits) - 1) |
97 | #define MaxML 52 |
98 | #define MaxLL 35 |
99 | #define DefaultMaxOff 28 |
100 | #define MaxOff 31 |
101 | #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ |
102 | #define MLFSELog 9 |
103 | #define LLFSELog 9 |
104 | #define OffFSELog 8 |
105 | #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog) |
106 | |
107 | #define 128 /* header + <= 127 byte tree description */ |
108 | /* Each table cannot take more than #symbols * FSELog bits */ |
109 | #define (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8) |
110 | |
111 | static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = { |
112 | 0, 0, 0, 0, 0, 0, 0, 0, |
113 | 0, 0, 0, 0, 0, 0, 0, 0, |
114 | 1, 1, 1, 1, 2, 2, 3, 3, |
115 | 4, 6, 7, 8, 9,10,11,12, |
116 | 13,14,15,16 |
117 | }; |
118 | static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = { |
119 | 4, 3, 2, 2, 2, 2, 2, 2, |
120 | 2, 2, 2, 2, 2, 1, 1, 1, |
121 | 2, 2, 2, 2, 2, 2, 2, 2, |
122 | 2, 3, 2, 1, 1, 1, 1, 1, |
123 | -1,-1,-1,-1 |
124 | }; |
125 | #define LL_DEFAULTNORMLOG 6 /* for static allocation */ |
126 | static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; |
127 | |
128 | static UNUSED_ATTR const U8 ML_bits[MaxML+1] = { |
129 | 0, 0, 0, 0, 0, 0, 0, 0, |
130 | 0, 0, 0, 0, 0, 0, 0, 0, |
131 | 0, 0, 0, 0, 0, 0, 0, 0, |
132 | 0, 0, 0, 0, 0, 0, 0, 0, |
133 | 1, 1, 1, 1, 2, 2, 3, 3, |
134 | 4, 4, 5, 7, 8, 9,10,11, |
135 | 12,13,14,15,16 |
136 | }; |
137 | static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = { |
138 | 1, 4, 3, 2, 2, 2, 2, 2, |
139 | 2, 1, 1, 1, 1, 1, 1, 1, |
140 | 1, 1, 1, 1, 1, 1, 1, 1, |
141 | 1, 1, 1, 1, 1, 1, 1, 1, |
142 | 1, 1, 1, 1, 1, 1, 1, 1, |
143 | 1, 1, 1, 1, 1, 1,-1,-1, |
144 | -1,-1,-1,-1,-1 |
145 | }; |
146 | #define ML_DEFAULTNORMLOG 6 /* for static allocation */ |
147 | static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; |
148 | |
149 | static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = { |
150 | 1, 1, 1, 1, 1, 1, 2, 2, |
151 | 2, 1, 1, 1, 1, 1, 1, 1, |
152 | 1, 1, 1, 1, 1, 1, 1, 1, |
153 | -1,-1,-1,-1,-1 |
154 | }; |
155 | #define OF_DEFAULTNORMLOG 5 /* for static allocation */ |
156 | static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; |
157 | |
158 | |
159 | /*-******************************************* |
160 | * Shared functions to include for inlining |
161 | *********************************************/ |
162 | static void ZSTD_copy8(void* dst, const void* src) { |
163 | #if defined(ZSTD_ARCH_ARM_NEON) |
164 | vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); |
165 | #else |
166 | ZSTD_memcpy(dst, src, 8); |
167 | #endif |
168 | } |
169 | #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } |
170 | |
171 | /* Need to use memmove here since the literal buffer can now be located within |
172 | the dst buffer. In circumstances where the op "catches up" to where the |
173 | literal buffer is, there can be partial overlaps in this call on the final |
174 | copy if the literal is being shifted by less than 16 bytes. */ |
175 | static void ZSTD_copy16(void* dst, const void* src) { |
176 | #if defined(ZSTD_ARCH_ARM_NEON) |
177 | vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); |
178 | #elif defined(ZSTD_ARCH_X86_SSE2) |
179 | _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src)); |
180 | #elif defined(__clang__) |
181 | ZSTD_memmove(dst, src, 16); |
182 | #else |
183 | /* ZSTD_memmove is not inlined properly by gcc */ |
184 | BYTE copy16_buf[16]; |
185 | ZSTD_memcpy(copy16_buf, src, 16); |
186 | ZSTD_memcpy(dst, copy16_buf, 16); |
187 | #endif |
188 | } |
189 | #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } |
190 | |
191 | #define WILDCOPY_OVERLENGTH 32 |
192 | #define WILDCOPY_VECLEN 16 |
193 | |
194 | typedef enum { |
195 | ZSTD_no_overlap, |
196 | ZSTD_overlap_src_before_dst |
197 | /* ZSTD_overlap_dst_before_src, */ |
198 | } ZSTD_overlap_e; |
199 | |
200 | /*! ZSTD_wildcopy() : |
201 | * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) |
202 | * @param ovtype controls the overlap detection |
203 | * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. |
204 | * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. |
205 | * The src buffer must be before the dst buffer. |
206 | */ |
207 | MEM_STATIC FORCE_INLINE_ATTR |
208 | void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) |
209 | { |
210 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; |
211 | const BYTE* ip = (const BYTE*)src; |
212 | BYTE* op = (BYTE*)dst; |
213 | BYTE* const oend = op + length; |
214 | |
215 | if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { |
216 | /* Handle short offset copies. */ |
217 | do { |
218 | COPY8(op, ip) |
219 | } while (op < oend); |
220 | } else { |
221 | assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); |
222 | /* Separate out the first COPY16() call because the copy length is |
223 | * almost certain to be short, so the branches have different |
224 | * probabilities. Since it is almost certain to be short, only do |
225 | * one COPY16() in the first call. Then, do two calls per loop since |
226 | * at that point it is more likely to have a high trip count. |
227 | */ |
228 | #ifdef __aarch64__ |
229 | do { |
230 | COPY16(op, ip); |
231 | } |
232 | while (op < oend); |
233 | #else |
234 | ZSTD_copy16(dst: op, src: ip); |
235 | if (16 >= length) return; |
236 | op += 16; |
237 | ip += 16; |
238 | do { |
239 | COPY16(op, ip); |
240 | COPY16(op, ip); |
241 | } |
242 | while (op < oend); |
243 | #endif |
244 | } |
245 | } |
246 | |
247 | MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
248 | { |
249 | size_t const length = MIN(dstCapacity, srcSize); |
250 | if (length > 0) { |
251 | ZSTD_memcpy(dst, src, length); |
252 | } |
253 | return length; |
254 | } |
255 | |
256 | /* define "workspace is too large" as this number of times larger than needed */ |
257 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 |
258 | |
259 | /* when workspace is continuously too large |
260 | * during at least this number of times, |
261 | * context's memory usage is considered wasteful, |
262 | * because it's sized to handle a worst case scenario which rarely happens. |
263 | * In which case, resize it down to free some memory */ |
264 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 |
265 | |
266 | /* Controls whether the input/output buffer is buffered or stable. */ |
267 | typedef enum { |
268 | ZSTD_bm_buffered = 0, /* Buffer the input/output */ |
269 | ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ |
270 | } ZSTD_bufferMode_e; |
271 | |
272 | |
273 | /*-******************************************* |
274 | * Private declarations |
275 | *********************************************/ |
276 | typedef struct seqDef_s { |
277 | U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ |
278 | U16 litLength; |
279 | U16 mlBase; /* mlBase == matchLength - MINMATCH */ |
280 | } seqDef; |
281 | |
282 | /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ |
283 | typedef enum { |
284 | ZSTD_llt_none = 0, /* no longLengthType */ |
285 | ZSTD_llt_literalLength = 1, /* represents a long literal */ |
286 | ZSTD_llt_matchLength = 2 /* represents a long match */ |
287 | } ZSTD_longLengthType_e; |
288 | |
289 | typedef struct { |
290 | seqDef* sequencesStart; |
291 | seqDef* sequences; /* ptr to end of sequences */ |
292 | BYTE* litStart; |
293 | BYTE* lit; /* ptr to end of literals */ |
294 | BYTE* llCode; |
295 | BYTE* mlCode; |
296 | BYTE* ofCode; |
297 | size_t maxNbSeq; |
298 | size_t maxNbLit; |
299 | |
300 | /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength |
301 | * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment |
302 | * the existing value of the litLength or matchLength by 0x10000. |
303 | */ |
304 | ZSTD_longLengthType_e longLengthType; |
305 | U32 longLengthPos; /* Index of the sequence to apply long length modification to */ |
306 | } seqStore_t; |
307 | |
308 | typedef struct { |
309 | U32 litLength; |
310 | U32 matchLength; |
311 | } ZSTD_sequenceLength; |
312 | |
313 | /* |
314 | * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences |
315 | * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. |
316 | */ |
317 | MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) |
318 | { |
319 | ZSTD_sequenceLength seqLen; |
320 | seqLen.litLength = seq->litLength; |
321 | seqLen.matchLength = seq->mlBase + MINMATCH; |
322 | if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { |
323 | if (seqStore->longLengthType == ZSTD_llt_literalLength) { |
324 | seqLen.litLength += 0xFFFF; |
325 | } |
326 | if (seqStore->longLengthType == ZSTD_llt_matchLength) { |
327 | seqLen.matchLength += 0xFFFF; |
328 | } |
329 | } |
330 | return seqLen; |
331 | } |
332 | |
333 | /* |
334 | * Contains the compressed frame size and an upper-bound for the decompressed frame size. |
335 | * Note: before using `compressedSize`, check for errors using ZSTD_isError(). |
336 | * similarly, before using `decompressedBound`, check for errors using: |
337 | * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` |
338 | */ |
339 | typedef struct { |
340 | size_t compressedSize; |
341 | unsigned long long decompressedBound; |
342 | } ZSTD_frameSizeInfo; /* decompress & legacy */ |
343 | |
344 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ |
345 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ |
346 | |
347 | /* custom memory allocation functions */ |
348 | void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); |
349 | void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); |
350 | void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); |
351 | |
352 | |
353 | MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ |
354 | { |
355 | assert(val != 0); |
356 | { |
357 | # if (__GNUC__ >= 3) /* GCC Intrinsic */ |
358 | return __builtin_clz (val) ^ 31; |
359 | # else /* Software version */ |
360 | static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; |
361 | U32 v = val; |
362 | v |= v >> 1; |
363 | v |= v >> 2; |
364 | v |= v >> 4; |
365 | v |= v >> 8; |
366 | v |= v >> 16; |
367 | return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; |
368 | # endif |
369 | } |
370 | } |
371 | |
372 | /* |
373 | * Counts the number of trailing zeros of a `size_t`. |
374 | * Most compilers should support CTZ as a builtin. A backup |
375 | * implementation is provided if the builtin isn't supported, but |
376 | * it may not be terribly efficient. |
377 | */ |
378 | MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val) |
379 | { |
380 | if (MEM_64bits()) { |
381 | # if (__GNUC__ >= 4) |
382 | return __builtin_ctzll((U64)val); |
383 | # else |
384 | static const int DeBruijnBytePos[64] = { 0, 1, 2, 7, 3, 13, 8, 19, |
385 | 4, 25, 14, 28, 9, 34, 20, 56, |
386 | 5, 17, 26, 54, 15, 41, 29, 43, |
387 | 10, 31, 38, 35, 21, 45, 49, 57, |
388 | 63, 6, 12, 18, 24, 27, 33, 55, |
389 | 16, 53, 40, 42, 30, 37, 44, 48, |
390 | 62, 11, 23, 32, 52, 39, 36, 47, |
391 | 61, 22, 51, 46, 60, 50, 59, 58 }; |
392 | return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; |
393 | # endif |
394 | } else { /* 32 bits */ |
395 | # if (__GNUC__ >= 3) |
396 | return __builtin_ctz((U32)val); |
397 | # else |
398 | static const int DeBruijnBytePos[32] = { 0, 1, 28, 2, 29, 14, 24, 3, |
399 | 30, 22, 20, 15, 25, 17, 4, 8, |
400 | 31, 27, 13, 23, 21, 19, 16, 7, |
401 | 26, 12, 18, 6, 11, 5, 10, 9 }; |
402 | return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; |
403 | # endif |
404 | } |
405 | } |
406 | |
407 | |
408 | /* ZSTD_invalidateRepCodes() : |
409 | * ensures next compression will not use repcodes from previous block. |
410 | * Note : only works with regular variant; |
411 | * do not use with extDict variant ! */ |
412 | void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ |
413 | |
414 | |
415 | typedef struct { |
416 | blockType_e blockType; |
417 | U32 lastBlock; |
418 | U32 origSize; |
419 | } blockProperties_t; /* declared here for decompress and fullbench */ |
420 | |
421 | /*! ZSTD_getcBlockSize() : |
422 | * Provides the size of compressed block from block header `src` */ |
423 | /* Used by: decompress, fullbench (does not get its definition from here) */ |
424 | size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, |
425 | blockProperties_t* bpPtr); |
426 | |
427 | /*! ZSTD_decodeSeqHeaders() : |
428 | * decode sequence header from src */ |
429 | /* Used by: decompress, fullbench (does not get its definition from here) */ |
430 | size_t (ZSTD_DCtx* dctx, int* nbSeqPtr, |
431 | const void* src, size_t srcSize); |
432 | |
433 | /* |
434 | * @returns true iff the CPU supports dynamic BMI2 dispatch. |
435 | */ |
436 | MEM_STATIC int ZSTD_cpuSupportsBmi2(void) |
437 | { |
438 | ZSTD_cpuid_t cpuid = ZSTD_cpuid(); |
439 | return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid); |
440 | } |
441 | |
442 | |
443 | #endif /* ZSTD_CCOMMON_H_MODULE */ |
444 | |